CbC/CbC_gcc: libcpp/charset.c comparison

comparison libcpp/charset.c @ 55:77e2b8dfacca gcc-4.4.5

Update GCC from 4.4.3 to 4.5.0.

author	ryoma <e075725@ie.u-ryukyu.ac.jp>
date	Fri, 12 Feb 2010 23:39:51 +0900
parents	a06113de4d67
children	b7f97abdc517

comparison

Legend: equal, deleted, inserted, replaced

-:c156f1bd5cd9
+:77e2b8dfacca
 operation in several places below.  */
 static inline int
 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
 		     cppchar_t *cp)
 {
-static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
+static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 cppchar_t c;
 const uchar *inbuf = *inbufp;
 size_t nbytes, i;
 if (!wcset)
 wcset = default_wcset;
 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
+pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
 pfile->char16_cset_desc = init_iconv_desc (pfile,
 					     be ? "UTF-16BE" : "UTF-16LE",
 					     SOURCE_CHARSET);
 pfile->char16_cset_desc.width = 16;
 pfile->char32_cset_desc = init_iconv_desc (pfile,
 {
 if (HAVE_ICONV)
 {
 if (pfile->narrow_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->narrow_cset_desc.cd);
+if (pfile->utf8_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->utf8_cset_desc.cd);
+if (pfile->char16_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char16_cset_desc.cd);
+if (pfile->char32_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char32_cset_desc.cd);
 if (pfile->wide_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->wide_cset_desc.cd);
 }
 }
 /* [lex.charset]: The character designated by the universal character
 name \UNNNNNNNN is that character whose character short name in
 ISO/IEC 10646 is NNNNNNNN; the character designated by the
 universal character name \uNNNN is that character whose character
 short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
-for a universal character name is less than 0x20 or in the range
+for a universal character name corresponds to a surrogate code point
-0x7F-0x9F (inclusive), or if the universal character name
+(in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
-designates a character in the basic source character set, then the
+Additionally, if the hexadecimal value for a universal-character-name
-program is ill-formed.
+outside a character or string literal corresponds to a control character
+(in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
+character in the basic source character set, the program is ill-formed.
+C99 6.4.3: A universal character name shall not specify a character
+whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
+or 0060 (`), nor one in the range D800 through DFFF inclusive.
 *PSTR must be preceded by "\u" or "\U"; it is assumed that the
 buffer end is delimited by a non-hex digit.  Returns zero if the
 UCN has not been consumed.
 cpp_error (pfile, CPP_DL_ERROR,
 		 "incomplete universal character name %.*s",
 		 (int) (str - base), base);
 result = 1;
 }
-/* The standard permits $, @ and ` to be specified as UCNs.  We use
+/* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
-hex escapes so that this also works with EBCDIC hosts.  */
+hex escapes so that this also works with EBCDIC hosts.
+C++0x permits everything below 0xa0 within literals;
+ucn_valid_in_identifier will complain about identifiers.  */
 else if ((result < 0xa0
+	    && !CPP_OPTION (pfile, cplusplus)
 	    && (result != 0x24 && result != 0x40 && result != 0x60))
 	   || (result & 0x80000000)
 	   || (result >= 0xD800 && result <= 0xDFFF))
 {
 cpp_error (pfile, CPP_DL_ERROR,
 default:
 unknown:
 if (ISGRAPH (c))
 	cpp_error (pfile, CPP_DL_PEDWARN,
-		   "unknown escape sequence '\\%c'", (int) c);
+		   "unknown escape sequence: '\\%c'", (int) c);
 else
 	{
 	  /* diagnostic.c does not support "%03o".  When it does, this
 	     code can use %03o directly in the diagnostic again.  */
 	  char buf[32];
 {
 switch (type)
 {
 default:
 	return pfile->narrow_cset_desc;
+case CPP_UTF8STRING:
+	return pfile->utf8_cset_desc;
 case CPP_CHAR16:
 case CPP_STRING16:
 	return pfile->char16_cset_desc;
 case CPP_CHAR32:
 case CPP_STRING32:
 tbuf.len = 0;
 for (i = 0; i < count; i++)
 {
 p = from[i].text;
-if (*p == 'L' || *p == 'u' || *p == 'U') p++;
+if (*p == 'u')
+	{
+	  if (*++p == '8')
+	    p++;
+	}
+else if (*p == 'L' || *p == 'U') p++;
+if (*p == 'R')
+	{
+	  const uchar *prefix;
+	  /* Skip over 'R"'.  */
+	  p += 2;
+	  prefix = p;
+	  while (*p != '[')
+	    p++;
+	  p++;
+	  limit = from[i].text + from[i].len;
+	  if (limit >= p + (p - prefix) + 1)
+	    limit -= (p - prefix) + 1;
+	  for (;;)
+	    {
+	      base = p;
+	      while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
+		p++;
+	      if (p > base)
+		{
+		  /* We have a run of normal characters; these can be fed
+		     directly to convert_cset.  */
+		  if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
+		    goto fail;
+		}
+	      if (p == limit)
+		break;
+	      p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
+	    }
+	  continue;
+	}
 p++; /* Skip leading quote.  */
 limit = from[i].text + from[i].len - 1; /* Skip trailing quote.  */
 for (;;)
 	{

Mercurial > hg > CbC > CbC_gcc

comparison libcpp/charset.c @ 55:77e2b8dfacca gcc-4.4.5