comparison libcpp/charset.c @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Fri, 12 Feb 2010 23:39:51 +0900
parents a06113de4d67
children b7f97abdc517
comparison
legend: equal | deleted | inserted | replaced
52:c156f1bd5cd9 55:77e2b8dfacca
167 operation in several places below. */ 167 operation in several places below. */
168 static inline int 168 static inline int
169 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp, 169 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
170 cppchar_t *cp) 170 cppchar_t *cp)
171 { 171 {
172 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 }; 172 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
173 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 173 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
174 174
175 cppchar_t c; 175 cppchar_t c;
176 const uchar *inbuf = *inbufp; 176 const uchar *inbuf = *inbufp;
177 size_t nbytes, i; 177 size_t nbytes, i;
719 if (!wcset) 719 if (!wcset)
720 wcset = default_wcset; 720 wcset = default_wcset;
721 721
722 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET); 722 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
723 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision); 723 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
724 pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
725 pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
724 pfile->char16_cset_desc = init_iconv_desc (pfile, 726 pfile->char16_cset_desc = init_iconv_desc (pfile,
725 be ? "UTF-16BE" : "UTF-16LE", 727 be ? "UTF-16BE" : "UTF-16LE",
726 SOURCE_CHARSET); 728 SOURCE_CHARSET);
727 pfile->char16_cset_desc.width = 16; 729 pfile->char16_cset_desc.width = 16;
728 pfile->char32_cset_desc = init_iconv_desc (pfile, 730 pfile->char32_cset_desc = init_iconv_desc (pfile,
739 { 741 {
740 if (HAVE_ICONV) 742 if (HAVE_ICONV)
741 { 743 {
742 if (pfile->narrow_cset_desc.func == convert_using_iconv) 744 if (pfile->narrow_cset_desc.func == convert_using_iconv)
743 iconv_close (pfile->narrow_cset_desc.cd); 745 iconv_close (pfile->narrow_cset_desc.cd);
746 if (pfile->utf8_cset_desc.func == convert_using_iconv)
747 iconv_close (pfile->utf8_cset_desc.cd);
748 if (pfile->char16_cset_desc.func == convert_using_iconv)
749 iconv_close (pfile->char16_cset_desc.cd);
750 if (pfile->char32_cset_desc.func == convert_using_iconv)
751 iconv_close (pfile->char32_cset_desc.cd);
744 if (pfile->wide_cset_desc.func == convert_using_iconv) 752 if (pfile->wide_cset_desc.func == convert_using_iconv)
745 iconv_close (pfile->wide_cset_desc.cd); 753 iconv_close (pfile->wide_cset_desc.cd);
746 } 754 }
747 } 755 }
748 756
946 /* [lex.charset]: The character designated by the universal character 954 /* [lex.charset]: The character designated by the universal character
947 name \UNNNNNNNN is that character whose character short name in 955 name \UNNNNNNNN is that character whose character short name in
948 ISO/IEC 10646 is NNNNNNNN; the character designated by the 956 ISO/IEC 10646 is NNNNNNNN; the character designated by the
949 universal character name \uNNNN is that character whose character 957 universal character name \uNNNN is that character whose character
950 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value 958 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
951 for a universal character name is less than 0x20 or in the range 959 for a universal character name corresponds to a surrogate code point
952 0x7F-0x9F (inclusive), or if the universal character name 960 (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
953 designates a character in the basic source character set, then the 961 Additionally, if the hexadecimal value for a universal-character-name
954 program is ill-formed. 962 outside a character or string literal corresponds to a control character
963 (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
964 character in the basic source character set, the program is ill-formed.
965
966 C99 6.4.3: A universal character name shall not specify a character
967 whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
968 or 0060 (`), nor one in the range D800 through DFFF inclusive.
955 969
956 *PSTR must be preceded by "\u" or "\U"; it is assumed that the 970 *PSTR must be preceded by "\u" or "\U"; it is assumed that the
957 buffer end is delimited by a non-hex digit. Returns zero if the 971 buffer end is delimited by a non-hex digit. Returns zero if the
958 UCN has not been consumed. 972 UCN has not been consumed.
959 973
1016 cpp_error (pfile, CPP_DL_ERROR, 1030 cpp_error (pfile, CPP_DL_ERROR,
1017 "incomplete universal character name %.*s", 1031 "incomplete universal character name %.*s",
1018 (int) (str - base), base); 1032 (int) (str - base), base);
1019 result = 1; 1033 result = 1;
1020 } 1034 }
1021 /* The standard permits $, @ and ` to be specified as UCNs. We use 1035 /* The C99 standard permits $, @ and ` to be specified as UCNs. We use
1022 hex escapes so that this also works with EBCDIC hosts. */ 1036 hex escapes so that this also works with EBCDIC hosts.
1037 C++0x permits everything below 0xa0 within literals;
1038 ucn_valid_in_identifier will complain about identifiers. */
1023 else if ((result < 0xa0 1039 else if ((result < 0xa0
1040 && !CPP_OPTION (pfile, cplusplus)
1024 && (result != 0x24 && result != 0x40 && result != 0x60)) 1041 && (result != 0x24 && result != 0x40 && result != 0x60))
1025 || (result & 0x80000000) 1042 || (result & 0x80000000)
1026 || (result >= 0xD800 && result <= 0xDFFF)) 1043 || (result >= 0xD800 && result <= 0xDFFF))
1027 { 1044 {
1028 cpp_error (pfile, CPP_DL_ERROR, 1045 cpp_error (pfile, CPP_DL_ERROR,
1299 1316
1300 default: 1317 default:
1301 unknown: 1318 unknown:
1302 if (ISGRAPH (c)) 1319 if (ISGRAPH (c))
1303 cpp_error (pfile, CPP_DL_PEDWARN, 1320 cpp_error (pfile, CPP_DL_PEDWARN,
1304 "unknown escape sequence '\\%c'", (int) c); 1321 "unknown escape sequence: '\\%c'", (int) c);
1305 else 1322 else
1306 { 1323 {
1307 /* diagnostic.c does not support "%03o". When it does, this 1324 /* diagnostic.c does not support "%03o". When it does, this
1308 code can use %03o directly in the diagnostic again. */ 1325 code can use %03o directly in the diagnostic again. */
1309 char buf[32]; 1326 char buf[32];
1328 { 1345 {
1329 switch (type) 1346 switch (type)
1330 { 1347 {
1331 default: 1348 default:
1332 return pfile->narrow_cset_desc; 1349 return pfile->narrow_cset_desc;
1350 case CPP_UTF8STRING:
1351 return pfile->utf8_cset_desc;
1333 case CPP_CHAR16: 1352 case CPP_CHAR16:
1334 case CPP_STRING16: 1353 case CPP_STRING16:
1335 return pfile->char16_cset_desc; 1354 return pfile->char16_cset_desc;
1336 case CPP_CHAR32: 1355 case CPP_CHAR32:
1337 case CPP_STRING32: 1356 case CPP_STRING32:
1362 tbuf.len = 0; 1381 tbuf.len = 0;
1363 1382
1364 for (i = 0; i < count; i++) 1383 for (i = 0; i < count; i++)
1365 { 1384 {
1366 p = from[i].text; 1385 p = from[i].text;
1367 if (*p == 'L' || *p == 'u' || *p == 'U') p++; 1386 if (*p == 'u')
1387 {
1388 if (*++p == '8')
1389 p++;
1390 }
1391 else if (*p == 'L' || *p == 'U') p++;
1392 if (*p == 'R')
1393 {
1394 const uchar *prefix;
1395
1396 /* Skip over 'R"'. */
1397 p += 2;
1398 prefix = p;
1399 while (*p != '[')
1400 p++;
1401 p++;
1402 limit = from[i].text + from[i].len;
1403 if (limit >= p + (p - prefix) + 1)
1404 limit -= (p - prefix) + 1;
1405
1406 for (;;)
1407 {
1408 base = p;
1409 while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
1410 p++;
1411 if (p > base)
1412 {
1413 /* We have a run of normal characters; these can be fed
1414 directly to convert_cset. */
1415 if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
1416 goto fail;
1417 }
1418 if (p == limit)
1419 break;
1420
1421 p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
1422 }
1423
1424 continue;
1425 }
1426
1368 p++; /* Skip leading quote. */ 1427 p++; /* Skip leading quote. */
1369 limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */ 1428 limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
1370 1429
1371 for (;;) 1430 for (;;)
1372 { 1431 {