Mercurial > hg > CbC > CbC_gcc
comparison libcpp/charset.c @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | a06113de4d67 |
children | b7f97abdc517 |
comparison legend:
equal
deleted
inserted
replaced
52:c156f1bd5cd9 | 55:77e2b8dfacca |
---|---|
167 operation in several places below. */ | 167 operation in several places below. */ |
168 static inline int | 168 static inline int |
169 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp, | 169 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp, |
170 cppchar_t *cp) | 170 cppchar_t *cp) |
171 { | 171 { |
172 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 }; | 172 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 }; |
173 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | 173 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; |
174 | 174 |
175 cppchar_t c; | 175 cppchar_t c; |
176 const uchar *inbuf = *inbufp; | 176 const uchar *inbuf = *inbufp; |
177 size_t nbytes, i; | 177 size_t nbytes, i; |
719 if (!wcset) | 719 if (!wcset) |
720 wcset = default_wcset; | 720 wcset = default_wcset; |
721 | 721 |
722 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET); | 722 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET); |
723 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision); | 723 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision); |
724 pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET); | |
725 pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision); | |
724 pfile->char16_cset_desc = init_iconv_desc (pfile, | 726 pfile->char16_cset_desc = init_iconv_desc (pfile, |
725 be ? "UTF-16BE" : "UTF-16LE", | 727 be ? "UTF-16BE" : "UTF-16LE", |
726 SOURCE_CHARSET); | 728 SOURCE_CHARSET); |
727 pfile->char16_cset_desc.width = 16; | 729 pfile->char16_cset_desc.width = 16; |
728 pfile->char32_cset_desc = init_iconv_desc (pfile, | 730 pfile->char32_cset_desc = init_iconv_desc (pfile, |
739 { | 741 { |
740 if (HAVE_ICONV) | 742 if (HAVE_ICONV) |
741 { | 743 { |
742 if (pfile->narrow_cset_desc.func == convert_using_iconv) | 744 if (pfile->narrow_cset_desc.func == convert_using_iconv) |
743 iconv_close (pfile->narrow_cset_desc.cd); | 745 iconv_close (pfile->narrow_cset_desc.cd); |
746 if (pfile->utf8_cset_desc.func == convert_using_iconv) | |
747 iconv_close (pfile->utf8_cset_desc.cd); | |
748 if (pfile->char16_cset_desc.func == convert_using_iconv) | |
749 iconv_close (pfile->char16_cset_desc.cd); | |
750 if (pfile->char32_cset_desc.func == convert_using_iconv) | |
751 iconv_close (pfile->char32_cset_desc.cd); | |
744 if (pfile->wide_cset_desc.func == convert_using_iconv) | 752 if (pfile->wide_cset_desc.func == convert_using_iconv) |
745 iconv_close (pfile->wide_cset_desc.cd); | 753 iconv_close (pfile->wide_cset_desc.cd); |
746 } | 754 } |
747 } | 755 } |
748 | 756 |
946 /* [lex.charset]: The character designated by the universal character | 954 /* [lex.charset]: The character designated by the universal character |
947 name \UNNNNNNNN is that character whose character short name in | 955 name \UNNNNNNNN is that character whose character short name in |
948 ISO/IEC 10646 is NNNNNNNN; the character designated by the | 956 ISO/IEC 10646 is NNNNNNNN; the character designated by the |
949 universal character name \uNNNN is that character whose character | 957 universal character name \uNNNN is that character whose character |
950 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value | 958 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value |
951 for a universal character name is less than 0x20 or in the range | 959 for a universal character name corresponds to a surrogate code point |
952 0x7F-0x9F (inclusive), or if the universal character name | 960 (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed. |
953 designates a character in the basic source character set, then the | 961 Additionally, if the hexadecimal value for a universal-character-name |
954 program is ill-formed. | 962 outside a character or string literal corresponds to a control character |
963 (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a | |
964 character in the basic source character set, the program is ill-formed. | |
965 | |
966 C99 6.4.3: A universal character name shall not specify a character | |
967 whose short identifier is less than 00A0 other than 0024 ($), 0040 (@), | |
968 or 0060 (`), nor one in the range D800 through DFFF inclusive. | |
955 | 969 |
956 *PSTR must be preceded by "\u" or "\U"; it is assumed that the | 970 *PSTR must be preceded by "\u" or "\U"; it is assumed that the |
957 buffer end is delimited by a non-hex digit. Returns zero if the | 971 buffer end is delimited by a non-hex digit. Returns zero if the |
958 UCN has not been consumed. | 972 UCN has not been consumed. |
959 | 973 |
1016 cpp_error (pfile, CPP_DL_ERROR, | 1030 cpp_error (pfile, CPP_DL_ERROR, |
1017 "incomplete universal character name %.*s", | 1031 "incomplete universal character name %.*s", |
1018 (int) (str - base), base); | 1032 (int) (str - base), base); |
1019 result = 1; | 1033 result = 1; |
1020 } | 1034 } |
1021 /* The standard permits $, @ and ` to be specified as UCNs. We use | 1035 /* The C99 standard permits $, @ and ` to be specified as UCNs. We use |
1022 hex escapes so that this also works with EBCDIC hosts. */ | 1036 hex escapes so that this also works with EBCDIC hosts. |
1037 C++0x permits everything below 0xa0 within literals; | |
1038 ucn_valid_in_identifier will complain about identifiers. */ | |
1023 else if ((result < 0xa0 | 1039 else if ((result < 0xa0 |
1040 && !CPP_OPTION (pfile, cplusplus) | |
1024 && (result != 0x24 && result != 0x40 && result != 0x60)) | 1041 && (result != 0x24 && result != 0x40 && result != 0x60)) |
1025 || (result & 0x80000000) | 1042 || (result & 0x80000000) |
1026 || (result >= 0xD800 && result <= 0xDFFF)) | 1043 || (result >= 0xD800 && result <= 0xDFFF)) |
1027 { | 1044 { |
1028 cpp_error (pfile, CPP_DL_ERROR, | 1045 cpp_error (pfile, CPP_DL_ERROR, |
1299 | 1316 |
1300 default: | 1317 default: |
1301 unknown: | 1318 unknown: |
1302 if (ISGRAPH (c)) | 1319 if (ISGRAPH (c)) |
1303 cpp_error (pfile, CPP_DL_PEDWARN, | 1320 cpp_error (pfile, CPP_DL_PEDWARN, |
1304 "unknown escape sequence '\\%c'", (int) c); | 1321 "unknown escape sequence: '\\%c'", (int) c); |
1305 else | 1322 else |
1306 { | 1323 { |
1307 /* diagnostic.c does not support "%03o". When it does, this | 1324 /* diagnostic.c does not support "%03o". When it does, this |
1308 code can use %03o directly in the diagnostic again. */ | 1325 code can use %03o directly in the diagnostic again. */ |
1309 char buf[32]; | 1326 char buf[32]; |
1328 { | 1345 { |
1329 switch (type) | 1346 switch (type) |
1330 { | 1347 { |
1331 default: | 1348 default: |
1332 return pfile->narrow_cset_desc; | 1349 return pfile->narrow_cset_desc; |
1350 case CPP_UTF8STRING: | |
1351 return pfile->utf8_cset_desc; | |
1333 case CPP_CHAR16: | 1352 case CPP_CHAR16: |
1334 case CPP_STRING16: | 1353 case CPP_STRING16: |
1335 return pfile->char16_cset_desc; | 1354 return pfile->char16_cset_desc; |
1336 case CPP_CHAR32: | 1355 case CPP_CHAR32: |
1337 case CPP_STRING32: | 1356 case CPP_STRING32: |
1362 tbuf.len = 0; | 1381 tbuf.len = 0; |
1363 | 1382 |
1364 for (i = 0; i < count; i++) | 1383 for (i = 0; i < count; i++) |
1365 { | 1384 { |
1366 p = from[i].text; | 1385 p = from[i].text; |
1367 if (*p == 'L' || *p == 'u' || *p == 'U') p++; | 1386 if (*p == 'u') |
1387 { | |
1388 if (*++p == '8') | |
1389 p++; | |
1390 } | |
1391 else if (*p == 'L' || *p == 'U') p++; | |
1392 if (*p == 'R') | |
1393 { | |
1394 const uchar *prefix; | |
1395 | |
1396 /* Skip over 'R"'. */ | |
1397 p += 2; | |
1398 prefix = p; | |
1399 while (*p != '[') | |
1400 p++; | |
1401 p++; | |
1402 limit = from[i].text + from[i].len; | |
1403 if (limit >= p + (p - prefix) + 1) | |
1404 limit -= (p - prefix) + 1; | |
1405 | |
1406 for (;;) | |
1407 { | |
1408 base = p; | |
1409 while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U'))) | |
1410 p++; | |
1411 if (p > base) | |
1412 { | |
1413 /* We have a run of normal characters; these can be fed | |
1414 directly to convert_cset. */ | |
1415 if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf)) | |
1416 goto fail; | |
1417 } | |
1418 if (p == limit) | |
1419 break; | |
1420 | |
1421 p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt); | |
1422 } | |
1423 | |
1424 continue; | |
1425 } | |
1426 | |
1368 p++; /* Skip leading quote. */ | 1427 p++; /* Skip leading quote. */ |
1369 limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */ | 1428 limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */ |
1370 | 1429 |
1371 for (;;) | 1430 for (;;) |
1372 { | 1431 { |