comparison libcpp/lex.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
comparison
equal deleted inserted replaced
68:561a7518be6b 111:04ced10e8804
1 /* CPP Library - lexical analysis. 1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010 2 Copyright (C) 2000-2017 Free Software Foundation, Inc.
3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95. 3 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986 4 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987 5 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000 6 Broken out to separate file, Zack Weinberg, Mar 2000
8 7
262 261
263 val = *++p; 262 val = *++p;
264 } 263 }
265 } 264 }
266 265
267 /* Disable on Solaris 2/x86 until the following problems can be properly 266 /* Disable on Solaris 2/x86 until the following problem can be properly
268 autoconfed: 267 autoconfed:
269 268
270 The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
271 The Solaris 9 assembler cannot assemble SSE4.2 insns.
272 Before Solaris 9 Update 6, SSE insns cannot be executed.
273 The Solaris 10+ assembler tags objects with the instruction set 269 The Solaris 10+ assembler tags objects with the instruction set
274 extensions used, so SSE4.2 executables cannot run on machines that 270 extensions used, so SSE4.2 executables cannot run on machines that
275 don't support that extension. */ 271 don't support that extension. */
276 272
277 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__)) 273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
278 274
279 /* Replicated character data to be shared between implementations. 275 /* Replicated character data to be shared between implementations.
280 Recall that outside of a context with vector support we can't 276 Recall that outside of a context with vector support we can't
281 define compatible vector types, therefore these are all defined 277 define compatible vector types, therefore these are all defined
282 in terms of raw characters. */ 278 in terms of raw characters. */
292 }; 288 };
293 289
294 /* A version of the fast scanner using MMX vectorized byte compare insns. 290 /* A version of the fast scanner using MMX vectorized byte compare insns.
295 291
296 This uses the PMOVMSKB instruction which was introduced with "MMX2", 292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
297 which was packaged into SSE1; it is also present in the AMD 3dNOW-A 293 which was packaged into SSE1; it is also present in the AMD MMX
298 extension. Mark the function as using "sse" so that we emit a real 294 extension. Mark the function as using "sse" so that we emit a real
299 "emms" instruction, rather than the 3dNOW "femms" instruction. */ 295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
300 296
301 static const uchar * 297 static const uchar *
302 #ifndef __SSE__ 298 #ifndef __SSE__
426 uintptr_t index; 422 uintptr_t index;
427 423
428 /* Check for unaligned input. */ 424 /* Check for unaligned input. */
429 if (si & 15) 425 if (si & 15)
430 { 426 {
427 v16qi sv;
428
431 if (__builtin_expect (end - s < 16, 0) 429 if (__builtin_expect (end - s < 16, 0)
432 && __builtin_expect ((si & 0xfff) > 0xff0, 0)) 430 && __builtin_expect ((si & 0xfff) > 0xff0, 0))
433 { 431 {
434 /* There are less than 16 bytes left in the buffer, and less 432 /* There are less than 16 bytes left in the buffer, and less
435 than 16 bytes left on the page. Reading 16 bytes at this 433 than 16 bytes left on the page. Reading 16 bytes at this
438 return search_line_sse2 (s, end); 436 return search_line_sse2 (s, end);
439 } 437 }
440 438
441 /* ??? The builtin doesn't understand that the PCMPESTRI read from 439 /* ??? The builtin doesn't understand that the PCMPESTRI read from
442 memory need not be aligned. */ 440 memory need not be aligned. */
443 __asm ("%vpcmpestri $0, (%1), %2" 441 sv = __builtin_ia32_loaddqu ((const char *) s);
444 : "=c"(index) : "r"(s), "x"(search), "a"(4), "d"(16)); 442 index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
443
445 if (__builtin_expect (index < 16, 0)) 444 if (__builtin_expect (index < 16, 0))
446 goto found; 445 goto found;
447 446
448 /* Advance the pointer to an aligned address. We will re-scan a 447 /* Advance the pointer to an aligned address. We will re-scan a
449 few bytes, but we no longer need care for reading past the 448 few bytes, but we no longer need care for reading past the
450 end of a page, since we're guaranteed a match. */ 449 end of a page, since we're guaranteed a match. */
451 s = (const uchar *)((si + 16) & -16); 450 s = (const uchar *)((si + 15) & -16);
452 } 451 }
453 452
454 /* Main loop, processing 16 bytes at a time. By doing the whole loop 453 /* Main loop, processing 16 bytes at a time. */
455 in inline assembly, we can make proper use of the flags set. */ 454 #ifdef __GCC_ASM_FLAG_OUTPUTS__
456 __asm ( "sub $16, %1\n" 455 while (1)
457 " .balign 16\n" 456 {
457 char f;
458
459 /* By using inline assembly instead of the builtin,
460 we can use the result, as well as the flags set. */
461 __asm ("%vpcmpestri\t$0, %2, %3"
462 : "=c"(index), "=@ccc"(f)
463 : "m"(*s), "x"(search), "a"(4), "d"(16));
464 if (f)
465 break;
466
467 s += 16;
468 }
469 #else
470 s -= 16;
471 /* By doing the whole loop in inline assembly,
472 we can make proper use of the flags set. */
473 __asm ( ".balign 16\n"
458 "0: add $16, %1\n" 474 "0: add $16, %1\n"
459 " %vpcmpestri $0, (%1), %2\n" 475 " %vpcmpestri\t$0, (%1), %2\n"
460 " jnc 0b" 476 " jnc 0b"
461 : "=&c"(index), "+r"(s) 477 : "=&c"(index), "+r"(s)
462 : "x"(search), "a"(4), "d"(16)); 478 : "x"(search), "a"(4), "d"(16));
479 #endif
463 480
464 found: 481 found:
465 return s + index; 482 return s + index;
466 } 483 }
467 484
475 #include "../gcc/config/i386/cpuid.h" 492 #include "../gcc/config/i386/cpuid.h"
476 493
477 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *); 494 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
478 static search_line_fast_type search_line_fast; 495 static search_line_fast_type search_line_fast;
479 496
480 static void __attribute__((constructor)) 497 #define HAVE_init_vectorized_lexer 1
498 static inline void
481 init_vectorized_lexer (void) 499 init_vectorized_lexer (void)
482 { 500 {
483 unsigned dummy, ecx = 0, edx = 0; 501 unsigned dummy, ecx = 0, edx = 0;
484 search_line_fast_type impl = search_line_acc_char; 502 search_line_fast_type impl = search_line_acc_char;
485 int minimum = 0; 503 int minimum = 0;
486 504
487 #if defined(__SSE4_2__) 505 #if defined(__SSE4_2__)
488 minimum = 3; 506 minimum = 3;
489 #elif defined(__SSE2__) 507 #elif defined(__SSE2__)
490 minimum = 2; 508 minimum = 2;
491 #elif defined(__SSE__) || defined(__3dNOW_A__) 509 #elif defined(__SSE__)
492 minimum = 1; 510 minimum = 1;
493 #endif 511 #endif
494 512
495 if (minimum == 3) 513 if (minimum == 3)
496 impl = search_line_sse42; 514 impl = search_line_sse42;
503 else if (minimum == 1 || (edx & bit_SSE)) 521 else if (minimum == 1 || (edx & bit_SSE))
504 impl = search_line_mmx; 522 impl = search_line_mmx;
505 } 523 }
506 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx)) 524 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
507 { 525 {
508 if (minimum == 1 || edx & bit_3DNOWP) 526 if (minimum == 1
527 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
509 impl = search_line_mmx; 528 impl = search_line_mmx;
510 } 529 }
511 530
512 search_line_fast = impl; 531 search_line_fast = impl;
513 } 532 }
514 533
515 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) 534 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
516 535
517 /* A version of the fast scanner using AltiVec vectorized byte compares. */ 536 /* A version of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the pre-GCC 5 version. */
539
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
541 static const uchar *
542 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
543 {
544 typedef __attribute__((altivec(vector))) unsigned char vc;
545
546 const vc repl_nl = {
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
549 };
550 const vc repl_cr = {
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
553 };
554 const vc repl_bs = {
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
557 };
558 const vc repl_qm = {
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
561 };
562 const vc zero = { 0 };
563
564 vc data, t;
565
566 /* Main loop processing 16 bytes at a time. */
567 do
568 {
569 vc m_nl, m_cr, m_bs, m_qm;
570
571 data = *((const vc *)s);
572 s += 16;
573
574 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
575 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
576 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
577 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
578 t = (m_nl | m_cr) | (m_bs | m_qm);
579
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
583 }
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
585
586 /* Restore s to point to the 16 bytes we just processed. */
587 s -= 16;
588
589 {
590 #define N (sizeof(vc) / sizeof(long))
591
592 union {
593 vc v;
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l[(N == 2 || N == 4) ? N : -1];
596 } u;
597 unsigned long l, i = 0;
598
599 u.v = t;
600
601 /* Find the first word of T that is non-zero. */
602 switch (N)
603 {
604 case 4:
605 l = u.l[i++];
606 if (l != 0)
607 break;
608 s += sizeof(unsigned long);
609 l = u.l[i++];
610 if (l != 0)
611 break;
612 s += sizeof(unsigned long);
613 /* FALLTHRU */
614 case 2:
615 l = u.l[i++];
616 if (l != 0)
617 break;
618 s += sizeof(unsigned long);
619 l = u.l[i];
620 }
621
622 /* L now contains 0xff in bytes for which we matched one of the
623 relevant characters. We can find the byte index by finding
624 its bit index and dividing by 8. */
625 #ifdef __BIG_ENDIAN__
626 l = __builtin_clzl(l) >> 3;
627 #else
628 l = __builtin_ctzl(l) >> 3;
629 #endif
630 return s + l;
631
632 #undef N
633 }
634 }
635
636 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
637
638 /* A version of the fast scanner using AltiVec vectorized byte compares.
639 This cannot be used for little endian because vec_lvsl/lvsr are
640 deprecated for little endian and the code won't work properly. */
518 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported, 641 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
519 so we can't compile this function without -maltivec on the command line 642 so we can't compile this function without -maltivec on the command line
520 (or implied by some other switch). */ 643 (or implied by some other switch). */
521 644
522 static const uchar * 645 static const uchar *
587 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero)); 710 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
588 711
589 { 712 {
590 #define N (sizeof(vc) / sizeof(long)) 713 #define N (sizeof(vc) / sizeof(long))
591 714
592 typedef char check_count[(N == 2 || N == 4) * 2 - 1];
593 union { 715 union {
594 vc v; 716 vc v;
595 unsigned long l[N]; 717 /* Statically assert that N is 2 or 4. */
718 unsigned long l[(N == 2 || N == 4) ? N : -1];
596 } u; 719 } u;
597 unsigned long l, i = 0; 720 unsigned long l, i = 0;
598 721
599 u.v = t; 722 u.v = t;
600 723
608 s += sizeof(unsigned long); 731 s += sizeof(unsigned long);
609 l = u.l[i++]; 732 l = u.l[i++];
610 if (l != 0) 733 if (l != 0)
611 break; 734 break;
612 s += sizeof(unsigned long); 735 s += sizeof(unsigned long);
736 /* FALLTHROUGH */
613 case 2: 737 case 2:
614 l = u.l[i++]; 738 l = u.l[i++];
615 if (l != 0) 739 if (l != 0)
616 break; 740 break;
617 s += sizeof(unsigned long); 741 s += sizeof(unsigned long);
626 750
627 #undef N 751 #undef N
628 } 752 }
629 } 753 }
630 754
755 #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
756 #include "arm_neon.h"
757
758 /* This doesn't have to be the exact page size, but no system may use
759 a size smaller than this. ARMv8 requires a minimum page size of
760 4k. The impact of being conservative here is a small number of
761 cases will take the slightly slower entry path into the main
762 loop. */
763
764 #define AARCH64_MIN_PAGE_SIZE 4096
765
766 static const uchar *
767 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
768 {
769 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
770 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
771 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
772 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
773 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
774
775 #ifdef __AARCH64EB
776 const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
631 #else 777 #else
632 778 const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
633 /* We only have one accellerated alternative. Use a direct call so that 779 #endif
780
781 unsigned int found;
782 const uint8_t *p;
783 uint8x16_t data;
784 uint8x16_t t;
785 uint16x8_t m;
786 uint8x16_t u, v, w;
787
788 /* Align the source pointer. */
789 p = (const uint8_t *)((uintptr_t)s & -16);
790
791 /* Assuming random string start positions, with a 4k page size we'll take
792 the slow path about 0.37% of the time. */
793 if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
794 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
795 < 16, 0))
796 {
797 /* Slow path: the string starts near a possible page boundary. */
798 uint32_t misalign, mask;
799
800 misalign = (uintptr_t)s & 15;
801 mask = (-1u << misalign) & 0xffff;
802 data = vld1q_u8 (p);
803 t = vceqq_u8 (data, repl_nl);
804 u = vceqq_u8 (data, repl_cr);
805 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
806 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
807 t = vorrq_u8 (v, w);
808 t = vandq_u8 (t, xmask);
809 m = vpaddlq_u8 (t);
810 m = vshlq_u16 (m, shift);
811 found = vaddvq_u16 (m);
812 found &= mask;
813 if (found)
814 return (const uchar*)p + __builtin_ctz (found);
815 }
816 else
817 {
818 data = vld1q_u8 ((const uint8_t *) s);
819 t = vceqq_u8 (data, repl_nl);
820 u = vceqq_u8 (data, repl_cr);
821 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
822 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
823 t = vorrq_u8 (v, w);
824 if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
825 goto done;
826 }
827
828 do
829 {
830 p += 16;
831 data = vld1q_u8 (p);
832 t = vceqq_u8 (data, repl_nl);
833 u = vceqq_u8 (data, repl_cr);
834 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
835 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
836 t = vorrq_u8 (v, w);
837 } while (!vpaddd_u64 ((uint64x2_t)t));
838
839 done:
840 /* Now that we've found the terminating substring, work out precisely where
841 we need to stop. */
842 t = vandq_u8 (t, xmask);
843 m = vpaddlq_u8 (t);
844 m = vshlq_u16 (m, shift);
845 found = vaddvq_u16 (m);
846 return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
847 + __builtin_ctz (found));
848 }
849
850 #elif defined (__ARM_NEON)
851 #include "arm_neon.h"
852
853 static const uchar *
854 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
855 {
856 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
857 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
858 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
859 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
860 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
861
862 unsigned int misalign, found, mask;
863 const uint8_t *p;
864 uint8x16_t data;
865
866 /* Align the source pointer. */
867 misalign = (uintptr_t)s & 15;
868 p = (const uint8_t *)((uintptr_t)s & -16);
869 data = vld1q_u8 (p);
870
871 /* Create a mask for the bytes that are valid within the first
872 16-byte block. The idea here is that the AND with the mask
873 within the loop is "free", since we need some AND or TEST
874 insn in order to set the flags for the branch anyway. */
875 mask = (-1u << misalign) & 0xffff;
876
877 /* Main loop, processing 16 bytes at a time. */
878 goto start;
879
880 do
881 {
882 uint8x8_t l;
883 uint16x4_t m;
884 uint32x2_t n;
885 uint8x16_t t, u, v, w;
886
887 p += 16;
888 data = vld1q_u8 (p);
889 mask = 0xffff;
890
891 start:
892 t = vceqq_u8 (data, repl_nl);
893 u = vceqq_u8 (data, repl_cr);
894 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
895 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
896 t = vandq_u8 (vorrq_u8 (v, w), xmask);
897 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
898 m = vpaddl_u8 (l);
899 n = vpaddl_u16 (m);
900
901 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
902 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
903 found &= mask;
904 }
905 while (!found);
906
907 /* FOUND contains 1 in bits for which we matched a relevant
908 character. Conversion to the byte index is trivial. */
909 found = __builtin_ctz (found);
910 return (const uchar *)p + found;
911 }
912
913 #else
914
915 /* We only have one accelerated alternative. Use a direct call so that
634 we encourage inlining. */ 916 we encourage inlining. */
635 917
636 #define search_line_fast search_line_acc_char 918 #define search_line_fast search_line_acc_char
637 919
638 #endif 920 #endif
921
922 /* Initialize the lexer if needed. */
923
924 void
925 _cpp_init_lexer (void)
926 {
927 #ifdef HAVE_init_vectorized_lexer
928 init_vectorized_lexer ();
929 #endif
930 }
639 931
640 /* Returns with a logical line that contains no escaped newlines or 932 /* Returns with a logical line that contains no escaped newlines or
641 trigraphs. This is a time-critical inner loop. */ 933 trigraphs. This is a time-critical inner loop. */
642 void 934 void
643 _cpp_clean_line (cpp_reader *pfile) 935 _cpp_clean_line (cpp_reader *pfile)
1015 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, 1307 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1016 "`%.*s' is not in NFKC", (int) sz, buf); 1308 "`%.*s' is not in NFKC", (int) sz, buf);
1017 else 1309 else
1018 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, 1310 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1019 "`%.*s' is not in NFC", (int) sz, buf); 1311 "`%.*s' is not in NFC", (int) sz, buf);
1312 free (buf);
1020 } 1313 }
1021 } 1314 }
1022 1315
1023 /* Returns TRUE if the sequence starting at buffer->cur is invalid in 1316 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1024 an identifier. FIRST is TRUE if this starts an identifier. */ 1317 an identifier. FIRST is TRUE if this starts an identifier. */
1046 /* Is this a syntactically valid UCN? */ 1339 /* Is this a syntactically valid UCN? */
1047 if (CPP_OPTION (pfile, extended_identifiers) 1340 if (CPP_OPTION (pfile, extended_identifiers)
1048 && *buffer->cur == '\\' 1341 && *buffer->cur == '\\'
1049 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) 1342 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1050 { 1343 {
1344 cppchar_t s;
1051 buffer->cur += 2; 1345 buffer->cur += 2;
1052 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 1346 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1053 state)) 1347 state, &s, NULL, NULL))
1054 return true; 1348 return true;
1055 buffer->cur -= 2; 1349 buffer->cur -= 2;
1056 } 1350 }
1057 1351
1058 return false; 1352 return false;
1089 1383
1090 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 1384 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1091 replacement list of a variadic macro. */ 1385 replacement list of a variadic macro. */
1092 if (result == pfile->spec_nodes.n__VA_ARGS__ 1386 if (result == pfile->spec_nodes.n__VA_ARGS__
1093 && !pfile->state.va_args_ok) 1387 && !pfile->state.va_args_ok)
1094 cpp_error (pfile, CPP_DL_PEDWARN, 1388 {
1095 "__VA_ARGS__ can only appear in the expansion" 1389 if (CPP_OPTION (pfile, cplusplus))
1096 " of a C99 variadic macro"); 1390 cpp_error (pfile, CPP_DL_PEDWARN,
1391 "__VA_ARGS__ can only appear in the expansion"
1392 " of a C++11 variadic macro");
1393 else
1394 cpp_error (pfile, CPP_DL_PEDWARN,
1395 "__VA_ARGS__ can only appear in the expansion"
1396 " of a C99 variadic macro");
1397 }
1097 1398
1098 /* For -Wc++-compat, warn about use of C++ named operators. */ 1399 /* For -Wc++-compat, warn about use of C++ named operators. */
1099 if (result->flags & NODE_WARN_OPERATOR) 1400 if (result->flags & NODE_WARN_OPERATOR)
1100 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES, 1401 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1101 "identifier \"%s\" is a special operator name in C++", 1402 "identifier \"%s\" is a special operator name in C++",
1116 } 1417 }
1117 1418
1118 /* Lex an identifier starting at BUFFER->CUR - 1. */ 1419 /* Lex an identifier starting at BUFFER->CUR - 1. */
1119 static cpp_hashnode * 1420 static cpp_hashnode *
1120 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, 1421 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1121 struct normalize_state *nst) 1422 struct normalize_state *nst, cpp_hashnode **spelling)
1122 { 1423 {
1123 cpp_hashnode *result; 1424 cpp_hashnode *result;
1124 const uchar *cur; 1425 const uchar *cur;
1125 unsigned int len; 1426 unsigned int len;
1126 unsigned int hash = HT_HASHSTEP (0, *base); 1427 unsigned int hash = HT_HASHSTEP (0, *base);
1127 1428
1128 cur = pfile->buffer->cur; 1429 cur = pfile->buffer->cur;
1129 if (! starts_ucn) 1430 if (! starts_ucn)
1130 while (ISIDNUM (*cur)) 1431 {
1131 { 1432 while (ISIDNUM (*cur))
1132 hash = HT_HASHSTEP (hash, *cur); 1433 {
1133 cur++; 1434 hash = HT_HASHSTEP (hash, *cur);
1134 } 1435 cur++;
1436 }
1437 NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1438 }
1135 pfile->buffer->cur = cur; 1439 pfile->buffer->cur = cur;
1136 if (starts_ucn || forms_identifier_p (pfile, false, nst)) 1440 if (starts_ucn || forms_identifier_p (pfile, false, nst))
1137 { 1441 {
1138 /* Slower version for identifiers containing UCNs (or $). */ 1442 /* Slower version for identifiers containing UCNs (or $). */
1139 do { 1443 do {
1140 while (ISIDNUM (*pfile->buffer->cur)) 1444 while (ISIDNUM (*pfile->buffer->cur))
1141 { 1445 {
1446 NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1142 pfile->buffer->cur++; 1447 pfile->buffer->cur++;
1143 NORMALIZE_STATE_UPDATE_IDNUM (nst);
1144 } 1448 }
1145 } while (forms_identifier_p (pfile, false, nst)); 1449 } while (forms_identifier_p (pfile, false, nst));
1146 result = _cpp_interpret_identifier (pfile, base, 1450 result = _cpp_interpret_identifier (pfile, base,
1147 pfile->buffer->cur - base); 1451 pfile->buffer->cur - base);
1452 *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1148 } 1453 }
1149 else 1454 else
1150 { 1455 {
1151 len = cur - base; 1456 len = cur - base;
1152 hash = HT_HASHFINISH (hash, len); 1457 hash = HT_HASHFINISH (hash, len);
1153 1458
1154 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, 1459 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1155 base, len, hash, HT_ALLOC)); 1460 base, len, hash, HT_ALLOC));
1461 *spelling = result;
1156 } 1462 }
1157 1463
1158 /* Rarely, identifiers require diagnostics when lexed. */ 1464 /* Rarely, identifiers require diagnostics when lexed. */
1159 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 1465 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1160 && !pfile->state.skipping, 0)) 1466 && !pfile->state.skipping, 0))
1166 1472
1167 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 1473 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1168 replacement list of a variadic macro. */ 1474 replacement list of a variadic macro. */
1169 if (result == pfile->spec_nodes.n__VA_ARGS__ 1475 if (result == pfile->spec_nodes.n__VA_ARGS__
1170 && !pfile->state.va_args_ok) 1476 && !pfile->state.va_args_ok)
1171 cpp_error (pfile, CPP_DL_PEDWARN, 1477 {
1172 "__VA_ARGS__ can only appear in the expansion" 1478 if (CPP_OPTION (pfile, cplusplus))
1173 " of a C99 variadic macro"); 1479 cpp_error (pfile, CPP_DL_PEDWARN,
1480 "__VA_ARGS__ can only appear in the expansion"
1481 " of a C++11 variadic macro");
1482 else
1483 cpp_error (pfile, CPP_DL_PEDWARN,
1484 "__VA_ARGS__ can only appear in the expansion"
1485 " of a C99 variadic macro");
1486 }
1174 1487
1175 /* For -Wc++-compat, warn about use of C++ named operators. */ 1488 /* For -Wc++-compat, warn about use of C++ named operators. */
1176 if (result->flags & NODE_WARN_OPERATOR) 1489 if (result->flags & NODE_WARN_OPERATOR)
1177 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES, 1490 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1178 "identifier \"%s\" is a special operator name in C++", 1491 "identifier \"%s\" is a special operator name in C++",
1195 do 1508 do
1196 { 1509 {
1197 cur = pfile->buffer->cur; 1510 cur = pfile->buffer->cur;
1198 1511
1199 /* N.B. ISIDNUM does not include $. */ 1512 /* N.B. ISIDNUM does not include $. */
1200 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) 1513 while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1201 { 1514 || VALID_SIGN (*cur, cur[-1]))
1515 {
1516 NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1202 cur++; 1517 cur++;
1203 NORMALIZE_STATE_UPDATE_IDNUM (nst); 1518 }
1204 } 1519 /* A number can't end with a digit separator. */
1520 while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1521 --cur;
1205 1522
1206 pfile->buffer->cur = cur; 1523 pfile->buffer->cur = cur;
1207 } 1524 }
1208 while (forms_identifier_p (pfile, false, nst)); 1525 while (forms_identifier_p (pfile, false, nst));
1209 1526
1255 1572
1256 *first_buff_p = first_buff; 1573 *first_buff_p = first_buff;
1257 *last_buff_p = last_buff; 1574 *last_buff_p = last_buff;
1258 } 1575 }
1259 1576
1577
1578 /* Returns true if a macro has been defined.
1579 This might not work if compiled with -save-temps,
1580 or preprocess separately from compilation. */
1581
1582 static bool
1583 is_macro(cpp_reader *pfile, const uchar *base)
1584 {
1585 const uchar *cur = base;
1586 if (! ISIDST (*cur))
1587 return false;
1588 unsigned int hash = HT_HASHSTEP (0, *cur);
1589 ++cur;
1590 while (ISIDNUM (*cur))
1591 {
1592 hash = HT_HASHSTEP (hash, *cur);
1593 ++cur;
1594 }
1595 hash = HT_HASHFINISH (hash, cur - base);
1596
1597 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1598 base, cur - base, hash, HT_NO_INSERT));
1599
1600 return !result ? false : (result->type == NT_MACRO);
1601 }
1602
1603
1260 /* Lexes a raw string. The stored string contains the spelling, including 1604 /* Lexes a raw string. The stored string contains the spelling, including
1261 double quotes, delimiter string, '(' and ')', any leading 1605 double quotes, delimiter string, '(' and ')', any leading
1262 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the 1606 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1263 literal, or CPP_OTHER if it was not properly terminated. 1607 literal, or CPP_OTHER if it was not properly terminated.
1264 1608
1267 1611
1268 static void 1612 static void
1269 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, 1613 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1270 const uchar *cur) 1614 const uchar *cur)
1271 { 1615 {
1272 source_location saw_NUL = 0; 1616 uchar raw_prefix[17];
1273 const uchar *raw_prefix; 1617 uchar temp_buffer[18];
1274 unsigned int raw_prefix_len = 0; 1618 const uchar *orig_base;
1619 unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1620 enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1621 raw_str_phase phase = RAW_STR_PREFIX;
1275 enum cpp_ttype type; 1622 enum cpp_ttype type;
1276 size_t total_len = 0; 1623 size_t total_len = 0;
1624 /* Index into temp_buffer during phases other than RAW_STR,
1625 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1626 be appended to temp_buffer. */
1627 size_t temp_buffer_len = 0;
1277 _cpp_buff *first_buff = NULL, *last_buff = NULL; 1628 _cpp_buff *first_buff = NULL, *last_buff = NULL;
1629 size_t raw_prefix_start;
1278 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; 1630 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1279 1631
1280 type = (*base == 'L' ? CPP_WSTRING : 1632 type = (*base == 'L' ? CPP_WSTRING :
1281 *base == 'U' ? CPP_STRING32 : 1633 *base == 'U' ? CPP_STRING32 :
1282 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) 1634 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1283 : CPP_STRING); 1635 : CPP_STRING);
1284 1636
1285 raw_prefix = cur + 1;
1286 while (raw_prefix_len < 16)
1287 {
1288 switch (raw_prefix[raw_prefix_len])
1289 {
1290 case ' ': case '(': case ')': case '\\': case '\t':
1291 case '\v': case '\f': case '\n': default:
1292 break;
1293 /* Basic source charset except the above chars. */
1294 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1295 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1296 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1297 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1298 case 'y': case 'z':
1299 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1300 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1301 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1302 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1303 case 'Y': case 'Z':
1304 case '0': case '1': case '2': case '3': case '4': case '5':
1305 case '6': case '7': case '8': case '9':
1306 case '_': case '{': case '}': case '#': case '[': case ']':
1307 case '<': case '>': case '%': case ':': case ';': case '.':
1308 case '?': case '*': case '+': case '-': case '/': case '^':
1309 case '&': case '|': case '~': case '!': case '=': case ',':
1310 case '"': case '\'':
1311 raw_prefix_len++;
1312 continue;
1313 }
1314 break;
1315 }
1316
1317 if (raw_prefix[raw_prefix_len] != '(')
1318 {
1319 int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1320 + 1;
1321 if (raw_prefix_len == 16)
1322 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1323 "raw string delimiter longer than 16 characters");
1324 else
1325 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1326 "invalid character '%c' in raw string delimiter",
1327 (int) raw_prefix[raw_prefix_len]);
1328 pfile->buffer->cur = raw_prefix - 1;
1329 create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1330 return;
1331 }
1332
1333 cur = raw_prefix + raw_prefix_len + 1;
1334 for (;;)
1335 {
1336 #define BUF_APPEND(STR,LEN) \ 1637 #define BUF_APPEND(STR,LEN) \
1337 do { \ 1638 do { \
1338 bufring_append (pfile, (const uchar *)(STR), (LEN), \ 1639 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1339 &first_buff, &last_buff); \ 1640 &first_buff, &last_buff); \
1340 total_len += (LEN); \ 1641 total_len += (LEN); \
1642 if (__builtin_expect (temp_buffer_len < 17, 0) \
1643 && (const uchar *)(STR) != base \
1644 && (LEN) <= 2) \
1645 { \
1646 memcpy (temp_buffer + temp_buffer_len, \
1647 (const uchar *)(STR), (LEN)); \
1648 temp_buffer_len += (LEN); \
1649 } \
1341 } while (0); 1650 } while (0);
1342 1651
1652 orig_base = base;
1653 ++cur;
1654 raw_prefix_start = cur - base;
1655 for (;;)
1656 {
1343 cppchar_t c; 1657 cppchar_t c;
1344 1658
1345 /* If we previously performed any trigraph or line splicing 1659 /* If we previously performed any trigraph or line splicing
1346 transformations, undo them within the body of the raw string. */ 1660 transformations, undo them in between the opening and closing
1661 double quote. */
1347 while (note->pos < cur) 1662 while (note->pos < cur)
1348 ++note; 1663 ++note;
1349 for (; note->pos == cur; ++note) 1664 for (; note->pos == cur; ++note)
1350 { 1665 {
1351 switch (note->type) 1666 switch (note->type)
1401 abort (); 1716 abort ();
1402 BUF_APPEND ("/", 1); 1717 BUF_APPEND ("/", 1);
1403 ++note; 1718 ++note;
1404 goto after_backslash; 1719 goto after_backslash;
1405 } 1720 }
1406 /* The ) from ??) could be part of the suffix. */
1407 else if (type == ')'
1408 && strncmp ((const char *) cur+1,
1409 (const char *) raw_prefix,
1410 raw_prefix_len) == 0
1411 && cur[raw_prefix_len+1] == '"')
1412 {
1413 cur += raw_prefix_len+2;
1414 goto break_outer_loop;
1415 }
1416 else 1721 else
1417 { 1722 {
1418 /* Skip the replacement character. */ 1723 /* Skip the replacement character. */
1419 base = ++cur; 1724 base = ++cur;
1420 BUF_APPEND (&type, 1); 1725 BUF_APPEND (&type, 1);
1726 c = type;
1727 goto check_c;
1421 } 1728 }
1422 } 1729 }
1423 else 1730 else
1424 abort (); 1731 abort ();
1425 break; 1732 break;
1426 } 1733 }
1427 } 1734 }
1428 c = *cur++; 1735 c = *cur++;
1429 1736 if (__builtin_expect (temp_buffer_len < 17, 0))
1430 if (c == ')' 1737 temp_buffer[temp_buffer_len++] = c;
1431 && strncmp ((const char *) cur, (const char *) raw_prefix, 1738
1432 raw_prefix_len) == 0 1739 check_c:
1433 && cur[raw_prefix_len] == '"') 1740 if (phase == RAW_STR_PREFIX)
1434 { 1741 {
1435 cur += raw_prefix_len + 1; 1742 while (raw_prefix_len < temp_buffer_len)
1436 break; 1743 {
1744 raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1745 switch (raw_prefix[raw_prefix_len])
1746 {
1747 case ' ': case '(': case ')': case '\\': case '\t':
1748 case '\v': case '\f': case '\n': default:
1749 break;
1750 /* Basic source charset except the above chars. */
1751 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1752 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1753 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1754 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1755 case 'y': case 'z':
1756 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1757 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1758 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1759 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1760 case 'Y': case 'Z':
1761 case '0': case '1': case '2': case '3': case '4': case '5':
1762 case '6': case '7': case '8': case '9':
1763 case '_': case '{': case '}': case '#': case '[': case ']':
1764 case '<': case '>': case '%': case ':': case ';': case '.':
1765 case '?': case '*': case '+': case '-': case '/': case '^':
1766 case '&': case '|': case '~': case '!': case '=': case ',':
1767 case '"': case '\'':
1768 if (raw_prefix_len < 16)
1769 {
1770 raw_prefix_len++;
1771 continue;
1772 }
1773 break;
1774 }
1775
1776 if (raw_prefix[raw_prefix_len] != '(')
1777 {
1778 int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1779 if (raw_prefix_len == 16)
1780 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1781 col, "raw string delimiter longer "
1782 "than 16 characters");
1783 else if (raw_prefix[raw_prefix_len] == '\n')
1784 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1785 col, "invalid new-line in raw "
1786 "string delimiter");
1787 else
1788 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1789 col, "invalid character '%c' in "
1790 "raw string delimiter",
1791 (int) raw_prefix[raw_prefix_len]);
1792 pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1793 create_literal (pfile, token, orig_base,
1794 raw_prefix_start - 1, CPP_OTHER);
1795 if (first_buff)
1796 _cpp_release_buff (pfile, first_buff);
1797 return;
1798 }
1799 raw_prefix[raw_prefix_len] = '"';
1800 phase = RAW_STR;
1801 /* Nothing should be appended to temp_buffer during
1802 RAW_STR phase. */
1803 temp_buffer_len = 17;
1804 break;
1805 }
1806 continue;
1807 }
1808 else if (phase == RAW_STR_SUFFIX)
1809 {
1810 while (raw_suffix_len <= raw_prefix_len
1811 && raw_suffix_len < temp_buffer_len
1812 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1813 raw_suffix_len++;
1814 if (raw_suffix_len > raw_prefix_len)
1815 break;
1816 if (raw_suffix_len == temp_buffer_len)
1817 continue;
1818 phase = RAW_STR;
1819 /* Nothing should be appended to temp_buffer during
1820 RAW_STR phase. */
1821 temp_buffer_len = 17;
1822 }
1823 if (c == ')')
1824 {
1825 phase = RAW_STR_SUFFIX;
1826 raw_suffix_len = 0;
1827 temp_buffer_len = 0;
1437 } 1828 }
1438 else if (c == '\n') 1829 else if (c == '\n')
1439 { 1830 {
1440 if (pfile->state.in_directive 1831 if (pfile->state.in_directive
1441 || pfile->state.parsing_args 1832 || (pfile->state.parsing_args
1442 || pfile->state.in_deferred_pragma) 1833 && pfile->buffer->next_line >= pfile->buffer->rlimit))
1443 { 1834 {
1444 cur--; 1835 cur--;
1445 type = CPP_OTHER; 1836 type = CPP_OTHER;
1446 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, 1837 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1447 "unterminated raw string"); 1838 "unterminated raw string");
1471 } 1862 }
1472 1863
1473 cur = base = pfile->buffer->cur; 1864 cur = base = pfile->buffer->cur;
1474 note = &pfile->buffer->notes[pfile->buffer->cur_note]; 1865 note = &pfile->buffer->notes[pfile->buffer->cur_note];
1475 } 1866 }
1476 else if (c == '\0' && !saw_NUL) 1867 }
1477 LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table, 1868
1478 CPP_BUF_COLUMN (pfile->buffer, cur)); 1869 if (CPP_OPTION (pfile, user_literals))
1479 } 1870 {
1480 break_outer_loop: 1871 /* If a string format macro, say from inttypes.h, is placed touching
1481 1872 a string literal it could be parsed as a C++11 user-defined string
1482 if (saw_NUL && !pfile->state.skipping) 1873 literal thus breaking the program.
1483 cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0, 1874 Try to identify macros with is_macro. A warning is issued. */
1484 "null character(s) preserved in literal"); 1875 if (is_macro (pfile, cur))
1876 {
1877 /* Raise a warning, but do not consume subsequent tokens. */
1878 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1879 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1880 token->src_loc, 0,
1881 "invalid suffix on literal; C++11 requires "
1882 "a space between literal and string macro");
1883 }
1884 /* Grab user defined literal suffix. */
1885 else if (ISIDST (*cur))
1886 {
1887 type = cpp_userdef_string_add_type (type);
1888 ++cur;
1889
1890 while (ISIDNUM (*cur))
1891 ++cur;
1892 }
1893 }
1485 1894
1486 pfile->buffer->cur = cur; 1895 pfile->buffer->cur = cur;
1487 if (first_buff == NULL) 1896 if (first_buff == NULL)
1488 create_literal (pfile, token, base, cur - base, type); 1897 create_literal (pfile, token, base, cur - base, type);
1489 else 1898 else
1545 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) 1954 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1546 : CPP_STRING); 1955 : CPP_STRING);
1547 else if (terminator == '\'') 1956 else if (terminator == '\'')
1548 type = (*base == 'L' ? CPP_WCHAR : 1957 type = (*base == 'L' ? CPP_WCHAR :
1549 *base == 'U' ? CPP_CHAR32 : 1958 *base == 'U' ? CPP_CHAR32 :
1550 *base == 'u' ? CPP_CHAR16 : CPP_CHAR); 1959 *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
1960 : CPP_CHAR);
1551 else 1961 else
1552 terminator = '>', type = CPP_HEADER_NAME; 1962 terminator = '>', type = CPP_HEADER_NAME;
1553 1963
1554 for (;;) 1964 for (;;)
1555 { 1965 {
1584 1994
1585 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM) 1995 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1586 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character", 1996 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1587 (int) terminator); 1997 (int) terminator);
1588 1998
1999 if (CPP_OPTION (pfile, user_literals))
2000 {
2001 /* If a string format macro, say from inttypes.h, is placed touching
2002 a string literal it could be parsed as a C++11 user-defined string
2003 literal thus breaking the program.
2004 Try to identify macros with is_macro. A warning is issued. */
2005 if (is_macro (pfile, cur))
2006 {
2007 /* Raise a warning, but do not consume subsequent tokens. */
2008 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
2009 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
2010 token->src_loc, 0,
2011 "invalid suffix on literal; C++11 requires "
2012 "a space between literal and string macro");
2013 }
2014 /* Grab user defined literal suffix. */
2015 else if (ISIDST (*cur))
2016 {
2017 type = cpp_userdef_char_add_type (type);
2018 type = cpp_userdef_string_add_type (type);
2019 ++cur;
2020
2021 while (ISIDNUM (*cur))
2022 ++cur;
2023 }
2024 }
2025 else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
2026 && is_macro (pfile, cur)
2027 && !pfile->state.skipping)
2028 cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
2029 token->src_loc, 0, "C++11 requires a space "
2030 "between string literal and macro");
2031
1589 pfile->buffer->cur = cur; 2032 pfile->buffer->cur = cur;
1590 create_literal (pfile, token, base, cur - base, type); 2033 create_literal (pfile, token, base, cur - base, type);
1591 } 2034 }
1592 2035
1593 /* Return the comment table. The client may not make any assumption 2036 /* Return the comment table. The client may not make any assumption
1683 2126
1684 /* Finally store this comment for use by clients of libcpp. */ 2127 /* Finally store this comment for use by clients of libcpp. */
1685 store_comment (pfile, token); 2128 store_comment (pfile, token);
1686 } 2129 }
1687 2130
2131 /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
2132 comment. */
2133
2134 static bool
2135 fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
2136 {
2137 const unsigned char *from = comment_start + 1;
2138
2139 switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
2140 {
2141 /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
2142 don't recognize any comments. The latter only checks attributes,
2143 the former doesn't warn. */
2144 case 0:
2145 default:
2146 return false;
2147 /* -Wimplicit-fallthrough=1 considers any comment, no matter what
2148 content it has. */
2149 case 1:
2150 return true;
2151 case 2:
2152 /* -Wimplicit-fallthrough=2 looks for (case insensitive)
2153 .*falls?[ \t-]*thr(u|ough).* regex. */
2154 for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
2155 from++)
2156 {
2157 /* Is there anything like strpbrk with upper boundary, or
2158 memchr looking for 2 characters rather than just one? */
2159 if (from[0] != 'f' && from[0] != 'F')
2160 continue;
2161 if (from[1] != 'a' && from[1] != 'A')
2162 continue;
2163 if (from[2] != 'l' && from[2] != 'L')
2164 continue;
2165 if (from[3] != 'l' && from[3] != 'L')
2166 continue;
2167 from += sizeof "fall" - 1;
2168 if (from[0] == 's' || from[0] == 'S')
2169 from++;
2170 while (*from == ' ' || *from == '\t' || *from == '-')
2171 from++;
2172 if (from[0] != 't' && from[0] != 'T')
2173 continue;
2174 if (from[1] != 'h' && from[1] != 'H')
2175 continue;
2176 if (from[2] != 'r' && from[2] != 'R')
2177 continue;
2178 if (from[3] == 'u' || from[3] == 'U')
2179 return true;
2180 if (from[3] != 'o' && from[3] != 'O')
2181 continue;
2182 if (from[4] != 'u' && from[4] != 'U')
2183 continue;
2184 if (from[5] != 'g' && from[5] != 'G')
2185 continue;
2186 if (from[6] != 'h' && from[6] != 'H')
2187 continue;
2188 return true;
2189 }
2190 return false;
2191 case 3:
2192 case 4:
2193 break;
2194 }
2195
2196 /* Whole comment contents:
2197 -fallthrough
2198 @fallthrough@
2199 */
2200 if (*from == '-' || *from == '@')
2201 {
2202 size_t len = sizeof "fallthrough" - 1;
2203 if ((size_t) (pfile->buffer->cur - from - 1) < len)
2204 return false;
2205 if (memcmp (from + 1, "fallthrough", len))
2206 return false;
2207 if (*from == '@')
2208 {
2209 if (from[len + 1] != '@')
2210 return false;
2211 len++;
2212 }
2213 from += 1 + len;
2214 }
2215 /* Whole comment contents (regex):
2216 lint -fallthrough[ \t]*
2217 */
2218 else if (*from == 'l')
2219 {
2220 size_t len = sizeof "int -fallthrough" - 1;
2221 if ((size_t) (pfile->buffer->cur - from - 1) < len)
2222 return false;
2223 if (memcmp (from + 1, "int -fallthrough", len))
2224 return false;
2225 from += 1 + len;
2226 while (*from == ' ' || *from == '\t')
2227 from++;
2228 }
2229 /* Whole comment contents (regex):
2230 [ \t]*FALLTHR(U|OUGH)[ \t]*
2231 */
2232 else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
2233 {
2234 while (*from == ' ' || *from == '\t')
2235 from++;
2236 if ((size_t) (pfile->buffer->cur - from) < sizeof "FALLTHRU" - 1)
2237 return false;
2238 if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
2239 return false;
2240 from += sizeof "FALLTHR" - 1;
2241 if (*from == 'U')
2242 from++;
2243 else if ((size_t) (pfile->buffer->cur - from) < sizeof "OUGH" - 1)
2244 return false;
2245 else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
2246 return false;
2247 else
2248 from += sizeof "OUGH" - 1;
2249 while (*from == ' ' || *from == '\t')
2250 from++;
2251 }
2252 /* Whole comment contents (regex):
2253 [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
2254 [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
2255 [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
2256 */
2257 else
2258 {
2259 while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
2260 from++;
2261 unsigned char f = *from;
2262 bool all_upper = false;
2263 if (f == 'E' || f == 'e')
2264 {
2265 if ((size_t) (pfile->buffer->cur - from)
2266 < sizeof "else fallthru" - 1)
2267 return false;
2268 if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
2269 all_upper = true;
2270 else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
2271 return false;
2272 from += sizeof "else" - 1;
2273 if (*from == ',')
2274 from++;
2275 if (*from != ' ')
2276 return false;
2277 from++;
2278 if (all_upper && *from == 'f')
2279 return false;
2280 if (f == 'e' && *from == 'F')
2281 return false;
2282 f = *from;
2283 }
2284 else if (f == 'I' || f == 'i')
2285 {
2286 if ((size_t) (pfile->buffer->cur - from)
2287 < sizeof "intentional fallthru" - 1)
2288 return false;
2289 if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
2290 sizeof "NTENTIONAL" - 1) == 0)
2291 all_upper = true;
2292 else if (memcmp (from + 1, "ntentional",
2293 sizeof "ntentional" - 1))
2294 return false;
2295 from += sizeof "intentional" - 1;
2296 if (*from == ' ')
2297 {
2298 from++;
2299 if (all_upper && *from == 'f')
2300 return false;
2301 }
2302 else if (all_upper)
2303 {
2304 if (memcmp (from, "LY F", sizeof "LY F" - 1))
2305 return false;
2306 from += sizeof "LY " - 1;
2307 }
2308 else
2309 {
2310 if (memcmp (from, "ly ", sizeof "ly " - 1))
2311 return false;
2312 from += sizeof "ly " - 1;
2313 }
2314 if (f == 'i' && *from == 'F')
2315 return false;
2316 f = *from;
2317 }
2318 if (f != 'F' && f != 'f')
2319 return false;
2320 if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
2321 return false;
2322 if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
2323 all_upper = true;
2324 else if (all_upper)
2325 return false;
2326 else if (memcmp (from + 1, "all", sizeof "all" - 1))
2327 return false;
2328 from += sizeof "fall" - 1;
2329 if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
2330 from += 2;
2331 else if (*from == ' ' || *from == '-')
2332 from++;
2333 else if (*from != (all_upper ? 'T' : 't'))
2334 return false;
2335 if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
2336 return false;
2337 if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
2338 return false;
2339 if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
2340 {
2341 if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
2342 return false;
2343 if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
2344 sizeof "hrough" - 1))
2345 return false;
2346 from += sizeof "through" - 1;
2347 }
2348 else
2349 from += sizeof "thru" - 1;
2350 while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
2351 from++;
2352 if (*from == '-')
2353 {
2354 from++;
2355 if (*comment_start == '*')
2356 {
2357 do
2358 {
2359 while (*from && *from != '*'
2360 && *from != '\n' && *from != '\r')
2361 from++;
2362 if (*from != '*' || from[1] == '/')
2363 break;
2364 from++;
2365 }
2366 while (1);
2367 }
2368 else
2369 while (*from && *from != '\n' && *from != '\r')
2370 from++;
2371 }
2372 }
2373 /* C block comment. */
2374 if (*comment_start == '*')
2375 {
2376 if (*from != '*' || from[1] != '/')
2377 return false;
2378 }
2379 /* C++ line comment. */
2380 else if (*from != '\n')
2381 return false;
2382
2383 return true;
2384 }
2385
1688 /* Allocate COUNT tokens for RUN. */ 2386 /* Allocate COUNT tokens for RUN. */
1689 void 2387 void
1690 _cpp_init_tokenrun (tokenrun *run, unsigned int count) 2388 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1691 { 2389 {
1692 run->base = XNEWVEC (cpp_token, count); 2390 run->base = XNEWVEC (cpp_token, count);
1706 } 2404 }
1707 2405
1708 return run->next; 2406 return run->next;
1709 } 2407 }
1710 2408
2409 /* Return the number of not yet processed token in a given
2410 context. */
2411 int
2412 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2413 {
2414 if (context->tokens_kind == TOKENS_KIND_DIRECT)
2415 return (LAST (context).token - FIRST (context).token);
2416 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2417 || context->tokens_kind == TOKENS_KIND_EXTENDED)
2418 return (LAST (context).ptoken - FIRST (context).ptoken);
2419 else
2420 abort ();
2421 }
2422
2423 /* Returns the token present at index INDEX in a given context. If
2424 INDEX is zero, the next token to be processed is returned. */
2425 static const cpp_token*
2426 _cpp_token_from_context_at (cpp_context *context, int index)
2427 {
2428 if (context->tokens_kind == TOKENS_KIND_DIRECT)
2429 return &(FIRST (context).token[index]);
2430 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2431 || context->tokens_kind == TOKENS_KIND_EXTENDED)
2432 return FIRST (context).ptoken[index];
2433 else
2434 abort ();
2435 }
2436
1711 /* Look ahead in the input stream. */ 2437 /* Look ahead in the input stream. */
1712 const cpp_token * 2438 const cpp_token *
1713 cpp_peek_token (cpp_reader *pfile, int index) 2439 cpp_peek_token (cpp_reader *pfile, int index)
1714 { 2440 {
1715 cpp_context *context = pfile->context; 2441 cpp_context *context = pfile->context;
1717 int count; 2443 int count;
1718 2444
1719 /* First, scan through any pending cpp_context objects. */ 2445 /* First, scan through any pending cpp_context objects. */
1720 while (context->prev) 2446 while (context->prev)
1721 { 2447 {
1722 ptrdiff_t sz = (context->direct_p 2448 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
1723 ? LAST (context).token - FIRST (context).token
1724 : LAST (context).ptoken - FIRST (context).ptoken);
1725 2449
1726 if (index < (int) sz) 2450 if (index < (int) sz)
1727 return (context->direct_p 2451 return _cpp_token_from_context_at (context, index);
1728 ? FIRST (context).token + index
1729 : *(FIRST (context).ptoken + index));
1730
1731 index -= (int) sz; 2452 index -= (int) sz;
1732 context = context->prev; 2453 context = context->prev;
1733 } 2454 }
1734 2455
1735 /* We will have to read some new tokens after all (and do so 2456 /* We will have to read some new tokens after all (and do so
1736 without invalidating preceding tokens). */ 2457 without invalidating preceding tokens). */
1737 count = index; 2458 count = index;
1738 pfile->keep_tokens++; 2459 pfile->keep_tokens++;
1739 2460
2461 /* For peeked tokens temporarily disable line_change reporting,
2462 until the tokens are parsed for real. */
2463 void (*line_change) (cpp_reader *, const cpp_token *, int)
2464 = pfile->cb.line_change;
2465 pfile->cb.line_change = NULL;
2466
1740 do 2467 do
1741 { 2468 {
1742 peektok = _cpp_lex_token (pfile); 2469 peektok = _cpp_lex_token (pfile);
1743 if (peektok->type == CPP_EOF) 2470 if (peektok->type == CPP_EOF)
1744 return peektok; 2471 {
2472 index--;
2473 break;
2474 }
1745 } 2475 }
1746 while (index--); 2476 while (index--);
1747 2477
1748 _cpp_backup_tokens_direct (pfile, count + 1); 2478 _cpp_backup_tokens_direct (pfile, count - index);
1749 pfile->keep_tokens--; 2479 pfile->keep_tokens--;
2480 pfile->cb.line_change = line_change;
1750 2481
1751 return peektok; 2482 return peektok;
1752 } 2483 }
1753 2484
1754 /* Allocate a single token that is invalidated at the same time as the 2485 /* Allocate a single token that is invalidated at the same time as the
1929 _cpp_lex_direct (cpp_reader *pfile) 2660 _cpp_lex_direct (cpp_reader *pfile)
1930 { 2661 {
1931 cppchar_t c; 2662 cppchar_t c;
1932 cpp_buffer *buffer; 2663 cpp_buffer *buffer;
1933 const unsigned char *comment_start; 2664 const unsigned char *comment_start;
2665 bool fallthrough_comment = false;
1934 cpp_token *result = pfile->cur_token++; 2666 cpp_token *result = pfile->cur_token++;
1935 2667
1936 fresh_line: 2668 fresh_line:
1937 result->flags = 0; 2669 result->flags = 0;
1938 buffer = pfile->buffer; 2670 buffer = pfile->buffer;
1955 result->src_loc = pfile->line_table->highest_line; 2687 result->src_loc = pfile->line_table->highest_line;
1956 result->flags = BOL; 2688 result->flags = BOL;
1957 } 2689 }
1958 return result; 2690 return result;
1959 } 2691 }
2692 if (buffer != pfile->buffer)
2693 fallthrough_comment = false;
1960 if (!pfile->keep_tokens) 2694 if (!pfile->keep_tokens)
1961 { 2695 {
1962 pfile->cur_run = &pfile->base_run; 2696 pfile->cur_run = &pfile->base_run;
1963 result = pfile->base_run.base; 2697 result = pfile->base_run.base;
1964 pfile->cur_token = result + 1; 2698 pfile->cur_token = result + 1;
1978 _cpp_process_line_notes (pfile, false); 2712 _cpp_process_line_notes (pfile, false);
1979 result->src_loc = pfile->line_table->highest_line; 2713 result->src_loc = pfile->line_table->highest_line;
1980 } 2714 }
1981 c = *buffer->cur++; 2715 c = *buffer->cur++;
1982 2716
1983 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, 2717 if (pfile->forced_token_location_p)
1984 CPP_BUF_COLUMN (buffer, buffer->cur)); 2718 result->src_loc = *pfile->forced_token_location_p;
2719 else
2720 result->src_loc = linemap_position_for_column (pfile->line_table,
2721 CPP_BUF_COLUMN (buffer, buffer->cur));
1985 2722
1986 switch (c) 2723 switch (c)
1987 { 2724 {
1988 case ' ': case '\t': case '\f': case '\v': case '\0': 2725 case ' ': case '\t': case '\f': case '\v': case '\0':
1989 result->flags |= PREV_WHITE; 2726 result->flags |= PREV_WHITE;
2010 case 'u': 2747 case 'u':
2011 case 'U': 2748 case 'U':
2012 case 'R': 2749 case 'R':
2013 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters, 2750 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2014 wide strings or raw strings. */ 2751 wide strings or raw strings. */
2015 if (c == 'L' || CPP_OPTION (pfile, uliterals)) 2752 if (c == 'L' || CPP_OPTION (pfile, rliterals)
2753 || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2016 { 2754 {
2017 if ((*buffer->cur == '\'' && c != 'R') 2755 if ((*buffer->cur == '\'' && c != 'R')
2018 || *buffer->cur == '"' 2756 || *buffer->cur == '"'
2019 || (*buffer->cur == 'R' 2757 || (*buffer->cur == 'R'
2020 && c != 'R' 2758 && c != 'R'
2021 && buffer->cur[1] == '"' 2759 && buffer->cur[1] == '"'
2022 && CPP_OPTION (pfile, uliterals)) 2760 && CPP_OPTION (pfile, rliterals))
2023 || (*buffer->cur == '8' 2761 || (*buffer->cur == '8'
2024 && c == 'u' 2762 && c == 'u'
2025 && (buffer->cur[1] == '"' 2763 && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
2026 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"')))) 2764 && CPP_OPTION (pfile, utf8_char_literals)))
2765 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2766 && CPP_OPTION (pfile, rliterals)))))
2027 { 2767 {
2028 lex_string (pfile, result, buffer->cur - 1); 2768 lex_string (pfile, result, buffer->cur - 1);
2029 break; 2769 break;
2030 } 2770 }
2031 } 2771 }
2044 case 'Y': case 'Z': 2784 case 'Y': case 'Z':
2045 result->type = CPP_NAME; 2785 result->type = CPP_NAME;
2046 { 2786 {
2047 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 2787 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2048 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false, 2788 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2049 &nst); 2789 &nst,
2790 &result->val.node.spelling);
2050 warn_about_normalization (pfile, result, &nst); 2791 warn_about_normalization (pfile, result, &nst);
2051 } 2792 }
2052 2793
2053 /* Convert named operators to their proper types. */ 2794 /* Convert named operators to their proper types. */
2054 if (result->val.node.node->flags & NODE_OPERATOR) 2795 if (result->val.node.node->flags & NODE_OPERATOR)
2055 { 2796 {
2056 result->flags |= NAMED_OP; 2797 result->flags |= NAMED_OP;
2057 result->type = (enum cpp_ttype) result->val.node.node->directive_index; 2798 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2058 } 2799 }
2800
2801 /* Signal FALLTHROUGH comment followed by another token. */
2802 if (fallthrough_comment)
2803 result->flags |= PREV_FALLTHROUGH;
2059 break; 2804 break;
2060 2805
2061 case '\'': 2806 case '\'':
2062 case '"': 2807 case '"':
2063 lex_string (pfile, result, buffer->cur - 1); 2808 lex_string (pfile, result, buffer->cur - 1);
2071 if (c == '*') 2816 if (c == '*')
2072 { 2817 {
2073 if (_cpp_skip_block_comment (pfile)) 2818 if (_cpp_skip_block_comment (pfile))
2074 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); 2819 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2075 } 2820 }
2076 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) 2821 else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2077 || cpp_in_system_header (pfile))) 2822 {
2078 { 2823 /* Don't warn for system headers. */
2079 /* Warn about comments only if pedantically GNUC89, and not 2824 if (cpp_in_system_header (pfile))
2825 ;
2826 /* Warn about comments if pedantically GNUC89, and not
2080 in system headers. */ 2827 in system headers. */
2081 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) 2828 else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2082 && ! buffer->warned_cplusplus_comments) 2829 && CPP_PEDANTIC (pfile)
2830 && ! buffer->warned_cplusplus_comments)
2083 { 2831 {
2084 cpp_error (pfile, CPP_DL_PEDWARN, 2832 cpp_error (pfile, CPP_DL_PEDWARN,
2085 "C++ style comments are not allowed in ISO C90"); 2833 "C++ style comments are not allowed in ISO C90");
2086 cpp_error (pfile, CPP_DL_PEDWARN, 2834 cpp_error (pfile, CPP_DL_PEDWARN,
2087 "(this will be reported only once per input file)"); 2835 "(this will be reported only once per input file)");
2088 buffer->warned_cplusplus_comments = 1; 2836 buffer->warned_cplusplus_comments = 1;
2089 } 2837 }
2090 2838 /* Or if specifically desired via -Wc90-c99-compat. */
2839 else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2840 && ! CPP_OPTION (pfile, cplusplus)
2841 && ! buffer->warned_cplusplus_comments)
2842 {
2843 cpp_error (pfile, CPP_DL_WARNING,
2844 "C++ style comments are incompatible with C90");
2845 cpp_error (pfile, CPP_DL_WARNING,
2846 "(this will be reported only once per input file)");
2847 buffer->warned_cplusplus_comments = 1;
2848 }
2849 /* In C89/C94, C++ style comments are forbidden. */
2850 else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2851 || CPP_OPTION (pfile, lang) == CLK_STDC94))
2852 {
2853 /* But don't be confused about valid code such as
2854 - // immediately followed by *,
2855 - // in a preprocessing directive,
2856 - // in an #if 0 block. */
2857 if (buffer->cur[1] == '*'
2858 || pfile->state.in_directive
2859 || pfile->state.skipping)
2860 {
2861 result->type = CPP_DIV;
2862 break;
2863 }
2864 else if (! buffer->warned_cplusplus_comments)
2865 {
2866 cpp_error (pfile, CPP_DL_ERROR,
2867 "C++ style comments are not allowed in ISO C90");
2868 cpp_error (pfile, CPP_DL_ERROR,
2869 "(this will be reported only once per input "
2870 "file)");
2871 buffer->warned_cplusplus_comments = 1;
2872 }
2873 }
2091 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) 2874 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2092 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment"); 2875 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2093 } 2876 }
2094 else if (c == '=') 2877 else if (c == '=')
2095 { 2878 {
2101 { 2884 {
2102 result->type = CPP_DIV; 2885 result->type = CPP_DIV;
2103 break; 2886 break;
2104 } 2887 }
2105 2888
2889 if (fallthrough_comment_p (pfile, comment_start))
2890 fallthrough_comment = true;
2891
2892 if (pfile->cb.comment)
2893 {
2894 size_t len = pfile->buffer->cur - comment_start;
2895 pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
2896 len + 1);
2897 }
2898
2106 if (!pfile->state.save_comments) 2899 if (!pfile->state.save_comments)
2107 { 2900 {
2108 result->flags |= PREV_WHITE; 2901 result->flags |= PREV_WHITE;
2109 goto update_tokens_line; 2902 goto update_tokens_line;
2110 } 2903 }
2904
2905 if (fallthrough_comment)
2906 result->flags |= PREV_FALLTHROUGH;
2111 2907
2112 /* Save the comment as a token in its own right. */ 2908 /* Save the comment as a token in its own right. */
2113 save_comment (pfile, result, comment_start, c); 2909 save_comment (pfile, result, comment_start, c);
2114 break; 2910 break;
2115 2911
2131 } 2927 }
2132 else if (CPP_OPTION (pfile, digraphs)) 2928 else if (CPP_OPTION (pfile, digraphs))
2133 { 2929 {
2134 if (*buffer->cur == ':') 2930 if (*buffer->cur == ':')
2135 { 2931 {
2932 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2933 three characters are <:: and the subsequent character
2934 is neither : nor >, the < is treated as a preprocessor
2935 token by itself". */
2936 if (CPP_OPTION (pfile, cplusplus)
2937 && CPP_OPTION (pfile, lang) != CLK_CXX98
2938 && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2939 && buffer->cur[1] == ':'
2940 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2941 break;
2942
2136 buffer->cur++; 2943 buffer->cur++;
2137 result->flags |= DIGRAPH; 2944 result->flags |= DIGRAPH;
2138 result->type = CPP_OPEN_SQUARE; 2945 result->type = CPP_OPEN_SQUARE;
2139 } 2946 }
2140 else if (*buffer->cur == '%') 2947 else if (*buffer->cur == '%')
2273 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 3080 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2274 3081
2275 if (forms_identifier_p (pfile, true, &nst)) 3082 if (forms_identifier_p (pfile, true, &nst))
2276 { 3083 {
2277 result->type = CPP_NAME; 3084 result->type = CPP_NAME;
2278 result->val.node.node = lex_identifier (pfile, base, true, &nst); 3085 result->val.node.node = lex_identifier (pfile, base, true, &nst,
3086 &result->val.node.spelling);
2279 warn_about_normalization (pfile, result, &nst); 3087 warn_about_normalization (pfile, result, &nst);
2280 break; 3088 break;
2281 } 3089 }
2282 buffer->cur++; 3090 buffer->cur++;
2283 } 3091 }
3092 /* FALLTHRU */
2284 3093
2285 default: 3094 default:
2286 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); 3095 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2287 break; 3096 break;
3097 }
3098
3099 /* Potentially convert the location of the token to a range. */
3100 if (result->src_loc >= RESERVED_LOCATION_COUNT
3101 && result->type != CPP_EOF)
3102 {
3103 /* Ensure that any line notes are processed, so that we have the
3104 correct physical line/column for the end-point of the token even
3105 when a logical line is split via one or more backslashes. */
3106 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
3107 && !pfile->overlaid_buffer)
3108 _cpp_process_line_notes (pfile, false);
3109
3110 source_range tok_range;
3111 tok_range.m_start = result->src_loc;
3112 tok_range.m_finish
3113 = linemap_position_for_column (pfile->line_table,
3114 CPP_BUF_COLUMN (buffer, buffer->cur));
3115
3116 result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
3117 result->src_loc,
3118 tok_range, NULL);
2288 } 3119 }
2289 3120
2290 return result; 3121 return result;
2291 } 3122 }
2292 3123
2347 cpp_digraph2name (enum cpp_ttype type) 3178 cpp_digraph2name (enum cpp_ttype type)
2348 { 3179 {
2349 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH]; 3180 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2350 } 3181 }
2351 3182
3183 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
3184 The buffer must already contain the enough space to hold the
3185 token's spelling. Returns a pointer to the character after the
3186 last character written. */
3187 unsigned char *
3188 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
3189 {
3190 size_t i;
3191 const unsigned char *name = NODE_NAME (ident);
3192
3193 for (i = 0; i < NODE_LEN (ident); i++)
3194 if (name[i] & ~0x7F)
3195 {
3196 i += utf8_to_ucn (buffer, name + i) - 1;
3197 buffer += 10;
3198 }
3199 else
3200 *buffer++ = name[i];
3201
3202 return buffer;
3203 }
3204
2352 /* Write the spelling of a token TOKEN to BUFFER. The buffer must 3205 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
2353 already contain the enough space to hold the token's spelling. 3206 already contain the enough space to hold the token's spelling.
2354 Returns a pointer to the character after the last character written. 3207 Returns a pointer to the character after the last character written.
2355 FORSTRING is true if this is to be the spelling after translation 3208 FORSTRING is true if this is to be the spelling after translation
2356 phase 1 (this is different for UCNs). 3209 phase 1 (with the original spelling of extended identifiers), false
3210 if extended identifiers should always be written using UCNs (there is
3211 no option for always writing them in the internal UTF-8 form).
2357 FIXME: Would be nice if we didn't need the PFILE argument. */ 3212 FIXME: Would be nice if we didn't need the PFILE argument. */
2358 unsigned char * 3213 unsigned char *
2359 cpp_spell_token (cpp_reader *pfile, const cpp_token *token, 3214 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2360 unsigned char *buffer, bool forstring) 3215 unsigned char *buffer, bool forstring)
2361 { 3216 {
2380 3235
2381 spell_ident: 3236 spell_ident:
2382 case SPELL_IDENT: 3237 case SPELL_IDENT:
2383 if (forstring) 3238 if (forstring)
2384 { 3239 {
2385 memcpy (buffer, NODE_NAME (token->val.node.node), 3240 memcpy (buffer, NODE_NAME (token->val.node.spelling),
2386 NODE_LEN (token->val.node.node)); 3241 NODE_LEN (token->val.node.spelling));
2387 buffer += NODE_LEN (token->val.node.node); 3242 buffer += NODE_LEN (token->val.node.spelling);
2388 } 3243 }
2389 else 3244 else
2390 { 3245 buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2391 size_t i;
2392 const unsigned char * name = NODE_NAME (token->val.node.node);
2393
2394 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2395 if (name[i] & ~0x7F)
2396 {
2397 i += utf8_to_ucn (buffer, name + i) - 1;
2398 buffer += 10;
2399 }
2400 else
2401 *buffer++ = NODE_NAME (token->val.node.node)[i];
2402 }
2403 break; 3246 break;
2404 3247
2405 case SPELL_LITERAL: 3248 case SPELL_LITERAL:
2406 memcpy (buffer, token->val.str.text, token->val.str.len); 3249 memcpy (buffer, token->val.str.text, token->val.str.len);
2407 buffer += token->val.str.len; 3250 buffer += token->val.str.len;
2511 /* token_no is used to track where multiple consecutive ## 3354 /* token_no is used to track where multiple consecutive ##
2512 tokens were originally located. */ 3355 tokens were originally located. */
2513 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no); 3356 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2514 case SPELL_NONE: 3357 case SPELL_NONE:
2515 return (a->type != CPP_MACRO_ARG 3358 return (a->type != CPP_MACRO_ARG
2516 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no); 3359 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
3360 && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2517 case SPELL_IDENT: 3361 case SPELL_IDENT:
2518 return a->val.node.node == b->val.node.node; 3362 return (a->val.node.node == b->val.node.node
3363 && a->val.node.spelling == b->val.node.spelling);
2519 case SPELL_LITERAL: 3364 case SPELL_LITERAL:
2520 return (a->val.str.len == b->val.str.len 3365 return (a->val.str.len == b->val.str.len
2521 && !memcmp (a->val.str.text, b->val.str.text, 3366 && !memcmp (a->val.str.text, b->val.str.text,
2522 a->val.str.len)); 3367 a->val.str.len));
2523 } 3368 }
2575 case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 3420 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
2576 && b == CPP_NAME) 3421 && b == CPP_NAME)
2577 || (CPP_OPTION (pfile, objc) 3422 || (CPP_OPTION (pfile, objc)
2578 && token1->val.str.text[0] == '@' 3423 && token1->val.str.text[0] == '@'
2579 && (b == CPP_NAME || b == CPP_STRING))); 3424 && (b == CPP_NAME || b == CPP_STRING)));
3425 case CPP_STRING:
3426 case CPP_WSTRING:
3427 case CPP_UTF8STRING:
3428 case CPP_STRING16:
3429 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
3430 && (b == CPP_NAME
3431 || (TOKEN_SPELL (token2) == SPELL_LITERAL
3432 && ISIDST (token2->val.str.text[0]))));
3433
2580 default: break; 3434 default: break;
2581 } 3435 }
2582 3436
2583 return 0; 3437 return 0;
2584 } 3438 }
2673 3527
2674 if (len < MIN_BUFF_SIZE) 3528 if (len < MIN_BUFF_SIZE)
2675 len = MIN_BUFF_SIZE; 3529 len = MIN_BUFF_SIZE;
2676 len = CPP_ALIGN (len); 3530 len = CPP_ALIGN (len);
2677 3531
3532 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3533 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3534 struct first. */
3535 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3536 base = XNEWVEC (unsigned char, len + slen);
3537 result = (_cpp_buff *) base;
3538 base += slen;
3539 #else
2678 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 3540 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2679 result = (_cpp_buff *) (base + len); 3541 result = (_cpp_buff *) (base + len);
3542 #endif
2680 result->base = base; 3543 result->base = base;
2681 result->cur = base; 3544 result->cur = base;
2682 result->limit = base + len; 3545 result->limit = base + len;
2683 result->next = NULL; 3546 result->next = NULL;
2684 return result; 3547 return result;
2761 _cpp_buff *next; 3624 _cpp_buff *next;
2762 3625
2763 for (; buff; buff = next) 3626 for (; buff; buff = next)
2764 { 3627 {
2765 next = buff->next; 3628 next = buff->next;
3629 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3630 free (buff);
3631 #else
2766 free (buff->base); 3632 free (buff->base);
3633 #endif
2767 } 3634 }
2768 } 3635 }
2769 3636
2770 /* Allocate permanent, unaligned storage of length LEN. */ 3637 /* Allocate permanent, unaligned storage of length LEN. */
2771 unsigned char * 3638 unsigned char *
2815 } 3682 }
2816 3683
2817 /* Say which field of TOK is in use. */ 3684 /* Say which field of TOK is in use. */
2818 3685
2819 enum cpp_token_fld_kind 3686 enum cpp_token_fld_kind
2820 cpp_token_val_index (cpp_token *tok) 3687 cpp_token_val_index (const cpp_token *tok)
2821 { 3688 {
2822 switch (TOKEN_SPELL (tok)) 3689 switch (TOKEN_SPELL (tok))
2823 { 3690 {
2824 case SPELL_IDENT: 3691 case SPELL_IDENT:
2825 return CPP_TOKEN_FLD_NODE; 3692 return CPP_TOKEN_FLD_NODE;
2835 return CPP_TOKEN_FLD_ARG_NO; 3702 return CPP_TOKEN_FLD_ARG_NO;
2836 else if (tok->type == CPP_PADDING) 3703 else if (tok->type == CPP_PADDING)
2837 return CPP_TOKEN_FLD_SOURCE; 3704 return CPP_TOKEN_FLD_SOURCE;
2838 else if (tok->type == CPP_PRAGMA) 3705 else if (tok->type == CPP_PRAGMA)
2839 return CPP_TOKEN_FLD_PRAGMA; 3706 return CPP_TOKEN_FLD_PRAGMA;
2840 /* else fall through */ 3707 /* fall through */
2841 default: 3708 default:
2842 return CPP_TOKEN_FLD_NONE; 3709 return CPP_TOKEN_FLD_NONE;
2843 } 3710 }
2844 } 3711 }
3712
3713 /* All tokens lexed in R after calling this function will be forced to have
3714 their source_location the same as the location referenced by P, until
3715 cpp_stop_forcing_token_locations is called for R. */
3716
3717 void
3718 cpp_force_token_locations (cpp_reader *r, source_location *p)
3719 {
3720 r->forced_token_location_p = p;
3721 }
3722
3723 /* Go back to assigning locations naturally for lexed tokens. */
3724
3725 void
3726 cpp_stop_forcing_token_locations (cpp_reader *r)
3727 {
3728 r->forced_token_location_p = NULL;
3729 }