CbC/CbC_gcc: libcpp/lex.c comparison

comparison libcpp/lex.c @ 111:04ced10e8804

gcc 7

author	kono
date	Fri, 27 Oct 2017 22:46:09 +0900
parents	f6334be47118
children	84e7813d76e9

comparison

equal deleted inserted replaced

-:561a7518be6b
+:04ced10e8804
 /* CPP Library - lexical analysis.
-Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+Copyright (C) 2000-2017 Free Software Foundation, Inc.
-Free Software Foundation, Inc.
 Contributed by Per Bothner, 1994-95.
 Based on CCCP program by Paul Rubin, June 1986
 Adapted to ANSI C, Richard Stallman, Jan 1987
 Broken out to separate file, Zack Weinberg, Mar 2000
 val = *++p;
 }
 }
-/* Disable on Solaris 2/x86 until the following problems can be properly
+/* Disable on Solaris 2/x86 until the following problem can be properly
 autoconfed:
-The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
-The Solaris 9 assembler cannot assemble SSE4.2 insns.
-Before Solaris 9 Update 6, SSE insns cannot be executed.
 The Solaris 10+ assembler tags objects with the instruction set
 extensions used, so SSE4.2 executables cannot run on machines that
 don't support that extension.  */
-#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
+#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
 /* Replicated character data to be shared between implementations.
 Recall that outside of a context with vector support we can't
 define compatible vector types, therefore these are all defined
 in terms of raw characters.  */
 };
 /* A version of the fast scanner using MMX vectorized byte compare insns.
 This uses the PMOVMSKB instruction which was introduced with "MMX2",
-which was packaged into SSE1; it is also present in the AMD 3dNOW-A
+which was packaged into SSE1; it is also present in the AMD MMX
 extension.  Mark the function as using "sse" so that we emit a real
 "emms" instruction, rather than the 3dNOW "femms" instruction.  */
 static const uchar *
 #ifndef __SSE__
 uintptr_t index;
 /* Check for unaligned input.  */
 if (si & 15)
 {
+v16qi sv;
 if (__builtin_expect (end - s < 16, 0)
 	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
 	{
 	  /* There are less than 16 bytes left in the buffer, and less
 	     than 16 bytes left on the page.  Reading 16 bytes at this
 	  return search_line_sse2 (s, end);
 	}
 /* ??? The builtin doesn't understand that the PCMPESTRI read from
 	 memory need not be aligned.  */
-__asm ("%vpcmpestri $0, (%1), %2"
+sv = __builtin_ia32_loaddqu ((const char *) s);
-	     : "=c"(index) : "r"(s), "x"(search), "a"(4), "d"(16));
+index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
 if (__builtin_expect (index < 16, 0))
 	goto found;
 /* Advance the pointer to an aligned address.  We will re-scan a
 	 few bytes, but we no longer need care for reading past the
 	 end of a page, since we're guaranteed a match.  */
-s = (const uchar *)((si + 16) & -16);
+s = (const uchar *)((si + 15) & -16);
 }
-/* Main loop, processing 16 bytes at a time.  By doing the whole loop
+/* Main loop, processing 16 bytes at a time.  */
-in inline assembly, we can make proper use of the flags set.  */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
-__asm (      "sub $16, %1\n"
+while (1)
-	"	.balign 16\n"
+{
+char f;
+/* By using inline assembly instead of the builtin,
+	 we can use the result, as well as the flags set.  */
+__asm ("%vpcmpestri\t$0, %2, %3"
+	     : "=c"(index), "=@ccc"(f)
+	     : "m"(*s), "x"(search), "a"(4), "d"(16));
+if (f)
+	break;
+s += 16;
+}
+#else
+s -= 16;
+/* By doing the whole loop in inline assembly,
+we can make proper use of the flags set.  */
+__asm (      ".balign 16\n"
 	"0:	add $16, %1\n"
-	"	%vpcmpestri $0, (%1), %2\n"
+	"	%vpcmpestri\t$0, (%1), %2\n"
 	"	jnc 0b"
 	: "=&c"(index), "+r"(s)
 	: "x"(search), "a"(4), "d"(16));
+#endif
 found:
 return s + index;
 }
 #include "../gcc/config/i386/cpuid.h"
 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
 static search_line_fast_type search_line_fast;
-static void __attribute__((constructor))
+#define HAVE_init_vectorized_lexer 1
+static inline void
 init_vectorized_lexer (void)
 {
 unsigned dummy, ecx = 0, edx = 0;
 search_line_fast_type impl = search_line_acc_char;
 int minimum = 0;
 #if defined(__SSE4_2__)
 minimum = 3;
 #elif defined(__SSE2__)
 minimum = 2;
-#elif defined(__SSE__) || defined(__3dNOW_A__)
+#elif defined(__SSE__)
 minimum = 1;
 #endif
 if (minimum == 3)
 impl = search_line_sse42;
 else if (minimum == 1 || (edx & bit_SSE))
 	impl = search_line_mmx;
 }
 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
 {
-if (minimum == 1 || edx & bit_3DNOWP)
+if (minimum == 1
+	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
 	impl = search_line_mmx;
 }
 search_line_fast = impl;
 }
-#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
+#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
-/* A vection of the fast scanner using AltiVec vectorized byte compares.  */
+/* A version of the fast scanner using AltiVec vectorized byte compares
+and VSX unaligned loads (when VSX is available).  This is otherwise
+the same as the pre-GCC 5 version.
+
+Scans forward from S in 16-byte steps and returns a pointer to the
+first '\n', '\r', '\\' or '?' found.  END is unused: the loop has no
+bounds check of its own, so it only stops once one of those four
+characters has been seen at or after S.  */
+ATTRIBUTE_NO_SANITIZE_UNDEFINED
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+typedef __attribute__((altivec(vector))) unsigned char vc;
+const vc repl_nl = {
+'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
+'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
+};
+const vc repl_cr = {
+'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
+'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
+};
+const vc repl_bs = {
+'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
+'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
+};
+const vc repl_qm = {
+'?', '?', '?', '?', '?', '?', '?', '?',
+'?', '?', '?', '?', '?', '?', '?', '?',
+};
+const vc zero = { 0 };
+vc data, t;
+/* Main loop processing 16 bytes at a time.  */
+do
+{
+vc m_nl, m_cr, m_bs, m_qm;
+data = *((const vc *)s);  /* Direct vector load from S; no alignment step precedes it.  */
+s += 16;
+m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
+m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
+m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
+m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
+t = (m_nl | m_cr) | (m_bs | m_qm);
+/* T now contains 0xff in bytes for which we matched one of the relevant
+	 characters.  We want to exit the loop if any byte in T is non-zero.
+	 Below is the expansion of vec_any_ne(t, zero).  */
+}
+while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
+/* Restore s to point to the 16 bytes we just processed.  */
+s -= 16;
+{
+#define N  (sizeof(vc) / sizeof(long))
+union {
+vc v;
+/* Statically assert that N is 2 or 4: any other value makes the
+array size negative, which does not compile.  */
+unsigned long l[(N == 2 || N == 4) ? N : -1];
+} u;
+unsigned long l, i = 0;
+u.v = t;
+/* Find the first word of T that is non-zero, advancing S past each
+all-zero word that is skipped.  */
+switch (N)
+{
+case 4:
+	l = u.l[i++];
+	if (l != 0)
+	  break;
+	s += sizeof(unsigned long);
+	l = u.l[i++];
+	if (l != 0)
+	  break;
+	s += sizeof(unsigned long);
+	/* FALLTHRU */
+case 2:
+	l = u.l[i++];
+	if (l != 0)
+	  break;
+	s += sizeof(unsigned long);
+	l = u.l[i];  /* Last word: not tested, since the loop only exits when T is non-zero.  */
+}
+/* L now contains 0xff in bytes for which we matched one of the
+relevant characters.  We can find the byte index by finding
+its bit index and dividing by 8.  Leading zeros are counted on
+big-endian targets and trailing zeros otherwise, so that the byte
+found is the one at the lowest address.  */
+#ifdef __BIG_ENDIAN__
+l = __builtin_clzl(l) >> 3;
+#else
+l = __builtin_ctzl(l) >> 3;
+#endif
+return s + l;
+#undef N
+}
+}
+#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
+/* A version of the fast scanner using AltiVec vectorized byte compares.
+This cannot be used for little endian because vec_lvsl/lvsr are
+deprecated for little endian and the code won't work properly.  */
 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
 so we can't compile this function without -maltivec on the command line
 (or implied by some other switch).  */
 static const uchar *
 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 {
 #define N  (sizeof(vc) / sizeof(long))
-typedef char check_count[(N == 2 || N == 4) * 2 - 1];
 union {
 vc v;
-unsigned long l[N];
+/* Statically assert that N is 2 or 4.  */
+unsigned long l[(N == 2 || N == 4) ? N : -1];
 } u;
 unsigned long l, i = 0;
 u.v = t;
 	s += sizeof(unsigned long);
 	l = u.l[i++];
 	if (l != 0)
 	  break;
 	s += sizeof(unsigned long);
+	/* FALLTHROUGH */
 case 2:
 	l = u.l[i++];
 	if (l != 0)
 	  break;
 	s += sizeof(unsigned long);
 #undef N
 }
 }
+#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
+#include "arm_neon.h"
+/* This doesn't have to be the exact page size, but no system may use
+a size smaller than this.  ARMv8 requires a minimum page size of
+4k.  The impact of being conservative here is a small number of
+cases will take the slightly slower entry path into the main
+loop.  */
+#define AARCH64_MIN_PAGE_SIZE 4096
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{  /* Return a pointer to the first '\n', '\r', '\\' or '?' at or after S.
+      END is unused and the loop has no bounds check of its own, so it only
+      stops once one of those characters has been found.  */
+const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
+const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
+const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
+const uint8x16_t repl_qm = vdupq_n_u8 ('?');
+const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);  /* One distinct bit per byte of each 8-byte half.  */
+#ifdef __AARCH64EB
+const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
 #else
+const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
-/* We only have one accellerated alternative.  Use a direct call so that
+#endif
+unsigned int found;
+const uint8_t *p;
+uint8x16_t data;
+uint8x16_t t;
+uint16x8_t m;
+uint8x16_t u, v, w;
+/* Align the source pointer.  */
+p = (const uint8_t *)((uintptr_t)s & -16);
+/* Assuming random string start positions, with a 4k page size we'll take
+the slow path about 0.37% of the time.  The test below is true when
+fewer than 16 bytes remain between S and the next
+AARCH64_MIN_PAGE_SIZE boundary.  */
+if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
+			 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
+			< 16, 0))
+{
+/* Slow path: the string starts near a possible page boundary.  */
+uint32_t misalign, mask;
+misalign = (uintptr_t)s & 15;
+mask = (-1u << misalign) & 0xffff;  /* Keeps only the bits for bytes at or after S.  */
+data = vld1q_u8 (p);  /* Load from the aligned-down address P rather than from S.  */
+t = vceqq_u8 (data, repl_nl);
+u = vceqq_u8 (data, repl_cr);
+v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+t = vorrq_u8 (v, w);
+t = vandq_u8 (t, xmask);  /* This and the next three steps fold T into a 16-bit bitmap, one bit per byte.  */
+m = vpaddlq_u8 (t);
+m = vshlq_u16 (m, shift);
+found = vaddvq_u16 (m);
+found &= mask;
+if (found)
+	return (const uchar*)p + __builtin_ctz (found);
+}
+else
+{
+data = vld1q_u8 ((const uint8_t *) s);  /* 16-byte load starting at S itself.  */
+t = vceqq_u8 (data, repl_nl);
+u = vceqq_u8 (data, repl_cr);
+v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+t = vorrq_u8 (v, w);
+if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
+	goto done;  /* T holds the match vector for the 16 bytes at S.  */
+}
+do
+{
+p += 16;  /* Next 16-byte block after the aligned-down P.  */
+data = vld1q_u8 (p);
+t = vceqq_u8 (data, repl_nl);
+u = vceqq_u8 (data, repl_cr);
+v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+t = vorrq_u8 (v, w);
+} while (!vpaddd_u64 ((uint64x2_t)t));
+done:
+/* Now that we've found the terminating substring, work out precisely where
+we need to stop.  If P is still below S, the match came from the load
+at S, so the offset is applied to S; otherwise it is applied to P.  */
+t = vandq_u8 (t, xmask);
+m = vpaddlq_u8 (t);
+m = vshlq_u16 (m, shift);
+found = vaddvq_u16 (m);
+return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
+	  + __builtin_ctz (found));
+}
+#elif defined (__ARM_NEON)
+#include "arm_neon.h"
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{  /* Return a pointer to the first '\n', '\r', '\\' or '?' at or after S.
+      END is unused and the loop has no bounds check of its own, so it only
+      stops once one of those characters has been found.  */
+const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
+const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
+const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
+const uint8x16_t repl_qm = vdupq_n_u8 ('?');
+const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);  /* One distinct bit per byte of each 8-byte half.  */
+unsigned int misalign, found, mask;
+const uint8_t *p;
+uint8x16_t data;
+/* Align the source pointer.  */
+misalign = (uintptr_t)s & 15;
+p = (const uint8_t *)((uintptr_t)s & -16);
+data = vld1q_u8 (p);
+/* Create a mask for the bytes that are valid within the first
+16-byte block.  The idea here is that the AND with the mask
+within the loop is "free", since we need some AND or TEST
+insn in order to set the flags for the branch anyway.  */
+mask = (-1u << misalign) & 0xffff;
+/* Main loop, processing 16 bytes at a time.  */
+goto start;  /* Enter mid-body so the first pass uses the DATA and MASK set up above.  */
+do
+{
+uint8x8_t l;
+uint16x4_t m;
+uint32x2_t n;
+uint8x16_t t, u, v, w;
+p += 16;
+data = vld1q_u8 (p);
+mask = 0xffff;  /* Every byte of each later block is valid.  */
+start:
+t = vceqq_u8 (data, repl_nl);
+u = vceqq_u8 (data, repl_cr);
+v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+t = vandq_u8 (vorrq_u8 (v, w), xmask);
+l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));  /* This and the following steps fold T into a 16-bit bitmap, one bit per byte.  */
+m = vpaddl_u8 (l);
+n = vpaddl_u16 (m);
+found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
+	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
+found &= mask;
+}
+while (!found);
+/* FOUND contains 1 in bits for which we matched a relevant
+character.  Conversion to the byte index is trivial: the position of
+the lowest set bit is the offset from P.  */
+found = __builtin_ctz (found);
+return (const uchar *)p + found;
+}
+#else
+/* We only have one accelerated alternative.  Use a direct call so that
 we encourage inlining.  */
 #define search_line_fast  search_line_acc_char
 #endif
+/* Initialize the lexer if needed.  When HAVE_init_vectorized_lexer is
+defined this calls init_vectorized_lexer (); otherwise it does nothing.  */
+void
+_cpp_init_lexer (void)
+{
+#ifdef HAVE_init_vectorized_lexer
+init_vectorized_lexer ();
+#endif
+}
 /* Returns with a logical line that contains no escaped newlines or
 trigraphs.  This is a time-critical inner loop.  */
 void
 _cpp_clean_line (cpp_reader *pfile)
 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
 			       "`%.*s' is not in NFKC", (int) sz, buf);
 else
 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
 			       "`%.*s' is not in NFC", (int) sz, buf);
+free (buf);
 }
 }
 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 an identifier.  FIRST is TRUE if this starts an identifier.  */
 /* Is this a syntactically valid UCN?  */
 if (CPP_OPTION (pfile, extended_identifiers)
 && *buffer->cur == '\\'
 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 {
+cppchar_t s;
 buffer->cur += 2;
 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
-			  state))
+			  state, &s, NULL, NULL))
 	return true;
 buffer->cur -= 2;
 }
 return false;
 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 	 replacement list of a variadic macro.  */
 if (result == pfile->spec_nodes.n__VA_ARGS__
 	  && !pfile->state.va_args_ok)
-	cpp_error (pfile, CPP_DL_PEDWARN,
+	{
-		   "__VA_ARGS__ can only appear in the expansion"
+	  if (CPP_OPTION (pfile, cplusplus))
-		   " of a C99 variadic macro");
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "__VA_ARGS__ can only appear in the expansion"
+		       " of a C++11 variadic macro");
+	  else
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "__VA_ARGS__ can only appear in the expansion"
+		       " of a C99 variadic macro");
+	}
 /* For -Wc++-compat, warn about use of C++ named operators.  */
 if (result->flags & NODE_WARN_OPERATOR)
 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
 		     "identifier \"%s\" is a special operator name in C++",
 }
 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 static cpp_hashnode *
 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
-		struct normalize_state *nst)
+		struct normalize_state *nst, cpp_hashnode **spelling)
 {
 cpp_hashnode *result;
 const uchar *cur;
 unsigned int len;
 unsigned int hash = HT_HASHSTEP (0, *base);
 cur = pfile->buffer->cur;
 if (! starts_ucn)
-while (ISIDNUM (*cur))
+{
-{
+while (ISIDNUM (*cur))
-	hash = HT_HASHSTEP (hash, *cur);
+	{
-	cur++;
+	  hash = HT_HASHSTEP (hash, *cur);
-}
+	  cur++;
+	}
+NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
+}
 pfile->buffer->cur = cur;
 if (starts_ucn || forms_identifier_p (pfile, false, nst))
 {
 /* Slower version for identifiers containing UCNs (or $).  */
 do {
 	while (ISIDNUM (*pfile->buffer->cur))
 	  {
+	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
 	    pfile->buffer->cur++;
-	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
 	  }
 } while (forms_identifier_p (pfile, false, nst));
 result = _cpp_interpret_identifier (pfile, base,
 					  pfile->buffer->cur - base);
+*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
 }
 else
 {
 len = cur - base;
 hash = HT_HASHFINISH (hash, len);
 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 						  base, len, hash, HT_ALLOC));
+*spelling = result;
 }
 /* Rarely, identifiers require diagnostics when lexed.  */
 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 			&& !pfile->state.skipping, 0))
 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 	 replacement list of a variadic macro.  */
 if (result == pfile->spec_nodes.n__VA_ARGS__
 	  && !pfile->state.va_args_ok)
-	cpp_error (pfile, CPP_DL_PEDWARN,
+	{
-		   "__VA_ARGS__ can only appear in the expansion"
+	  if (CPP_OPTION (pfile, cplusplus))
-		   " of a C99 variadic macro");
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "__VA_ARGS__ can only appear in the expansion"
+		       " of a C++11 variadic macro");
+	  else
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "__VA_ARGS__ can only appear in the expansion"
+		       " of a C99 variadic macro");
+	}
 /* For -Wc++-compat, warn about use of C++ named operators.  */
 if (result->flags & NODE_WARN_OPERATOR)
 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
 		     "identifier \"%s\" is a special operator name in C++",
 do
 {
 cur = pfile->buffer->cur;
 /* N.B. ISIDNUM does not include $.  */
-while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
+while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
-	{
+	     || VALID_SIGN (*cur, cur[-1]))
+	{
+	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
 	  cur++;
-	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
+	}
-	}
+/* A number can't end with a digit separator.  */
+while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
+	--cur;
 pfile->buffer->cur = cur;
 }
 while (forms_identifier_p (pfile, false, nst));
 *first_buff_p = first_buff;
 *last_buff_p = last_buff;
 }
+/* Returns true if the identifier starting at BASE is found in PFILE's
+hash table with type NT_MACRO.  Returns false if BASE does not start
+an identifier, or if the lookup (done with HT_NO_INSERT) yields no node.
+This might not work if compiled with -save-temps,
+or if preprocessing is done separately from compilation.  */
+static bool
+is_macro(cpp_reader *pfile, const uchar *base)
+{
+const uchar *cur = base;
+if (! ISIDST (*cur))
+return false;
+unsigned int hash = HT_HASHSTEP (0, *cur);  /* Seed the hash with the first character.  */
+++cur;
+while (ISIDNUM (*cur))  /* Hash the remaining identifier characters.  */
+{
+hash = HT_HASHSTEP (hash, *cur);
+++cur;
+}
+hash = HT_HASHFINISH (hash, cur - base);  /* CUR - BASE is the identifier length.  */
+cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
+					base, cur - base, hash, HT_NO_INSERT));
+return !result ? false : (result->type == NT_MACRO);
+}
 /* Lexes a raw string.  The stored string contains the spelling, including
 double quotes, delimiter string, '(' and ')', any leading
 'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
 literal, or CPP_OTHER if it was not properly terminated.
 static void
 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 		const uchar *cur)
 {
-source_location saw_NUL = 0;
+uchar raw_prefix[17];
-const uchar *raw_prefix;
+uchar temp_buffer[18];
-unsigned int raw_prefix_len = 0;
+const uchar *orig_base;
+unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
+enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
+raw_str_phase phase = RAW_STR_PREFIX;
 enum cpp_ttype type;
 size_t total_len = 0;
+/* Index into temp_buffer during phases other than RAW_STR; during the
+RAW_STR phase it is set to 17 to tell BUF_APPEND that nothing should
+be appended to temp_buffer.  */
+size_t temp_buffer_len = 0;
 _cpp_buff *first_buff = NULL, *last_buff = NULL;
+size_t raw_prefix_start;
 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
 type = (*base == 'L' ? CPP_WSTRING :
 	  *base == 'U' ? CPP_STRING32 :
 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 	  : CPP_STRING);
-raw_prefix = cur + 1;
-while (raw_prefix_len < 16)
-{
-switch (raw_prefix[raw_prefix_len])
-	{
-	case ' ': case '(': case ')': case '\\': case '\t':
-	case '\v': case '\f': case '\n': default:
-	  break;
-	/* Basic source charset except the above chars.  */
-	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-	case 'y': case 'z':
-	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-	case 'Y': case 'Z':
-	case '0': case '1': case '2': case '3': case '4': case '5':
-	case '6': case '7': case '8': case '9':
-	case '_': case '{': case '}': case '#': case '[': case ']':
-	case '<': case '>': case '%': case ':': case ';': case '.':
-	case '?': case '*': case '+': case '-': case '/': case '^':
-	case '&': case '|': case '~': case '!': case '=': case ',':
-	case '"': case '\'':
-	  raw_prefix_len++;
-	  continue;
-	}
-break;
-}
-if (raw_prefix[raw_prefix_len] != '(')
-{
-int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
-		+ 1;
-if (raw_prefix_len == 16)
-	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
-			     "raw string delimiter longer than 16 characters");
-else
-	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
-			     "invalid character '%c' in raw string delimiter",
-			     (int) raw_prefix[raw_prefix_len]);
-pfile->buffer->cur = raw_prefix - 1;
-create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
-return;
-}
-cur = raw_prefix + raw_prefix_len + 1;
-for (;;)
-{
 #define BUF_APPEND(STR,LEN)					\
 do {							\
 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
 			&first_buff, &last_buff);		\
 	total_len += (LEN);					\
+	if (__builtin_expect (temp_buffer_len < 17, 0)		\
+	    && (const uchar *)(STR) != base			\
+	    && (LEN) <= 2)					\
+	  {							\
+	    memcpy (temp_buffer + temp_buffer_len,		\
+		    (const uchar *)(STR), (LEN));		\
+	    temp_buffer_len += (LEN);				\
+	  }							\
 } while (0);
+orig_base = base;
+++cur;
+raw_prefix_start = cur - base;
+for (;;)
+{
 cppchar_t c;
 /* If we previously performed any trigraph or line splicing
-	 transformations, undo them within the body of the raw string.  */
+	 transformations, undo them in between the opening and closing
+	 double quote.  */
 while (note->pos < cur)
 	++note;
 for (; note->pos == cur; ++note)
 	{
 	  switch (note->type)
 			abort ();
 		      BUF_APPEND ("/", 1);
 		      ++note;
 		      goto after_backslash;
 		    }
-		  /* The ) from ??) could be part of the suffix.  */
-		  else if (type == ')'
-			   && strncmp ((const char *) cur+1,
-				       (const char *) raw_prefix,
-				       raw_prefix_len) == 0
-			   && cur[raw_prefix_len+1] == '"')
-		    {
-		      cur += raw_prefix_len+2;
-		      goto break_outer_loop;
-		    }
 		  else
 		    {
 		      /* Skip the replacement character.  */
 		      base = ++cur;
 		      BUF_APPEND (&type, 1);
+		      c = type;
+		      goto check_c;
 		    }
 		}
 	      else
 		abort ();
 	      break;
 	    }
 	}
 c = *cur++;
+if (__builtin_expect (temp_buffer_len < 17, 0))
-if (c == ')'
+	temp_buffer[temp_buffer_len++] = c;
-	  && strncmp ((const char *) cur, (const char *) raw_prefix,
-		      raw_prefix_len) == 0
+check_c:
-	  && cur[raw_prefix_len] == '"')
+if (phase == RAW_STR_PREFIX)
 	{
-	  cur += raw_prefix_len + 1;
+	  while (raw_prefix_len < temp_buffer_len)
-	  break;
+	    {
+	      raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
+	      switch (raw_prefix[raw_prefix_len])
+		{
+		case ' ': case '(': case ')': case '\\': case '\t':
+		case '\v': case '\f': case '\n': default:
+		  break;
+		/* Basic source charset except the above chars.  */
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+		case 'y': case 'z':
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+		case 'Y': case 'Z':
+		case '0': case '1': case '2': case '3': case '4': case '5':
+		case '6': case '7': case '8': case '9':
+		case '_': case '{': case '}': case '#': case '[': case ']':
+		case '<': case '>': case '%': case ':': case ';': case '.':
+		case '?': case '*': case '+': case '-': case '/': case '^':
+		case '&': case '|': case '~': case '!': case '=': case ',':
+		case '"': case '\'':
+		  if (raw_prefix_len < 16)
+		    {
+		      raw_prefix_len++;
+		      continue;
+		    }
+		  break;
+		}
+	      if (raw_prefix[raw_prefix_len] != '(')
+		{
+		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
+		  if (raw_prefix_len == 16)
+		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+					 col, "raw string delimiter longer "
+					      "than 16 characters");
+		  else if (raw_prefix[raw_prefix_len] == '\n')
+		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+					 col, "invalid new-line in raw "
+					      "string delimiter");
+		  else
+		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+					 col, "invalid character '%c' in "
+					      "raw string delimiter",
+					 (int) raw_prefix[raw_prefix_len]);
+		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
+		  create_literal (pfile, token, orig_base,
+				  raw_prefix_start - 1, CPP_OTHER);
+		  if (first_buff)
+		    _cpp_release_buff (pfile, first_buff);
+		  return;
+		}
+	      raw_prefix[raw_prefix_len] = '"';
+	      phase = RAW_STR;
+	      /* Nothing should be appended to temp_buffer during
+		 RAW_STR phase.  */
+	      temp_buffer_len = 17;
+	      break;
+	    }
+	  continue;
+	}
+else if (phase == RAW_STR_SUFFIX)
+	{
+	  while (raw_suffix_len <= raw_prefix_len
+		 && raw_suffix_len < temp_buffer_len
+		 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
+	    raw_suffix_len++;
+	  if (raw_suffix_len > raw_prefix_len)
+	    break;
+	  if (raw_suffix_len == temp_buffer_len)
+	    continue;
+	  phase = RAW_STR;
+	  /* Nothing should be appended to temp_buffer during
+	     RAW_STR phase.  */
+	  temp_buffer_len = 17;
+	}
+if (c == ')')
+	{
+	  phase = RAW_STR_SUFFIX;
+	  raw_suffix_len = 0;
+	  temp_buffer_len = 0;
 	}
 else if (c == '\n')
 	{
 	  if (pfile->state.in_directive
-	      || pfile->state.parsing_args
+	      || (pfile->state.parsing_args
-	      || pfile->state.in_deferred_pragma)
+		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
 	    {
 	      cur--;
 	      type = CPP_OTHER;
 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
 				   "unterminated raw string");
 	    }
 	  cur = base = pfile->buffer->cur;
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
 	}
-else if (c == '\0' && !saw_NUL)
+}
-	LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
-				     CPP_BUF_COLUMN (pfile->buffer, cur));
+if (CPP_OPTION (pfile, user_literals))
-}
+{
-break_outer_loop:
+/* If a string format macro, say from inttypes.h, is placed touching
+	 a string literal it could be parsed as a C++11 user-defined string
-if (saw_NUL && !pfile->state.skipping)
+	 literal thus breaking the program.
-cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
+	 Try to identify macros with is_macro. A warning is issued. */
-	       "null character(s) preserved in literal");
+if (is_macro (pfile, cur))
+	{
+	  /* Raise a warning, but do not consume subsequent tokens.  */
+	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
+	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
+				   token->src_loc, 0,
+				   "invalid suffix on literal; C++11 requires "
+				   "a space between literal and string macro");
+	}
+/* Grab user defined literal suffix.  */
+else if (ISIDST (*cur))
+	{
+	  type = cpp_userdef_string_add_type (type);
+	  ++cur;
+	  while (ISIDNUM (*cur))
+	    ++cur;
+	}
+}
 pfile->buffer->cur = cur;
 if (first_buff == NULL)
 create_literal (pfile, token, base, cur - base, type);
 else
 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 			 : CPP_STRING);
 else if (terminator == '\'')
 type = (*base == 'L' ? CPP_WCHAR :
 	    *base == 'U' ? CPP_CHAR32 :
-	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
+	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
+			 : CPP_CHAR);
 else
 terminator = '>', type = CPP_HEADER_NAME;
 for (;;)
 {
 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
 	       (int) terminator);
+if (CPP_OPTION (pfile, user_literals))
+{
+/* If a string format macro, say from inttypes.h, is placed touching
+	 a string literal it could be parsed as a C++11 user-defined string
+	 literal thus breaking the program.
+	 Try to identify macros with is_macro. A warning is issued. */
+if (is_macro (pfile, cur))
+	{
+	  /* Raise a warning, but do not consume subsequent tokens.  */
+	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
+	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
+				   token->src_loc, 0,
+				   "invalid suffix on literal; C++11 requires "
+				   "a space between literal and string macro");
+	}
+/* Grab user defined literal suffix.  */
+else if (ISIDST (*cur))
+	{
+	  type = cpp_userdef_char_add_type (type);
+	  type = cpp_userdef_string_add_type (type);
+++cur;
+	  while (ISIDNUM (*cur))
+	    ++cur;
+	}
+}
+else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
+	   && is_macro (pfile, cur)
+	   && !pfile->state.skipping)
+cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
+			   token->src_loc, 0, "C++11 requires a space "
+			   "between string literal and macro");
 pfile->buffer->cur = cur;
 create_literal (pfile, token, base, cur - base, type);
 }
 /* Return the comment table. The client may not make any assumption
 /* Finally store this comment for use by clients of libcpp. */
 store_comment (pfile, token);
 }
+/* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
+comment.
+
+PFILE supplies the -Wimplicit-fallthrough level (option
+cpp_warn_implicit_fallthrough) and, through pfile->buffer->cur, the
+upper bound used by every remaining-length check below; presumably the
+caller has already advanced it past the comment -- confirm at the
+call site.  COMMENT_START points at the second character of the
+comment opener: '*' selects the C block comment rules, anything else
+is treated as a C++ line comment.
+
+Levels 0 and 5 (and any other unlisted value) never match, level 1
+always matches, level 2 searches the text for a loose
+"falls?[ \t-]*thr(u|ough)" pattern, and levels 3 and 4 require the
+whole comment to have one of the forms listed further down and to end
+at the closing star-slash (block comment) or at a newline (line
+comment).  */
+static bool
+fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
+{
+const unsigned char *from = comment_start + 1; /* First byte after the opener.  */
+switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
+{
+/* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
+	 don't recognize any comments.  The latter only checks attributes,
+	 the former doesn't warn.  */
+case 0:
+default:
+return false;
+/* -Wimplicit-fallthrough=1 considers any comment, no matter what
+	 content it has.  */
+case 1:
+return true;
+case 2:
+/* -Wimplicit-fallthrough=2 looks for (case insensitive)
+	 .*falls?[ \t-]*thr(u|ough).* regex.  The loop keeps going only
+	 while at least strlen ("fallthru") bytes remain before
+	 pfile->buffer->cur.  Note that FROM is advanced inside the body
+	 as a partial match is consumed, so a failed attempt resumes
+	 after the consumed prefix rather than at the next byte.  */
+for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
+	   from++)
+	{
+	  /* Is there anything like strpbrk with upper boundary, or
+	     memchr looking for 2 characters rather than just one?  */
+	  if (from[0] != 'f' && from[0] != 'F')
+	    continue;
+	  if (from[1] != 'a' && from[1] != 'A')
+	    continue;
+	  if (from[2] != 'l' && from[2] != 'L')
+	    continue;
+	  if (from[3] != 'l' && from[3] != 'L')
+	    continue;
+	  from += sizeof "fall" - 1;
+	  if (from[0] == 's' || from[0] == 'S')
+	    from++;
+	  while (*from == ' ' || *from == '\t' || *from == '-')
+	    from++;
+	  if (from[0] != 't' && from[0] != 'T')
+	    continue;
+	  if (from[1] != 'h' && from[1] != 'H')
+	    continue;
+	  if (from[2] != 'r' && from[2] != 'R')
+	    continue;
+	  if (from[3] == 'u' || from[3] == 'U')
+	    return true;
+	  if (from[3] != 'o' && from[3] != 'O')
+	    continue;
+	  if (from[4] != 'u' && from[4] != 'U')
+	    continue;
+	  if (from[5] != 'g' && from[5] != 'G')
+	    continue;
+	  if (from[6] != 'h' && from[6] != 'H')
+	    continue;
+	  return true;
+	}
+return false;
+case 3:
+case 4:
+break; /* Continue with the whole-comment matchers below.  */
+}
+/* Whole comment contents:
+-fallthrough
+@fallthrough@
+The leading '-' or '@' is case sensitive, as is the word itself; the
+'@' form additionally requires the trailing '@'.
+*/
+if (*from == '-' || *from == '@')
+{
+size_t len = sizeof "fallthrough" - 1;
+if ((size_t) (pfile->buffer->cur - from - 1) < len)
+	return false;
+if (memcmp (from + 1, "fallthrough", len))
+	return false;
+if (*from == '@')
+	{
+	  if (from[len + 1] != '@')
+	    return false;
+	  len++;
+	}
+from += 1 + len;
+}
+/* Whole comment contents (regex):
+lint -fallthrough[ \t]*
+*/
+else if (*from == 'l')
+{
+size_t len = sizeof "int -fallthrough" - 1;
+if ((size_t) (pfile->buffer->cur - from - 1) < len)
+	return false;
+if (memcmp (from + 1, "int -fallthrough", len))
+	return false;
+from += 1 + len;
+while (*from == ' ' || *from == '\t')
+	from++;
+}
+/* Whole comment contents (regex):
+[ \t]*FALLTHR(U|OUGH)[ \t]*
+This is the only free-form spelling accepted at level 4.
+*/
+else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
+{
+while (*from == ' ' || *from == '\t')
+	from++;
+if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
+	return false;
+if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
+	return false;
+from += sizeof "FALLTHR" - 1;
+if (*from == 'U')
+	from++;
+else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
+	return false;
+else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
+	return false;
+else
+	from += sizeof "OUGH" - 1;
+while (*from == ' ' || *from == '\t')
+	from++;
+}
+/* Whole comment contents (regex):
+[ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
+[ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
+[ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
+Reached only at level 3, since level 4 is taken by the branch above.
+F holds the first letter of the word currently being matched and
+ALL_UPPER records that an all-capitals word has been seen, which
+forces the remaining words to be all-capitals too.
+*/
+else
+{
+while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
+	from++;
+unsigned char f = *from;
+bool all_upper = false;
+if (f == 'E' || f == 'e')
+	{
+	  if ((size_t) (pfile->buffer->cur - from)
+	      < sizeof "else fallthru" - 1)
+	    return false;
+	  if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
+	    all_upper = true;
+	  else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
+	    return false;
+	  from += sizeof "else" - 1;
+	  if (*from == ',')
+	    from++;
+	  if (*from != ' ')
+	    return false;
+	  from++;
+	  if (all_upper && *from == 'f')
+	    return false;
+	  if (f == 'e' && *from == 'F')
+	    return false;
+	  f = *from;
+	}
+else if (f == 'I' || f == 'i')
+	{
+	  if ((size_t) (pfile->buffer->cur - from)
+	      < sizeof "intentional fallthru" - 1)
+	    return false;
+	  if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
+				  sizeof "NTENTIONAL" - 1) == 0)
+	    all_upper = true;
+	  else if (memcmp (from + 1, "ntentional",
+			   sizeof "ntentional" - 1))
+	    return false;
+	  from += sizeof "intentional" - 1;
+	  if (*from == ' ')
+	    {
+	      from++;
+	      if (all_upper && *from == 'f')
+		return false;
+	    }
+	  else if (all_upper)
+	    {
+	      if (memcmp (from, "LY F", sizeof "LY F" - 1))
+		return false;
+	      from += sizeof "LY " - 1;
+	    }
+	  else
+	    {
+	      if (memcmp (from, "ly ", sizeof "ly " - 1))
+		return false;
+	      from += sizeof "ly " - 1;
+	    }
+	  if (f == 'i' && *from == 'F')
+	    return false;
+	  f = *from;
+	}
+if (f != 'F' && f != 'f')
+	return false;
+if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
+	return false;
+if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
+	all_upper = true;
+else if (all_upper)
+	return false;
+else if (memcmp (from + 1, "all", sizeof "all" - 1))
+	return false;
+from += sizeof "fall" - 1;
+if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
+	from += 2;
+else if (*from == ' ' || *from == '-')
+	from++;
+else if (*from != (all_upper ? 'T' : 't'))
+	return false;
+if ((f == 'f' || *from != 'T') && (all_upper || *from != 't')) /* 'T' only after a capital 'F'; 't' only when not all-upper.  */
+	return false;
+if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
+	return false;
+if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
+	{
+	  if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
+	    return false;
+	  if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
+		      sizeof "hrough" - 1))
+	    return false;
+	  from += sizeof "through" - 1;
+	}
+else
+	from += sizeof "thru" - 1;
+while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
+	from++;
+if (*from == '-') /* Optional "-<anything up to end of line>" tail.  */
+	{
+	  from++;
+	  if (*comment_start == '*')
+	    {
+	      do
+		{
+		  while (*from && *from != '*'
+			 && *from != '\n' && *from != '\r')
+		    from++;
+		  if (*from != '*' || from[1] == '/')
+		    break;
+		  from++;
+		}
+	      while (1);
+	    }
+	  else
+	    while (*from && *from != '\n' && *from != '\r')
+	      from++;
+	}
+}
+/* C block comment.  Whatever was matched above must be followed
+immediately by the comment terminator.  */
+if (*comment_start == '*')
+{
+if (*from != '*' || from[1] != '/')
+	return false;
+}
+/* C++ line comment.  Whatever was matched above must be followed
+immediately by a newline.  */
+else if (*from != '\n')
+return false;
+return true;
+}
 /* Allocate COUNT tokens for RUN.  */
 void
 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 {
 run->base = XNEWVEC (cpp_token, count);
 }
 return run->next;
 }
+/* Return the number of not yet processed tokens in a given
+context.  The count is the distance between the context's FIRST and
+LAST cursors: their token pointers for TOKENS_KIND_DIRECT, their
+ptoken pointers for TOKENS_KIND_INDIRECT and TOKENS_KIND_EXTENDED.
+Any other tokens_kind aborts.  */
+int
+_cpp_remaining_tokens_num_in_context (cpp_context *context)
+{
+if (context->tokens_kind == TOKENS_KIND_DIRECT)
+return (LAST (context).token - FIRST (context).token);
+else if (context->tokens_kind == TOKENS_KIND_INDIRECT
+	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
+return (LAST (context).ptoken - FIRST (context).ptoken);
+else
+abort ();
+}
+/* Returns the token present at index INDEX in a given context.  If
+INDEX is zero, the next token to be processed is returned.  For
+TOKENS_KIND_DIRECT the result is the address of the INDEX-th element
+counted from FIRST's token pointer; for TOKENS_KIND_INDIRECT and
+TOKENS_KIND_EXTENDED it is the INDEX-th pointer counted from FIRST's
+ptoken pointer.  Any other tokens_kind aborts.  INDEX is not range
+checked here.  */
+static const cpp_token*
+_cpp_token_from_context_at (cpp_context *context, int index)
+{
+if (context->tokens_kind == TOKENS_KIND_DIRECT)
+return &(FIRST (context).token[index]);
+else if (context->tokens_kind == TOKENS_KIND_INDIRECT
+	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
+return FIRST (context).ptoken[index];
+else
+abort ();
+}
 /* Look ahead in the input stream.  */
 const cpp_token *
 cpp_peek_token (cpp_reader *pfile, int index)
 {
 cpp_context *context = pfile->context;
 int count;
 /* First, scan through any pending cpp_context objects.  */
 while (context->prev)
 {
-ptrdiff_t sz = (context->direct_p
+ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
-? LAST (context).token - FIRST (context).token
-: LAST (context).ptoken - FIRST (context).ptoken);
 if (index < (int) sz)
-return (context->direct_p
+return _cpp_token_from_context_at (context, index);
-? FIRST (context).token + index
-: *(FIRST (context).ptoken + index));
 index -= (int) sz;
 context = context->prev;
 }
 /* We will have to read some new tokens after all (and do so
 without invalidating preceding tokens).  */
 count = index;
 pfile->keep_tokens++;
+/* For peeked tokens temporarily disable line_change reporting,
+until the tokens are parsed for real.  */
+void (*line_change) (cpp_reader *, const cpp_token *, int)
+= pfile->cb.line_change;
+pfile->cb.line_change = NULL;
 do
 {
 peektok = _cpp_lex_token (pfile);
 if (peektok->type == CPP_EOF)
-	return peektok;
+	{
+	  index--;
+	  break;
+	}
 }
 while (index--);
-_cpp_backup_tokens_direct (pfile, count + 1);
+_cpp_backup_tokens_direct (pfile, count - index);
 pfile->keep_tokens--;
+pfile->cb.line_change = line_change;
 return peektok;
 }
 /* Allocate a single token that is invalidated at the same time as the
 _cpp_lex_direct (cpp_reader *pfile)
 {
 cppchar_t c;
 cpp_buffer *buffer;
 const unsigned char *comment_start;
+bool fallthrough_comment = false;
 cpp_token *result = pfile->cur_token++;
 fresh_line:
 result->flags = 0;
 buffer = pfile->buffer;
 	      result->src_loc = pfile->line_table->highest_line;
 	      result->flags = BOL;
 	    }
 	  return result;
 	}
+if (buffer != pfile->buffer)
+	fallthrough_comment = false;
 if (!pfile->keep_tokens)
 	{
 	  pfile->cur_run = &pfile->base_run;
 	  result = pfile->base_run.base;
 	  pfile->cur_token = result + 1;
 _cpp_process_line_notes (pfile, false);
 result->src_loc = pfile->line_table->highest_line;
 }
 c = *buffer->cur++;
-LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
+if (pfile->forced_token_location_p)
-			       CPP_BUF_COLUMN (buffer, buffer->cur));
+result->src_loc = *pfile->forced_token_location_p;
+else
+result->src_loc = linemap_position_for_column (pfile->line_table,
+					  CPP_BUF_COLUMN (buffer, buffer->cur));
 switch (c)
 {
 case ' ': case '\t': case '\f': case '\v': case '\0':
 result->flags |= PREV_WHITE;
 case 'u':
 case 'U':
 case 'R':
 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
 	 wide strings or raw strings.  */
-if (c == 'L' || CPP_OPTION (pfile, uliterals))
+if (c == 'L' || CPP_OPTION (pfile, rliterals)
+	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
 	{
 	  if ((*buffer->cur == '\'' && c != 'R')
 	      || *buffer->cur == '"'
 	      || (*buffer->cur == 'R'
 		  && c != 'R'
 		  && buffer->cur[1] == '"'
-		  && CPP_OPTION (pfile, uliterals))
+		  && CPP_OPTION (pfile, rliterals))
 	      || (*buffer->cur == '8'
 		  && c == 'u'
-		  && (buffer->cur[1] == '"'
+		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
-		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
+				&& CPP_OPTION (pfile, utf8_char_literals)))
+		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
+			  && CPP_OPTION (pfile, rliterals)))))
 	    {
 	      lex_string (pfile, result, buffer->cur - 1);
 	      break;
 	    }
 	}
 case 'Y': case 'Z':
 result->type = CPP_NAME;
 {
 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
-						&nst);
+						&nst,
+						&result->val.node.spelling);
 	warn_about_normalization (pfile, result, &nst);
 }
 /* Convert named operators to their proper types.  */
 if (result->val.node.node->flags & NODE_OPERATOR)
 	{
 	  result->flags |= NAMED_OP;
 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
 	}
+/* Signal FALLTHROUGH comment followed by another token.  */
+if (fallthrough_comment)
+	result->flags |= PREV_FALLTHROUGH;
 break;
 case '\'':
 case '"':
 lex_string (pfile, result, buffer->cur - 1);
 if (c == '*')
 	{
 	  if (_cpp_skip_block_comment (pfile))
 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 	}
-else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
+else if (c == '/' && ! CPP_OPTION (pfile, traditional))
-			    || cpp_in_system_header (pfile)))
+	{
-	{
+	  /* Don't warn for system headers.  */
-	  /* Warn about comments only if pedantically GNUC89, and not
+	  if (cpp_in_system_header (pfile))
+	    ;
+	  /* Warn about comments if pedantically GNUC89, and not
 	     in system headers.  */
-	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
+	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
-	      && ! buffer->warned_cplusplus_comments)
+		   && CPP_PEDANTIC (pfile)
+		   && ! buffer->warned_cplusplus_comments)
 	    {
 	      cpp_error (pfile, CPP_DL_PEDWARN,
 			 "C++ style comments are not allowed in ISO C90");
 	      cpp_error (pfile, CPP_DL_PEDWARN,
 			 "(this will be reported only once per input file)");
 	      buffer->warned_cplusplus_comments = 1;
 	    }
+	  /* Or if specifically desired via -Wc90-c99-compat.  */
+	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
+		   && ! CPP_OPTION (pfile, cplusplus)
+		   && ! buffer->warned_cplusplus_comments)
+	    {
+	      cpp_error (pfile, CPP_DL_WARNING,
+			 "C++ style comments are incompatible with C90");
+	      cpp_error (pfile, CPP_DL_WARNING,
+			 "(this will be reported only once per input file)");
+	      buffer->warned_cplusplus_comments = 1;
+	    }
+	  /* In C89/C94, C++ style comments are forbidden.  */
+	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
+		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
+	    {
+	      /* But don't be confused about valid code such as
+	         - // immediately followed by *,
+		 - // in a preprocessing directive,
+		 - // in an #if 0 block.  */
+	      if (buffer->cur[1] == '*'
+		  || pfile->state.in_directive
+		  || pfile->state.skipping)
+		{
+		  result->type = CPP_DIV;
+		  break;
+		}
+	      else if (! buffer->warned_cplusplus_comments)
+		{
+		  cpp_error (pfile, CPP_DL_ERROR,
+			     "C++ style comments are not allowed in ISO C90");
+		  cpp_error (pfile, CPP_DL_ERROR,
+			     "(this will be reported only once per input "
+			     "file)");
+		  buffer->warned_cplusplus_comments = 1;
+		}
+	    }
 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
 	}
 else if (c == '=')
 	{
 	{
 	  result->type = CPP_DIV;
 	  break;
 	}
+if (fallthrough_comment_p (pfile, comment_start))
+	fallthrough_comment = true;
+if (pfile->cb.comment)
+	{
+	  size_t len = pfile->buffer->cur - comment_start;
+	  pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
+			     len + 1);
+	}
 if (!pfile->state.save_comments)
 	{
 	  result->flags |= PREV_WHITE;
 	  goto update_tokens_line;
 	}
+if (fallthrough_comment)
+	result->flags |= PREV_FALLTHROUGH;
 /* Save the comment as a token in its own right.  */
 save_comment (pfile, result, comment_start, c);
 break;
 	}
 else if (CPP_OPTION (pfile, digraphs))
 	{
 	  if (*buffer->cur == ':')
 	    {
+	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
+		 three characters are <:: and the subsequent character
+		 is neither : nor >, the < is treated as a preprocessor
+		 token by itself".  */
+	      if (CPP_OPTION (pfile, cplusplus)
+		  && CPP_OPTION (pfile, lang) != CLK_CXX98
+		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
+		  && buffer->cur[1] == ':'
+		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
+		break;
 	      buffer->cur++;
 	      result->flags |= DIGRAPH;
 	      result->type = CPP_OPEN_SQUARE;
 	    }
 	  else if (*buffer->cur == '%')
 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 	if (forms_identifier_p (pfile, true, &nst))
 	  {
 	    result->type = CPP_NAME;
-	    result->val.node.node = lex_identifier (pfile, base, true, &nst);
+	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
+						    &result->val.node.spelling);
 	    warn_about_normalization (pfile, result, &nst);
 	    break;
 	  }
 	buffer->cur++;
 }
+/* FALLTHRU */
 default:
 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
 break;
+}
+/* Potentially convert the location of the token to a range.  */
+if (result->src_loc >= RESERVED_LOCATION_COUNT
+&& result->type != CPP_EOF)
+{
+/* Ensure that any line notes are processed, so that we have the
+	 correct physical line/column for the end-point of the token even
+	 when a logical line is split via one or more backslashes.  */
+if (buffer->cur >= buffer->notes[buffer->cur_note].pos
+	  && !pfile->overlaid_buffer)
+	_cpp_process_line_notes (pfile, false);
+source_range tok_range;
+tok_range.m_start = result->src_loc;
+tok_range.m_finish
+	= linemap_position_for_column (pfile->line_table,
+				       CPP_BUF_COLUMN (buffer, buffer->cur));
+result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
+					       result->src_loc,
+					       tok_range, NULL);
 }
 return result;
 }
 cpp_digraph2name (enum cpp_ttype type)
 {
 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
 }
+/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
+The buffer must already contain enough space to hold the
+token's spelling.  Returns a pointer to the character after the
+last character written.
+
+Bytes of the name with the high bit clear are copied unchanged.  A
+byte with the high bit set is handed to utf8_to_ucn, after which the
+output pointer advances by a fixed 10 bytes and the input index by
+utf8_to_ucn's return value -- presumably the length of the UTF-8
+sequence consumed and the length of one UCN spelling respectively;
+confirm against utf8_to_ucn.  */
+unsigned char *
+_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
+{
+size_t i;
+const unsigned char *name = NODE_NAME (ident);
+for (i = 0; i < NODE_LEN (ident); i++)
+if (name[i] & ~0x7F)
+{
+	i += utf8_to_ucn (buffer, name + i) - 1;
+	buffer += 10;
+}
+else
+*buffer++ = name[i];
+return buffer;
+}
 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
 already contain enough space to hold the token's spelling.
 Returns a pointer to the character after the last character written.
 FORSTRING is true if this is to be the spelling after translation
-phase 1 (this is different for UCNs).
+phase 1 (with the original spelling of extended identifiers), false
+if extended identifiers should always be written using UCNs (there is
+no option for always writing them in the internal UTF-8 form).
 FIXME: Would be nice if we didn't need the PFILE argument.  */
 unsigned char *
 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
 		 unsigned char *buffer, bool forstring)
 {
 spell_ident:
 case SPELL_IDENT:
 if (forstring)
 	{
-	  memcpy (buffer, NODE_NAME (token->val.node.node),
+	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
-		  NODE_LEN (token->val.node.node));
+		  NODE_LEN (token->val.node.spelling));
-	  buffer += NODE_LEN (token->val.node.node);
+	  buffer += NODE_LEN (token->val.node.spelling);
 	}
 else
-	{
+	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
-	  size_t i;
-	  const unsigned char * name = NODE_NAME (token->val.node.node);
-	  for (i = 0; i < NODE_LEN (token->val.node.node); i++)
-	    if (name[i] & ~0x7F)
-	      {
-		i += utf8_to_ucn (buffer, name + i) - 1;
-		buffer += 10;
-	      }
-	    else
-	      *buffer++ = NODE_NAME (token->val.node.node)[i];
-	}
 break;
 case SPELL_LITERAL:
 memcpy (buffer, token->val.str.text, token->val.str.len);
 buffer += token->val.str.len;
 	/* token_no is used to track where multiple consecutive ##
 	   tokens were originally located.  */
 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
 case SPELL_NONE:
 	return (a->type != CPP_MACRO_ARG
-		|| a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
+		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
+		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
 case SPELL_IDENT:
-	return a->val.node.node == b->val.node.node;
+	return (a->val.node.node == b->val.node.node
+		&& a->val.node.spelling == b->val.node.spelling);
 case SPELL_LITERAL:
 	return (a->val.str.len == b->val.str.len
 		&& !memcmp (a->val.str.text, b->val.str.text,
 			    a->val.str.len));
 }
 case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
 				 && b == CPP_NAME)
 				|| (CPP_OPTION (pfile, objc)
 				    && token1->val.str.text[0] == '@'
 				    && (b == CPP_NAME || b == CPP_STRING)));
+case CPP_STRING:
+case CPP_WSTRING:
+case CPP_UTF8STRING:
+case CPP_STRING16:
+case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
+				&& (b == CPP_NAME
+				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
+					&& ISIDST (token2->val.str.text[0]))));
 default:		break;
 }
 return 0;
 }
 if (len < MIN_BUFF_SIZE)
 len = MIN_BUFF_SIZE;
 len = CPP_ALIGN (len);
+#ifdef ENABLE_VALGRIND_ANNOTATIONS
+/* Valgrind warns about uses of interior pointers, so put _cpp_buff
+struct first.  */
+size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
+base = XNEWVEC (unsigned char, len + slen);
+result = (_cpp_buff *) base;
+base += slen;
+#else
 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
 result = (_cpp_buff *) (base + len);
+#endif
 result->base = base;
 result->cur = base;
 result->limit = base + len;
 result->next = NULL;
 return result;
 _cpp_buff *next;
 for (; buff; buff = next)
 {
 next = buff->next;
+#ifdef ENABLE_VALGRIND_ANNOTATIONS
+free (buff);
+#else
 free (buff->base);
+#endif
 }
 }
 /* Allocate permanent, unaligned storage of length LEN.  */
 unsigned char *
 }
 /* Say which field of TOK is in use.  */
 enum cpp_token_fld_kind
-cpp_token_val_index (cpp_token *tok)
+cpp_token_val_index (const cpp_token *tok)
 {
 switch (TOKEN_SPELL (tok))
 {
 case SPELL_IDENT:
 return CPP_TOKEN_FLD_NODE;
 	return CPP_TOKEN_FLD_ARG_NO;
 else if (tok->type == CPP_PADDING)
 	return CPP_TOKEN_FLD_SOURCE;
 else if (tok->type == CPP_PRAGMA)
 	return CPP_TOKEN_FLD_PRAGMA;
-/* else fall through */
+/* fall through */
 default:
 return CPP_TOKEN_FLD_NONE;
 }
 }
+/* All tokens lexed in R after calling this function will be forced to have
+their source_location the same as the location referenced by P, until
+cpp_stop_forcing_token_locations is called for R.  Only the pointer P
+is stored in R, not a copy of the location it points to, so P must
+stay valid for as long as forcing is in effect.  */
+void
+cpp_force_token_locations (cpp_reader *r, source_location *p)
+{
+r->forced_token_location_p = p;
+}
+/* Go back to assigning locations naturally for lexed tokens.  This
+resets R's forced_token_location_p to NULL.  */
+void
+cpp_stop_forcing_token_locations (cpp_reader *r)
+{
+r->forced_token_location_p = NULL;
+}

Mercurial > hg > CbC > CbC_gcc

comparison libcpp/lex.c @ 111:04ced10e8804