comparison libcpp/lex.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
comparison
equal deleted inserted replaced
65:65488c3d617d 67:f6334be47118
1 /* CPP Library - lexical analysis. 1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc. 3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95. 4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986 5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987 6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000 7 Broken out to separate file, Zack Weinberg, Mar 2000
94 buffer->notes[buffer->notes_used].pos = pos; 94 buffer->notes[buffer->notes_used].pos = pos;
95 buffer->notes[buffer->notes_used].type = type; 95 buffer->notes[buffer->notes_used].type = type;
96 buffer->notes_used++; 96 buffer->notes_used++;
97 } 97 }
98 98
99
100 /* Fast path to find line special characters using optimized character
101 scanning algorithms. Anything complicated falls back to the slow
102 path below. Since this loop is very hot it's worth doing these kinds
103 of optimizations.
104
105 One of the paths through the ifdefs should provide
106
107 const uchar *search_line_fast (const uchar *s, const uchar *end);
108
109 Between S and END, search for \n, \r, \\, ?. Return a pointer to
110 the found character.
111
112 Note that the last character of the buffer is *always* a newline,
113 as forced by _cpp_convert_input. This fact can be used to avoid
114 explicitly looking for the end of the buffer. */
115
116 /* Configure gives us an ifdef test. */
117 #ifndef WORDS_BIGENDIAN
118 #define WORDS_BIGENDIAN 0
119 #endif
120
121 /* We'd like the largest integer that fits into a register. There's nothing
122 in <stdint.h> that gives us that. For most hosts this is unsigned long,
123 but MS decided on an LLP64 model. Thankfully when building with GCC we
124 can get the "real" word size. */
125 #ifdef __GNUC__
126 typedef unsigned int word_type __attribute__((__mode__(__word__)));
127 #else
128 typedef unsigned long word_type;
129 #endif
130
131 /* The code below is only expecting sizes 4 or 8.
132 Die at compile-time if this expectation is violated. */
133 typedef char check_word_type_size
134 [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
135
136 /* Return X with the first N bytes forced to values that won't match one
137 of the interesting characters. Note that NUL is not interesting. */
138
139 static inline word_type
140 acc_char_mask_misalign (word_type val, unsigned int n)
141 {
142 word_type mask = -1;
143 if (WORDS_BIGENDIAN)
144 mask >>= n * 8;
145 else
146 mask <<= n * 8;
147 return val & mask;
148 }
149
150 /* Return X replicated to all byte positions within WORD_TYPE. */
151
152 static inline word_type
153 acc_char_replicate (uchar x)
154 {
155 word_type ret;
156
157 ret = (x << 24) | (x << 16) | (x << 8) | x;
158 if (sizeof(word_type) == 8)
159 ret = (ret << 16 << 16) | ret;
160 return ret;
161 }
162
163 /* Return non-zero if some byte of VAL is (probably) C. */
164
165 static inline word_type
166 acc_char_cmp (word_type val, word_type c)
167 {
168 #if defined(__GNUC__) && defined(__alpha__)
169 /* We can get exact results using a compare-bytes instruction.
170 Get (val == c) via (0 >= (val ^ c)). */
171 return __builtin_alpha_cmpbge (0, val ^ c);
172 #else
173 word_type magic = 0x7efefefeU;
174 if (sizeof(word_type) == 8)
175 magic = (magic << 16 << 16) | 0xfefefefeU;
176 magic |= 1;
177
178 val ^= c;
179 return ((val + magic) ^ ~val) & ~magic;
180 #endif
181 }
182
183 /* Given the result of acc_char_cmp is non-zero, return the index of
184 the found character. If this was a false positive, return -1. */
185
186 static inline int
187 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
188 word_type val ATTRIBUTE_UNUSED)
189 {
190 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
191 /* The cmpbge instruction sets *bits* of the result corresponding to
192 matches in the bytes with no false positives. */
193 return __builtin_ctzl (cmp);
194 #else
195 unsigned int i;
196
197 /* ??? It would be nice to force unrolling here,
198 and have all of these constants folded. */
199 for (i = 0; i < sizeof(word_type); ++i)
200 {
201 uchar c;
202 if (WORDS_BIGENDIAN)
203 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
204 else
205 c = (val >> i * 8) & 0xff;
206
207 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
208 return i;
209 }
210
211 return -1;
212 #endif
213 }
214
215 /* A version of the fast scanner using bit fiddling techniques.
216
217 For 32-bit words, one would normally perform 16 comparisons and
218 16 branches. With this algorithm one performs 24 arithmetic
219 operations and one branch. Whether this is faster with a 32-bit
220 word size is going to be somewhat system dependent.
221
222 For 64-bit words, we eliminate twice the number of comparisons
223 and branches without increasing the number of arithmetic operations.
224 It's almost certainly going to be a win with 64-bit word size. */
225
226 static const uchar * search_line_acc_char (const uchar *, const uchar *)
227 ATTRIBUTE_UNUSED;
228
229 static const uchar *
230 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
231 {
232 const word_type repl_nl = acc_char_replicate ('\n');
233 const word_type repl_cr = acc_char_replicate ('\r');
234 const word_type repl_bs = acc_char_replicate ('\\');
235 const word_type repl_qm = acc_char_replicate ('?');
236
237 unsigned int misalign;
238 const word_type *p;
239 word_type val, t;
240
241 /* Align the buffer. Mask out any bytes from before the beginning. */
242 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
243 val = *p;
244 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
245 if (misalign)
246 val = acc_char_mask_misalign (val, misalign);
247
248 /* Main loop. */
249 while (1)
250 {
251 t = acc_char_cmp (val, repl_nl);
252 t |= acc_char_cmp (val, repl_cr);
253 t |= acc_char_cmp (val, repl_bs);
254 t |= acc_char_cmp (val, repl_qm);
255
256 if (__builtin_expect (t != 0, 0))
257 {
258 int i = acc_char_index (t, val);
259 if (i >= 0)
260 return (const uchar *)p + i;
261 }
262
263 val = *++p;
264 }
265 }
266
267 /* Disable on Solaris 2/x86 until the following problems can be properly
268 autoconfed:
269
270 The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
271 The Solaris 9 assembler cannot assemble SSE4.2 insns.
272 Before Solaris 9 Update 6, SSE insns cannot be executed.
273 The Solaris 10+ assembler tags objects with the instruction set
274 extensions used, so SSE4.2 executables cannot run on machines that
275 don't support that extension. */
276
277 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
278
279 /* Replicated character data to be shared between implementations.
280 Recall that outside of a context with vector support we can't
281 define compatible vector types, therefore these are all defined
282 in terms of raw characters. */
283 static const char repl_chars[4][16] __attribute__((aligned(16))) = {
284 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
285 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
286 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
287 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
288 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
289 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
290 { '?', '?', '?', '?', '?', '?', '?', '?',
291 '?', '?', '?', '?', '?', '?', '?', '?' },
292 };
293
294 /* A version of the fast scanner using MMX vectorized byte compare insns.
295
296 This uses the PMOVMSKB instruction which was introduced with "MMX2",
297 which was packaged into SSE1; it is also present in the AMD 3dNOW-A
298 extension. Mark the function as using "sse" so that we emit a real
299 "emms" instruction, rather than the 3dNOW "femms" instruction. */
300
301 static const uchar *
302 #ifndef __SSE__
303 __attribute__((__target__("sse")))
304 #endif
305 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
306 {
307 typedef char v8qi __attribute__ ((__vector_size__ (8)));
308 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
309
310 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
311 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
312 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
313 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
314
315 unsigned int misalign, found, mask;
316 const v8qi *p;
317 v8qi data, t, c;
318
319 /* Align the source pointer. While MMX doesn't generate unaligned data
320 faults, this allows us to safely scan to the end of the buffer without
321 reading beyond the end of the last page. */
322 misalign = (uintptr_t)s & 7;
323 p = (const v8qi *)((uintptr_t)s & -8);
324 data = *p;
325
 326 /* Create a mask for the bytes that are valid within the first
 327 8-byte block. The idea here is that the AND with the mask
328 within the loop is "free", since we need some AND or TEST
329 insn in order to set the flags for the branch anyway. */
330 mask = -1u << misalign;
331
332 /* Main loop processing 8 bytes at a time. */
333 goto start;
334 do
335 {
336 data = *++p;
337 mask = -1;
338
339 start:
340 t = __builtin_ia32_pcmpeqb(data, repl_nl);
341 c = __builtin_ia32_pcmpeqb(data, repl_cr);
342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343 c = __builtin_ia32_pcmpeqb(data, repl_bs);
344 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345 c = __builtin_ia32_pcmpeqb(data, repl_qm);
346 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
347 found = __builtin_ia32_pmovmskb (t);
348 found &= mask;
349 }
350 while (!found);
351
352 __builtin_ia32_emms ();
353
354 /* FOUND contains 1 in bits for which we matched a relevant
355 character. Conversion to the byte index is trivial. */
356 found = __builtin_ctz(found);
357 return (const uchar *)p + found;
358 }
359
360 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
361
362 static const uchar *
363 #ifndef __SSE2__
364 __attribute__((__target__("sse2")))
365 #endif
366 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
367 {
368 typedef char v16qi __attribute__ ((__vector_size__ (16)));
369
370 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
371 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
372 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
373 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
374
375 unsigned int misalign, found, mask;
376 const v16qi *p;
377 v16qi data, t;
378
379 /* Align the source pointer. */
380 misalign = (uintptr_t)s & 15;
381 p = (const v16qi *)((uintptr_t)s & -16);
382 data = *p;
383
 384 /* Create a mask for the bytes that are valid within the first
 385 16-byte block. The idea here is that the AND with the mask
386 within the loop is "free", since we need some AND or TEST
387 insn in order to set the flags for the branch anyway. */
388 mask = -1u << misalign;
389
390 /* Main loop processing 16 bytes at a time. */
391 goto start;
392 do
393 {
394 data = *++p;
395 mask = -1;
396
397 start:
398 t = __builtin_ia32_pcmpeqb128(data, repl_nl);
399 t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
400 t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
401 t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
402 found = __builtin_ia32_pmovmskb128 (t);
403 found &= mask;
404 }
405 while (!found);
406
407 /* FOUND contains 1 in bits for which we matched a relevant
408 character. Conversion to the byte index is trivial. */
409 found = __builtin_ctz(found);
410 return (const uchar *)p + found;
411 }
412
413 #ifdef HAVE_SSE4
414 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
415
416 static const uchar *
417 #ifndef __SSE4_2__
418 __attribute__((__target__("sse4.2")))
419 #endif
420 search_line_sse42 (const uchar *s, const uchar *end)
421 {
422 typedef char v16qi __attribute__ ((__vector_size__ (16)));
423 static const v16qi search = { '\n', '\r', '?', '\\' };
424
425 uintptr_t si = (uintptr_t)s;
426 uintptr_t index;
427
428 /* Check for unaligned input. */
429 if (si & 15)
430 {
431 if (__builtin_expect (end - s < 16, 0)
432 && __builtin_expect ((si & 0xfff) > 0xff0, 0))
433 {
434 /* There are less than 16 bytes left in the buffer, and less
435 than 16 bytes left on the page. Reading 16 bytes at this
436 point might generate a spurious page fault. Defer to the
437 SSE2 implementation, which already handles alignment. */
438 return search_line_sse2 (s, end);
439 }
440
441 /* ??? The builtin doesn't understand that the PCMPESTRI read from
442 memory need not be aligned. */
443 __asm ("%vpcmpestri $0, (%1), %2"
444 : "=c"(index) : "r"(s), "x"(search), "a"(4), "d"(16));
445 if (__builtin_expect (index < 16, 0))
446 goto found;
447
448 /* Advance the pointer to an aligned address. We will re-scan a
449 few bytes, but we no longer need care for reading past the
450 end of a page, since we're guaranteed a match. */
451 s = (const uchar *)((si + 16) & -16);
452 }
453
454 /* Main loop, processing 16 bytes at a time. By doing the whole loop
455 in inline assembly, we can make proper use of the flags set. */
456 __asm ( "sub $16, %1\n"
457 " .balign 16\n"
458 "0: add $16, %1\n"
459 " %vpcmpestri $0, (%1), %2\n"
460 " jnc 0b"
461 : "=&c"(index), "+r"(s)
462 : "x"(search), "a"(4), "d"(16));
463
464 found:
465 return s + index;
466 }
467
468 #else
469 /* Work around out-dated assemblers without sse4 support. */
470 #define search_line_sse42 search_line_sse2
471 #endif
472
473 /* Check the CPU capabilities. */
474
475 #include "../gcc/config/i386/cpuid.h"
476
477 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
478 static search_line_fast_type search_line_fast;
479
480 static void __attribute__((constructor))
481 init_vectorized_lexer (void)
482 {
483 unsigned dummy, ecx = 0, edx = 0;
484 search_line_fast_type impl = search_line_acc_char;
485 int minimum = 0;
486
487 #if defined(__SSE4_2__)
488 minimum = 3;
489 #elif defined(__SSE2__)
490 minimum = 2;
491 #elif defined(__SSE__) || defined(__3dNOW_A__)
492 minimum = 1;
493 #endif
494
495 if (minimum == 3)
496 impl = search_line_sse42;
497 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
498 {
499 if (minimum == 3 || (ecx & bit_SSE4_2))
500 impl = search_line_sse42;
501 else if (minimum == 2 || (edx & bit_SSE2))
502 impl = search_line_sse2;
503 else if (minimum == 1 || (edx & bit_SSE))
504 impl = search_line_mmx;
505 }
506 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
507 {
508 if (minimum == 1 || edx & bit_3DNOWP)
509 impl = search_line_mmx;
510 }
511
512 search_line_fast = impl;
513 }
514
515 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
516
 517 /* A version of the fast scanner using AltiVec vectorized byte compares. */
518 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
519 so we can't compile this function without -maltivec on the command line
520 (or implied by some other switch). */
521
522 static const uchar *
523 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
524 {
525 typedef __attribute__((altivec(vector))) unsigned char vc;
526
527 const vc repl_nl = {
528 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
529 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
530 };
531 const vc repl_cr = {
532 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
533 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
534 };
535 const vc repl_bs = {
536 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
537 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
538 };
539 const vc repl_qm = {
540 '?', '?', '?', '?', '?', '?', '?', '?',
541 '?', '?', '?', '?', '?', '?', '?', '?',
542 };
543 const vc ones = {
544 -1, -1, -1, -1, -1, -1, -1, -1,
545 -1, -1, -1, -1, -1, -1, -1, -1,
546 };
547 const vc zero = { 0 };
548
549 vc data, mask, t;
550
551 /* Altivec loads automatically mask addresses with -16. This lets us
552 issue the first load as early as possible. */
553 data = __builtin_vec_ld(0, (const vc *)s);
554
555 /* Discard bytes before the beginning of the buffer. Do this by
556 beginning with all ones and shifting in zeros according to the
557 mis-alignment. The LVSR instruction pulls the exact shift we
558 want from the address. */
559 mask = __builtin_vec_lvsr(0, s);
560 mask = __builtin_vec_perm(zero, ones, mask);
561 data &= mask;
562
563 /* While altivec loads mask addresses, we still need to align S so
564 that the offset we compute at the end is correct. */
565 s = (const uchar *)((uintptr_t)s & -16);
566
567 /* Main loop processing 16 bytes at a time. */
568 goto start;
569 do
570 {
571 vc m_nl, m_cr, m_bs, m_qm;
572
573 s += 16;
574 data = __builtin_vec_ld(0, (const vc *)s);
575
576 start:
577 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
578 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
579 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
580 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
581 t = (m_nl | m_cr) | (m_bs | m_qm);
582
583 /* T now contains 0xff in bytes for which we matched one of the relevant
584 characters. We want to exit the loop if any byte in T is non-zero.
585 Below is the expansion of vec_any_ne(t, zero). */
586 }
587 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
588
589 {
590 #define N (sizeof(vc) / sizeof(long))
591
592 typedef char check_count[(N == 2 || N == 4) * 2 - 1];
593 union {
594 vc v;
595 unsigned long l[N];
596 } u;
597 unsigned long l, i = 0;
598
599 u.v = t;
600
601 /* Find the first word of T that is non-zero. */
602 switch (N)
603 {
604 case 4:
605 l = u.l[i++];
606 if (l != 0)
607 break;
608 s += sizeof(unsigned long);
609 l = u.l[i++];
610 if (l != 0)
611 break;
612 s += sizeof(unsigned long);
613 case 2:
614 l = u.l[i++];
615 if (l != 0)
616 break;
617 s += sizeof(unsigned long);
618 l = u.l[i];
619 }
620
621 /* L now contains 0xff in bytes for which we matched one of the
622 relevant characters. We can find the byte index by finding
623 its bit index and dividing by 8. */
624 l = __builtin_clzl(l) >> 3;
625 return s + l;
626
627 #undef N
628 }
629 }
630
631 #else
632
 633 /* We only have one accelerated alternative. Use a direct call so that
634 we encourage inlining. */
635
636 #define search_line_fast search_line_acc_char
637
638 #endif
639
99 /* Returns with a logical line that contains no escaped newlines or 640 /* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */ 641 trigraphs. This is a time-critical inner loop. */
101 void 642 void
102 _cpp_clean_line (cpp_reader *pfile) 643 _cpp_clean_line (cpp_reader *pfile)
103 { 644 {
107 648
108 buffer = pfile->buffer; 649 buffer = pfile->buffer;
109 buffer->cur_note = buffer->notes_used = 0; 650 buffer->cur_note = buffer->notes_used = 0;
110 buffer->cur = buffer->line_base = buffer->next_line; 651 buffer->cur = buffer->line_base = buffer->next_line;
111 buffer->need_line = false; 652 buffer->need_line = false;
112 s = buffer->next_line - 1; 653 s = buffer->next_line;
113 654
114 if (!buffer->from_stage3) 655 if (!buffer->from_stage3)
115 { 656 {
116 const uchar *pbackslash = NULL; 657 const uchar *pbackslash = NULL;
117 658
118 /* Short circuit for the common case of an un-escaped line with 659 /* Fast path. This is the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any 660 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */ 661 data back to memory until we have to. */
121 for (;;) 662 while (1)
122 { 663 {
123 c = *++s; 664 /* Perform an optimized search for \n, \r, \\, ?. */
124 if (__builtin_expect (c == '\n', false) 665 s = search_line_fast (s, buffer->rlimit);
125 || __builtin_expect (c == '\r', false)) 666
667 c = *s;
668 if (c == '\\')
126 { 669 {
127 d = (uchar *) s; 670 /* Record the location of the backslash and continue. */
128 671 pbackslash = s++;
129 if (__builtin_expect (s == buffer->rlimit, false)) 672 }
130 goto done; 673 else if (__builtin_expect (c == '?', 0))
131 674 {
132 /* DOS line ending? */ 675 if (__builtin_expect (s[1] == '?', false)
133 if (__builtin_expect (c == '\r', false) 676 && _cpp_trigraph_map[s[2]])
134 && s[1] == '\n')
135 { 677 {
136 s++; 678 /* Have a trigraph. We may or may not have to convert
137 if (s == buffer->rlimit) 679 it. Add a line note regardless, for -Wtrigraphs. */
138 goto done; 680 add_line_note (buffer, s, s[2]);
681 if (CPP_OPTION (pfile, trigraphs))
682 {
683 /* We do, and that means we have to switch to the
684 slow path. */
685 d = (uchar *) s;
686 *d = _cpp_trigraph_map[s[2]];
687 s += 2;
688 goto slow_path;
689 }
139 } 690 }
140 691 /* Not a trigraph. Continue on fast-path. */
141 if (__builtin_expect (pbackslash == NULL, true)) 692 s++;
142 goto done;
143
144 /* Check for escaped newline. */
145 p = d;
146 while (is_nvspace (p[-1]))
147 p--;
148 if (p - 1 != pbackslash)
149 goto done;
150
151 /* Have an escaped newline; process it and proceed to
152 the slow path. */
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 d = p - 2;
155 buffer->next_line = p - 1;
156 break;
157 } 693 }
158 if (__builtin_expect (c == '\\', false)) 694 else
159 pbackslash = s; 695 break;
160 else if (__builtin_expect (c == '?', false) 696 }
161 && __builtin_expect (s[1] == '?', false) 697
162 && _cpp_trigraph_map[s[2]]) 698 /* This must be \r or \n. We're either done, or we'll be forced
163 { 699 to write back to the buffer and continue on the slow path. */
164 /* Have a trigraph. We may or may not have to convert 700 d = (uchar *) s;
165 it. Add a line note regardless, for -Wtrigraphs. */ 701
166 add_line_note (buffer, s, s[2]); 702 if (__builtin_expect (s == buffer->rlimit, false))
167 if (CPP_OPTION (pfile, trigraphs)) 703 goto done;
168 { 704
169 /* We do, and that means we have to switch to the 705 /* DOS line ending? */
170 slow path. */ 706 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
171 d = (uchar *) s; 707 {
172 *d = _cpp_trigraph_map[s[2]]; 708 s++;
173 s += 2; 709 if (s == buffer->rlimit)
174 break; 710 goto done;
175 } 711 }
176 } 712
177 } 713 if (__builtin_expect (pbackslash == NULL, true))
178 714 goto done;
179 715
180 for (;;) 716 /* Check for escaped newline. */
717 p = d;
718 while (is_nvspace (p[-1]))
719 p--;
720 if (p - 1 != pbackslash)
721 goto done;
722
723 /* Have an escaped newline; process it and proceed to
724 the slow path. */
725 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
726 d = p - 2;
727 buffer->next_line = p - 1;
728
729 slow_path:
730 while (1)
181 { 731 {
182 c = *++s; 732 c = *++s;
183 *++d = c; 733 *++d = c;
184 734
185 if (c == '\n' || c == '\r') 735 if (c == '\n' || c == '\r')
186 { 736 {
187 /* Handle DOS line endings. */ 737 /* Handle DOS line endings. */
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 738 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 s++; 739 s++;
190 if (s == buffer->rlimit) 740 if (s == buffer->rlimit)
191 break; 741 break;
192 742
213 } 763 }
214 } 764 }
215 } 765 }
216 else 766 else
217 { 767 {
218 do 768 while (*s != '\n' && *s != '\r')
219 s++; 769 s++;
220 while (*s != '\n' && *s != '\r');
221 d = (uchar *) s; 770 d = (uchar *) s;
222 771
223 /* Handle DOS line endings. */ 772 /* Handle DOS line endings. */
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 773 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225 s++; 774 s++;
1090 static void 1639 static void
1091 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, 1640 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1092 cppchar_t type) 1641 cppchar_t type)
1093 { 1642 {
1094 unsigned char *buffer; 1643 unsigned char *buffer;
1095 unsigned int len, clen; 1644 unsigned int len, clen, i;
1096 1645
1097 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 1646 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1098 1647
1099 /* C++ comments probably (not definitely) have moved past a new 1648 /* C++ comments probably (not definitely) have moved past a new
1100 line, which we don't want to save in the comment. */ 1649 line, which we don't want to save in the comment. */
1101 if (is_vspace (pfile->buffer->cur[-1])) 1650 if (is_vspace (pfile->buffer->cur[-1]))
1102 len--; 1651 len--;
1103 1652
1104 /* If we are currently in a directive, then we need to store all 1653 /* If we are currently in a directive or in argument parsing, then
1105 C++ comments as C comments internally, and so we need to 1654 we need to store all C++ comments as C comments internally, and
1106 allocate a little extra space in that case. 1655 so we need to allocate a little extra space in that case.
1107 1656
1108 Note that the only time we encounter a directive here is 1657 Note that the only time we encounter a directive here is
1109 when we are saving comments in a "#define". */ 1658 when we are saving comments in a "#define". */
1110 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; 1659 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1660 && type == '/') ? len + 2 : len;
1111 1661
1112 buffer = _cpp_unaligned_alloc (pfile, clen); 1662 buffer = _cpp_unaligned_alloc (pfile, clen);
1113 1663
1114 token->type = CPP_COMMENT; 1664 token->type = CPP_COMMENT;
1115 token->val.str.len = clen; 1665 token->val.str.len = clen;
1117 1667
1118 buffer[0] = '/'; 1668 buffer[0] = '/';
1119 memcpy (buffer + 1, from, len - 1); 1669 memcpy (buffer + 1, from, len - 1);
1120 1670
1121 /* Finish conversion to a C comment, if necessary. */ 1671 /* Finish conversion to a C comment, if necessary. */
1122 if (pfile->state.in_directive && type == '/') 1672 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1123 { 1673 {
1124 buffer[1] = '*'; 1674 buffer[1] = '*';
1125 buffer[clen - 2] = '*'; 1675 buffer[clen - 2] = '*';
1126 buffer[clen - 1] = '/'; 1676 buffer[clen - 1] = '/';
 1677 /* As C++ comments can contain sequences that are illegal in C comments,
 1678 we need to filter them out. */
1679 for (i = 2; i < (clen - 2); i++)
1680 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1681 buffer[i] = '|';
1127 } 1682 }
1128 1683
1129 /* Finally store this comment for use by clients of libcpp. */ 1684 /* Finally store this comment for use by clients of libcpp. */
1130 store_comment (pfile, token); 1685 store_comment (pfile, token);
1131 } 1686 }