Mercurial > hg > CbC > CbC_gcc
annotate libcpp/lex.c @ 47:3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 07 Feb 2010 17:44:34 +0900 |
parents | a06113de4d67 |
children | 77e2b8dfacca |
rev | line source |
---|---|
0 | 1 /* CPP Library - lexical analysis. |
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009 | |
3 Free Software Foundation, Inc. | |
4 Contributed by Per Bothner, 1994-95. | |
5 Based on CCCP program by Paul Rubin, June 1986 | |
6 Adapted to ANSI C, Richard Stallman, Jan 1987 | |
7 Broken out to separate file, Zack Weinberg, Mar 2000 | |
8 | |
9 This program is free software; you can redistribute it and/or modify it | |
10 under the terms of the GNU General Public License as published by the | |
11 Free Software Foundation; either version 3, or (at your option) any | |
12 later version. | |
13 | |
14 This program is distributed in the hope that it will be useful, | |
15 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 GNU General Public License for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with this program; see the file COPYING3. If not see | |
21 <http://www.gnu.org/licenses/>. */ | |
22 | |
23 #include "config.h" | |
24 #include "system.h" | |
25 #include "cpplib.h" | |
26 #include "internal.h" | |
27 | |
28 enum spell_type | |
29 { | |
30 SPELL_OPERATOR = 0, | |
31 SPELL_IDENT, | |
32 SPELL_LITERAL, | |
33 SPELL_NONE | |
34 }; | |
35 | |
36 struct token_spelling | |
37 { | |
38 enum spell_type category; | |
39 const unsigned char *name; | |
40 }; | |
41 | |
42 static const unsigned char *const digraph_spellings[] = | |
43 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" }; | |
44 | |
45 #define OP(e, s) { SPELL_OPERATOR, UC s }, | |
46 #define TK(e, s) { SPELL_ ## s, UC #e }, | |
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; | |
48 #undef OP | |
49 #undef TK | |
50 | |
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) | |
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name) | |
53 | |
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int); | |
55 static int skip_line_comment (cpp_reader *); | |
56 static void skip_whitespace (cpp_reader *, cppchar_t); | |
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *); | |
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); | |
59 static void store_comment (cpp_reader *, cpp_token *); | |
60 static void create_literal (cpp_reader *, cpp_token *, const uchar *, | |
61 unsigned int, enum cpp_ttype); | |
62 static bool warn_in_comment (cpp_reader *, _cpp_line_note *); | |
63 static int name_p (cpp_reader *, const cpp_string *); | |
64 static tokenrun *next_tokenrun (tokenrun *); | |
65 | |
66 static _cpp_buff *new_buff (size_t); | |
67 | |
68 | |
69 /* Utility routine: | |
70 | |
71 Compares, the token TOKEN to the NUL-terminated string STRING. | |
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ | |
73 int | |
74 cpp_ideq (const cpp_token *token, const char *string) | |
75 { | |
76 if (token->type != CPP_NAME) | |
77 return 0; | |
78 | |
79 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); | |
80 } | |
81 | |
82 /* Record a note TYPE at byte POS into the current cleaned logical | |
83 line. */ | |
84 static void | |
85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) | |
86 { | |
87 if (buffer->notes_used == buffer->notes_cap) | |
88 { | |
89 buffer->notes_cap = buffer->notes_cap * 2 + 200; | |
90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, | |
91 buffer->notes_cap); | |
92 } | |
93 | |
94 buffer->notes[buffer->notes_used].pos = pos; | |
95 buffer->notes[buffer->notes_used].type = type; | |
96 buffer->notes_used++; | |
97 } | |
98 | |
99 /* Returns with a logical line that contains no escaped newlines or | |
100 trigraphs. This is a time-critical inner loop. */ | |
101 void | |
102 _cpp_clean_line (cpp_reader *pfile) | |
103 { | |
104 cpp_buffer *buffer; | |
105 const uchar *s; | |
106 uchar c, *d, *p; | |
107 | |
108 buffer = pfile->buffer; | |
109 buffer->cur_note = buffer->notes_used = 0; | |
110 buffer->cur = buffer->line_base = buffer->next_line; | |
111 buffer->need_line = false; | |
112 s = buffer->next_line - 1; | |
113 | |
114 if (!buffer->from_stage3) | |
115 { | |
116 const uchar *pbackslash = NULL; | |
117 | |
118 /* Short circuit for the common case of an un-escaped line with | |
119 no trigraphs. The primary win here is by not writing any | |
120 data back to memory until we have to. */ | |
121 for (;;) | |
122 { | |
123 c = *++s; | |
124 if (__builtin_expect (c == '\n', false) | |
125 || __builtin_expect (c == '\r', false)) | |
126 { | |
127 d = (uchar *) s; | |
128 | |
129 if (__builtin_expect (s == buffer->rlimit, false)) | |
130 goto done; | |
131 | |
132 /* DOS line ending? */ | |
133 if (__builtin_expect (c == '\r', false) | |
134 && s[1] == '\n') | |
135 { | |
136 s++; | |
137 if (s == buffer->rlimit) | |
138 goto done; | |
139 } | |
140 | |
141 if (__builtin_expect (pbackslash == NULL, true)) | |
142 goto done; | |
143 | |
144 /* Check for escaped newline. */ | |
145 p = d; | |
146 while (is_nvspace (p[-1])) | |
147 p--; | |
148 if (p - 1 != pbackslash) | |
149 goto done; | |
150 | |
151 /* Have an escaped newline; process it and proceed to | |
152 the slow path. */ | |
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); | |
154 d = p - 2; | |
155 buffer->next_line = p - 1; | |
156 break; | |
157 } | |
158 if (__builtin_expect (c == '\\', false)) | |
159 pbackslash = s; | |
160 else if (__builtin_expect (c == '?', false) | |
161 && __builtin_expect (s[1] == '?', false) | |
162 && _cpp_trigraph_map[s[2]]) | |
163 { | |
164 /* Have a trigraph. We may or may not have to convert | |
165 it. Add a line note regardless, for -Wtrigraphs. */ | |
166 add_line_note (buffer, s, s[2]); | |
167 if (CPP_OPTION (pfile, trigraphs)) | |
168 { | |
169 /* We do, and that means we have to switch to the | |
170 slow path. */ | |
171 d = (uchar *) s; | |
172 *d = _cpp_trigraph_map[s[2]]; | |
173 s += 2; | |
174 break; | |
175 } | |
176 } | |
177 } | |
178 | |
179 | |
180 for (;;) | |
181 { | |
182 c = *++s; | |
183 *++d = c; | |
184 | |
185 if (c == '\n' || c == '\r') | |
186 { | |
187 /* Handle DOS line endings. */ | |
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n') | |
189 s++; | |
190 if (s == buffer->rlimit) | |
191 break; | |
192 | |
193 /* Escaped? */ | |
194 p = d; | |
195 while (p != buffer->next_line && is_nvspace (p[-1])) | |
196 p--; | |
197 if (p == buffer->next_line || p[-1] != '\\') | |
198 break; | |
199 | |
200 add_line_note (buffer, p - 1, p != d ? ' ': '\\'); | |
201 d = p - 2; | |
202 buffer->next_line = p - 1; | |
203 } | |
204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) | |
205 { | |
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */ | |
207 add_line_note (buffer, d, s[2]); | |
208 if (CPP_OPTION (pfile, trigraphs)) | |
209 { | |
210 *d = _cpp_trigraph_map[s[2]]; | |
211 s += 2; | |
212 } | |
213 } | |
214 } | |
215 } | |
216 else | |
217 { | |
218 do | |
219 s++; | |
220 while (*s != '\n' && *s != '\r'); | |
221 d = (uchar *) s; | |
222 | |
223 /* Handle DOS line endings. */ | |
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') | |
225 s++; | |
226 } | |
227 | |
228 done: | |
229 *d = '\n'; | |
230 /* A sentinel note that should never be processed. */ | |
231 add_line_note (buffer, d + 1, '\n'); | |
232 buffer->next_line = s + 1; | |
233 } | |
234 | |
235 /* Return true if the trigraph indicated by NOTE should be warned | |
236 about in a comment. */ | |
237 static bool | |
238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) | |
239 { | |
240 const uchar *p; | |
241 | |
242 /* Within comments we don't warn about trigraphs, unless the | |
243 trigraph forms an escaped newline, as that may change | |
244 behavior. */ | |
245 if (note->type != '/') | |
246 return false; | |
247 | |
248 /* If -trigraphs, then this was an escaped newline iff the next note | |
249 is coincident. */ | |
250 if (CPP_OPTION (pfile, trigraphs)) | |
251 return note[1].pos == note->pos; | |
252 | |
253 /* Otherwise, see if this forms an escaped newline. */ | |
254 p = note->pos + 3; | |
255 while (is_nvspace (*p)) | |
256 p++; | |
257 | |
258 /* There might have been escaped newlines between the trigraph and the | |
259 newline we found. Hence the position test. */ | |
260 return (*p == '\n' && p < note[1].pos); | |
261 } | |
262 | |
263 /* Process the notes created by add_line_note as far as the current | |
264 location. */ | |
265 void | |
266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment) | |
267 { | |
268 cpp_buffer *buffer = pfile->buffer; | |
269 | |
270 for (;;) | |
271 { | |
272 _cpp_line_note *note = &buffer->notes[buffer->cur_note]; | |
273 unsigned int col; | |
274 | |
275 if (note->pos > buffer->cur) | |
276 break; | |
277 | |
278 buffer->cur_note++; | |
279 col = CPP_BUF_COLUMN (buffer, note->pos + 1); | |
280 | |
281 if (note->type == '\\' || note->type == ' ') | |
282 { | |
283 if (note->type == ' ' && !in_comment) | |
284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, | |
285 "backslash and newline separated by space"); | |
286 | |
287 if (buffer->next_line > buffer->rlimit) | |
288 { | |
289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, | |
290 "backslash-newline at end of file"); | |
291 /* Prevent "no newline at end of file" warning. */ | |
292 buffer->next_line = buffer->rlimit; | |
293 } | |
294 | |
295 buffer->line_base = note->pos; | |
296 CPP_INCREMENT_LINE (pfile, 0); | |
297 } | |
298 else if (_cpp_trigraph_map[note->type]) | |
299 { | |
300 if (CPP_OPTION (pfile, warn_trigraphs) | |
301 && (!in_comment || warn_in_comment (pfile, note))) | |
302 { | |
303 if (CPP_OPTION (pfile, trigraphs)) | |
304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, | |
305 "trigraph ??%c converted to %c", | |
306 note->type, | |
307 (int) _cpp_trigraph_map[note->type]); | |
308 else | |
309 { | |
310 cpp_error_with_line | |
311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, | |
312 "trigraph ??%c ignored, use -trigraphs to enable", | |
313 note->type); | |
314 } | |
315 } | |
316 } | |
317 else | |
318 abort (); | |
319 } | |
320 } | |
321 | |
322 /* Skip a C-style block comment. We find the end of the comment by | |
323 seeing if an asterisk is before every '/' we encounter. Returns | |
324 nonzero if comment terminated by EOF, zero otherwise. | |
325 | |
326 Buffer->cur points to the initial asterisk of the comment. */ | |
327 bool | |
328 _cpp_skip_block_comment (cpp_reader *pfile) | |
329 { | |
330 cpp_buffer *buffer = pfile->buffer; | |
331 const uchar *cur = buffer->cur; | |
332 uchar c; | |
333 | |
334 cur++; | |
335 if (*cur == '/') | |
336 cur++; | |
337 | |
338 for (;;) | |
339 { | |
340 /* People like decorating comments with '*', so check for '/' | |
341 instead for efficiency. */ | |
342 c = *cur++; | |
343 | |
344 if (c == '/') | |
345 { | |
346 if (cur[-2] == '*') | |
347 break; | |
348 | |
349 /* Warn about potential nested comments, but not if the '/' | |
350 comes immediately before the true comment delimiter. | |
351 Don't bother to get it right across escaped newlines. */ | |
352 if (CPP_OPTION (pfile, warn_comments) | |
353 && cur[0] == '*' && cur[1] != '/') | |
354 { | |
355 buffer->cur = cur; | |
356 cpp_error_with_line (pfile, CPP_DL_WARNING, | |
357 pfile->line_table->highest_line, CPP_BUF_COL (buffer), | |
358 "\"/*\" within comment"); | |
359 } | |
360 } | |
361 else if (c == '\n') | |
362 { | |
363 unsigned int cols; | |
364 buffer->cur = cur - 1; | |
365 _cpp_process_line_notes (pfile, true); | |
366 if (buffer->next_line >= buffer->rlimit) | |
367 return true; | |
368 _cpp_clean_line (pfile); | |
369 | |
370 cols = buffer->next_line - buffer->line_base; | |
371 CPP_INCREMENT_LINE (pfile, cols); | |
372 | |
373 cur = buffer->cur; | |
374 } | |
375 } | |
376 | |
377 buffer->cur = cur; | |
378 _cpp_process_line_notes (pfile, true); | |
379 return false; | |
380 } | |
381 | |
382 /* Skip a C++ line comment, leaving buffer->cur pointing to the | |
383 terminating newline. Handles escaped newlines. Returns nonzero | |
384 if a multiline comment. */ | |
385 static int | |
386 skip_line_comment (cpp_reader *pfile) | |
387 { | |
388 cpp_buffer *buffer = pfile->buffer; | |
389 source_location orig_line = pfile->line_table->highest_line; | |
390 | |
391 while (*buffer->cur != '\n') | |
392 buffer->cur++; | |
393 | |
394 _cpp_process_line_notes (pfile, true); | |
395 return orig_line != pfile->line_table->highest_line; | |
396 } | |
397 | |
398 /* Skips whitespace, saving the next non-whitespace character. */ | |
399 static void | |
400 skip_whitespace (cpp_reader *pfile, cppchar_t c) | |
401 { | |
402 cpp_buffer *buffer = pfile->buffer; | |
403 bool saw_NUL = false; | |
404 | |
405 do | |
406 { | |
407 /* Horizontal space always OK. */ | |
408 if (c == ' ' || c == '\t') | |
409 ; | |
410 /* Just \f \v or \0 left. */ | |
411 else if (c == '\0') | |
412 saw_NUL = true; | |
413 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) | |
414 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, | |
415 CPP_BUF_COL (buffer), | |
416 "%s in preprocessing directive", | |
417 c == '\f' ? "form feed" : "vertical tab"); | |
418 | |
419 c = *buffer->cur++; | |
420 } | |
421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ | |
422 while (is_nvspace (c)); | |
423 | |
424 if (saw_NUL) | |
425 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); | |
426 | |
427 buffer->cur--; | |
428 } | |
429 | |
430 /* See if the characters of a number token are valid in a name (no | |
431 '.', '+' or '-'). */ | |
432 static int | |
433 name_p (cpp_reader *pfile, const cpp_string *string) | |
434 { | |
435 unsigned int i; | |
436 | |
437 for (i = 0; i < string->len; i++) | |
438 if (!is_idchar (string->text[i])) | |
439 return 0; | |
440 | |
441 return 1; | |
442 } | |
443 | |
444 /* After parsing an identifier or other sequence, produce a warning about | |
445 sequences not in NFC/NFKC. */ | |
446 static void | |
447 warn_about_normalization (cpp_reader *pfile, | |
448 const cpp_token *token, | |
449 const struct normalize_state *s) | |
450 { | |
451 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) | |
452 && !pfile->state.skipping) | |
453 { | |
454 /* Make sure that the token is printed using UCNs, even | |
455 if we'd otherwise happily print UTF-8. */ | |
456 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); | |
457 size_t sz; | |
458 | |
459 sz = cpp_spell_token (pfile, token, buf, false) - buf; | |
460 if (NORMALIZE_STATE_RESULT (s) == normalized_C) | |
461 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, | |
462 "`%.*s' is not in NFKC", (int) sz, buf); | |
463 else | |
464 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, | |
465 "`%.*s' is not in NFC", (int) sz, buf); | |
466 } | |
467 } | |
468 | |
469 /* Returns TRUE if the sequence starting at buffer->cur is invalid in | |
470 an identifier. FIRST is TRUE if this starts an identifier. */ | |
471 static bool | |
472 forms_identifier_p (cpp_reader *pfile, int first, | |
473 struct normalize_state *state) | |
474 { | |
475 cpp_buffer *buffer = pfile->buffer; | |
476 | |
477 if (*buffer->cur == '$') | |
478 { | |
479 if (!CPP_OPTION (pfile, dollars_in_ident)) | |
480 return false; | |
481 | |
482 buffer->cur++; | |
483 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) | |
484 { | |
485 CPP_OPTION (pfile, warn_dollars) = 0; | |
486 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); | |
487 } | |
488 | |
489 return true; | |
490 } | |
491 | |
492 /* Is this a syntactically valid UCN? */ | |
493 if (CPP_OPTION (pfile, extended_identifiers) | |
494 && *buffer->cur == '\\' | |
495 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) | |
496 { | |
497 buffer->cur += 2; | |
498 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, | |
499 state)) | |
500 return true; | |
501 buffer->cur -= 2; | |
502 } | |
503 | |
504 return false; | |
505 } | |
506 | |
47
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
507 /* Helper function to get the cpp_hashnode of the identifier BASE. */ |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
508 static cpp_hashnode * |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
509 lex_identifier_intern (cpp_reader *pfile, const uchar *base) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
510 { |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
511 cpp_hashnode *result; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
512 const uchar *cur; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
513 unsigned int len; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
514 unsigned int hash = HT_HASHSTEP (0, *base); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
515 |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
516 cur = base + 1; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
517 while (ISIDNUM (*cur)) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
518 { |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
519 hash = HT_HASHSTEP (hash, *cur); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
520 cur++; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
521 } |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
522 len = cur - base; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
523 hash = HT_HASHFINISH (hash, len); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
524 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
525 base, len, hash, HT_ALLOC)); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
526 |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
527 /* Rarely, identifiers require diagnostics when lexed. */ |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
528 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
529 && !pfile->state.skipping, 0)) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
530 { |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
531 /* It is allowed to poison the same identifier twice. */ |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
532 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
533 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
534 NODE_NAME (result)); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
535 |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
536 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
537 replacement list of a variadic macro. */ |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
538 if (result == pfile->spec_nodes.n__VA_ARGS__ |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
539 && !pfile->state.va_args_ok) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
540 cpp_error (pfile, CPP_DL_PEDWARN, |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
541 "__VA_ARGS__ can only appear in the expansion" |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
542 " of a C99 variadic macro"); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
543 } |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
544 |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
545 return result; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
546 } |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
547 |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
548 /* Get the cpp_hashnode of an identifier specified by NAME in |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
549 the current cpp_reader object. If none is found, NULL is returned. */ |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
550 cpp_hashnode * |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
551 _cpp_lex_identifier (cpp_reader *pfile, const char *name) |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
552 { |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
553 cpp_hashnode *result; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
554 result = lex_identifier_intern (pfile, (uchar *) name); |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
555 return result; |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
556 } |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
557 |
0 | 558 /* Lex an identifier starting at BUFFER->CUR - 1. */ |
559 static cpp_hashnode * | |
560 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, | |
561 struct normalize_state *nst) | |
562 { | |
563 cpp_hashnode *result; | |
564 const uchar *cur; | |
565 unsigned int len; | |
566 unsigned int hash = HT_HASHSTEP (0, *base); | |
567 | |
568 cur = pfile->buffer->cur; | |
569 if (! starts_ucn) | |
570 while (ISIDNUM (*cur)) | |
571 { | |
572 hash = HT_HASHSTEP (hash, *cur); | |
573 cur++; | |
574 } | |
575 pfile->buffer->cur = cur; | |
576 if (starts_ucn || forms_identifier_p (pfile, false, nst)) | |
577 { | |
578 /* Slower version for identifiers containing UCNs (or $). */ | |
579 do { | |
580 while (ISIDNUM (*pfile->buffer->cur)) | |
581 { | |
582 pfile->buffer->cur++; | |
583 NORMALIZE_STATE_UPDATE_IDNUM (nst); | |
584 } | |
585 } while (forms_identifier_p (pfile, false, nst)); | |
586 result = _cpp_interpret_identifier (pfile, base, | |
587 pfile->buffer->cur - base); | |
588 } | |
589 else | |
590 { | |
591 len = cur - base; | |
592 hash = HT_HASHFINISH (hash, len); | |
593 | |
594 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, | |
595 base, len, hash, HT_ALLOC)); | |
596 } | |
597 | |
598 /* Rarely, identifiers require diagnostics when lexed. */ | |
599 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) | |
600 && !pfile->state.skipping, 0)) | |
601 { | |
602 /* It is allowed to poison the same identifier twice. */ | |
603 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) | |
604 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", | |
605 NODE_NAME (result)); | |
606 | |
607 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the | |
608 replacement list of a variadic macro. */ | |
609 if (result == pfile->spec_nodes.n__VA_ARGS__ | |
610 && !pfile->state.va_args_ok) | |
611 cpp_error (pfile, CPP_DL_PEDWARN, | |
612 "__VA_ARGS__ can only appear in the expansion" | |
613 " of a C99 variadic macro"); | |
614 } | |
615 | |
616 return result; | |
617 } | |
618 | |
619 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ | |
620 static void | |
621 lex_number (cpp_reader *pfile, cpp_string *number, | |
622 struct normalize_state *nst) | |
623 { | |
624 const uchar *cur; | |
625 const uchar *base; | |
626 uchar *dest; | |
627 | |
628 base = pfile->buffer->cur - 1; | |
629 do | |
630 { | |
631 cur = pfile->buffer->cur; | |
632 | |
633 /* N.B. ISIDNUM does not include $. */ | |
634 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) | |
635 { | |
636 cur++; | |
637 NORMALIZE_STATE_UPDATE_IDNUM (nst); | |
638 } | |
639 | |
640 pfile->buffer->cur = cur; | |
641 } | |
642 while (forms_identifier_p (pfile, false, nst)); | |
643 | |
644 number->len = cur - base; | |
645 dest = _cpp_unaligned_alloc (pfile, number->len + 1); | |
646 memcpy (dest, base, number->len); | |
647 dest[number->len] = '\0'; | |
648 number->text = dest; | |
649 } | |
650 | |
651 /* Create a token of type TYPE with a literal spelling. */ | |
652 static void | |
653 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, | |
654 unsigned int len, enum cpp_ttype type) | |
655 { | |
656 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); | |
657 | |
658 memcpy (dest, base, len); | |
659 dest[len] = '\0'; | |
660 token->type = type; | |
661 token->val.str.len = len; | |
662 token->val.str.text = dest; | |
663 } | |
664 | |
665 /* Lexes a string, character constant, or angle-bracketed header file | |
666 name. The stored string contains the spelling, including opening | |
667 quote and leading any leading 'L', 'u' or 'U'. It returns the type | |
668 of the literal, or CPP_OTHER if it was not properly terminated, or | |
669 CPP_LESS for an unterminated header name which must be relexed as | |
670 normal tokens. | |
671 | |
672 The spelling is NUL-terminated, but it is not guaranteed that this | |
673 is the first NUL since embedded NULs are preserved. */ | |
674 static void | |
675 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) | |
676 { | |
677 bool saw_NUL = false; | |
678 const uchar *cur; | |
679 cppchar_t terminator; | |
680 enum cpp_ttype type; | |
681 | |
682 cur = base; | |
683 terminator = *cur++; | |
684 if (terminator == 'L' || terminator == 'u' || terminator == 'U') | |
685 terminator = *cur++; | |
686 if (terminator == '\"') | |
687 type = (*base == 'L' ? CPP_WSTRING : | |
688 *base == 'U' ? CPP_STRING32 : | |
689 *base == 'u' ? CPP_STRING16 : CPP_STRING); | |
690 else if (terminator == '\'') | |
691 type = (*base == 'L' ? CPP_WCHAR : | |
692 *base == 'U' ? CPP_CHAR32 : | |
693 *base == 'u' ? CPP_CHAR16 : CPP_CHAR); | |
694 else | |
695 terminator = '>', type = CPP_HEADER_NAME; | |
696 | |
697 for (;;) | |
698 { | |
699 cppchar_t c = *cur++; | |
700 | |
701 /* In #include-style directives, terminators are not escapable. */ | |
702 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') | |
703 cur++; | |
704 else if (c == terminator) | |
705 break; | |
706 else if (c == '\n') | |
707 { | |
708 cur--; | |
709 /* Unmatched quotes always yield undefined behavior, but | |
710 greedy lexing means that what appears to be an unterminated | |
711 header name may actually be a legitimate sequence of tokens. */ | |
712 if (terminator == '>') | |
713 { | |
714 token->type = CPP_LESS; | |
715 return; | |
716 } | |
717 type = CPP_OTHER; | |
718 break; | |
719 } | |
720 else if (c == '\0') | |
721 saw_NUL = true; | |
722 } | |
723 | |
724 if (saw_NUL && !pfile->state.skipping) | |
725 cpp_error (pfile, CPP_DL_WARNING, | |
726 "null character(s) preserved in literal"); | |
727 | |
728 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM) | |
729 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character", | |
730 (int) terminator); | |
731 | |
732 pfile->buffer->cur = cur; | |
733 create_literal (pfile, token, base, cur - base, type); | |
734 } | |
735 | |
736 /* Return the comment table. The client may not make any assumption | |
737 about the ordering of the table. */ | |
738 cpp_comment_table * | |
739 cpp_get_comments (cpp_reader *pfile) | |
740 { | |
741 return &pfile->comments; | |
742 } | |
743 | |
744 /* Append a comment to the end of the comment table. */ | |
745 static void | |
746 store_comment (cpp_reader *pfile, cpp_token *token) | |
747 { | |
748 int len; | |
749 | |
750 if (pfile->comments.allocated == 0) | |
751 { | |
752 pfile->comments.allocated = 256; | |
753 pfile->comments.entries = (cpp_comment *) xmalloc | |
754 (pfile->comments.allocated * sizeof (cpp_comment)); | |
755 } | |
756 | |
757 if (pfile->comments.count == pfile->comments.allocated) | |
758 { | |
759 pfile->comments.allocated *= 2; | |
760 pfile->comments.entries = (cpp_comment *) xrealloc | |
761 (pfile->comments.entries, | |
762 pfile->comments.allocated * sizeof (cpp_comment)); | |
763 } | |
764 | |
765 len = token->val.str.len; | |
766 | |
767 /* Copy comment. Note, token may not be NULL terminated. */ | |
768 pfile->comments.entries[pfile->comments.count].comment = | |
769 (char *) xmalloc (sizeof (char) * (len + 1)); | |
770 memcpy (pfile->comments.entries[pfile->comments.count].comment, | |
771 token->val.str.text, len); | |
772 pfile->comments.entries[pfile->comments.count].comment[len] = '\0'; | |
773 | |
774 /* Set source location. */ | |
775 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc; | |
776 | |
777 /* Increment the count of entries in the comment table. */ | |
778 pfile->comments.count++; | |
779 } | |
780 | |
781 /* The stored comment includes the comment start and any terminator. */ | |
782 static void | |
783 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, | |
784 cppchar_t type) | |
785 { | |
786 unsigned char *buffer; | |
787 unsigned int len, clen; | |
788 | |
789 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ | |
790 | |
791 /* C++ comments probably (not definitely) have moved past a new | |
792 line, which we don't want to save in the comment. */ | |
793 if (is_vspace (pfile->buffer->cur[-1])) | |
794 len--; | |
795 | |
796 /* If we are currently in a directive, then we need to store all | |
797 C++ comments as C comments internally, and so we need to | |
798 allocate a little extra space in that case. | |
799 | |
800 Note that the only time we encounter a directive here is | |
801 when we are saving comments in a "#define". */ | |
802 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; | |
803 | |
804 buffer = _cpp_unaligned_alloc (pfile, clen); | |
805 | |
806 token->type = CPP_COMMENT; | |
807 token->val.str.len = clen; | |
808 token->val.str.text = buffer; | |
809 | |
810 buffer[0] = '/'; | |
811 memcpy (buffer + 1, from, len - 1); | |
812 | |
813 /* Finish conversion to a C comment, if necessary. */ | |
814 if (pfile->state.in_directive && type == '/') | |
815 { | |
816 buffer[1] = '*'; | |
817 buffer[clen - 2] = '*'; | |
818 buffer[clen - 1] = '/'; | |
819 } | |
820 | |
821 /* Finally store this comment for use by clients of libcpp. */ | |
822 store_comment (pfile, token); | |
823 } | |
824 | |
825 /* Allocate COUNT tokens for RUN. */ | |
826 void | |
827 _cpp_init_tokenrun (tokenrun *run, unsigned int count) | |
828 { | |
829 run->base = XNEWVEC (cpp_token, count); | |
830 run->limit = run->base + count; | |
831 run->next = NULL; | |
832 } | |
833 | |
834 /* Returns the next tokenrun, or creates one if there is none. */ | |
835 static tokenrun * | |
836 next_tokenrun (tokenrun *run) | |
837 { | |
838 if (run->next == NULL) | |
839 { | |
840 run->next = XNEW (tokenrun); | |
841 run->next->prev = run; | |
842 _cpp_init_tokenrun (run->next, 250); | |
843 } | |
844 | |
845 return run->next; | |
846 } | |
847 | |
848 /* Look ahead in the input stream. */ | |
849 const cpp_token * | |
850 cpp_peek_token (cpp_reader *pfile, int index) | |
851 { | |
852 cpp_context *context = pfile->context; | |
853 const cpp_token *peektok; | |
854 int count; | |
855 | |
856 /* First, scan through any pending cpp_context objects. */ | |
857 while (context->prev) | |
858 { | |
859 ptrdiff_t sz = (context->direct_p | |
860 ? LAST (context).token - FIRST (context).token | |
861 : LAST (context).ptoken - FIRST (context).ptoken); | |
862 | |
863 if (index < (int) sz) | |
864 return (context->direct_p | |
865 ? FIRST (context).token + index | |
866 : *(FIRST (context).ptoken + index)); | |
867 | |
868 index -= (int) sz; | |
869 context = context->prev; | |
870 } | |
871 | |
872 /* We will have to read some new tokens after all (and do so | |
873 without invalidating preceding tokens). */ | |
874 count = index; | |
875 pfile->keep_tokens++; | |
876 | |
877 do | |
878 { | |
879 peektok = _cpp_lex_token (pfile); | |
880 if (peektok->type == CPP_EOF) | |
881 return peektok; | |
882 } | |
883 while (index--); | |
884 | |
885 _cpp_backup_tokens_direct (pfile, count + 1); | |
886 pfile->keep_tokens--; | |
887 | |
888 return peektok; | |
889 } | |
890 | |
891 /* Allocate a single token that is invalidated at the same time as the | |
892 rest of the tokens on the line. Has its line and col set to the | |
893 same as the last lexed token, so that diagnostics appear in the | |
894 right place. */ | |
/* The new token is placed at pfile->cur_token, which is then advanced.
   Any lookahead tokens already stored from that position onwards are
   first shifted up by one slot to make room for it.  */
895 cpp_token * | |
896 _cpp_temp_token (cpp_reader *pfile) | |
897 { | |
898 cpp_token *old, *result; | |
/* SZ is the number of slots from cur_token to the end of the current
   run; LA is the number of pending lookahead tokens.  */
899 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token; | |
900 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads; | |
901 | |
/* OLD is the token just before cur_token; its src_loc is copied into
   the new token at the end.  */
902 old = pfile->cur_token - 1; | |
903 /* Any pre-existing lookaheads must not be clobbered. */ | |
904 if (la) | |
905 { | |
/* The lookaheads reach the end of this run or continue into the next
   one, so shifting them by one slot spills into the next run.  */
906 if (sz <= la) | |
907 { | |
908 tokenrun *next = next_tokenrun (pfile->cur_run); | |
909 | |
/* Shift the LA - SZ lookaheads already in the next run up one slot.  */
910 if (sz < la) | |
911 memmove (next->base + 1, next->base, | |
912 (la - sz) * sizeof (cpp_token)); | |
913 | |
/* Copy the last slot of this run into the first slot of the next.  */
914 next->base[0] = pfile->cur_run->limit[-1]; | |
915 } | |
916 | |
/* Shift the lookaheads that remain in this run up one slot, which
   frees the slot at cur_token.  At most SZ - 1 tokens are moved so
   the copy stays inside the run.  */
917 if (sz > 1) | |
918 memmove (pfile->cur_token + 1, pfile->cur_token, | |
919 MIN (la, sz - 1) * sizeof (cpp_token)); | |
920 } | |
921 | |
/* No slot is left in this run: continue at the start of the next.  */
922 if (!sz && pfile->cur_token == pfile->cur_run->limit) | |
923 { | |
924 pfile->cur_run = next_tokenrun (pfile->cur_run); | |
925 pfile->cur_token = pfile->cur_run->base; | |
926 } | |
927 | |
928 result = pfile->cur_token++; | |
929 result->src_loc = old->src_loc; | |
930 return result; | |
931 } | |
932 | |
933 /* Lex a token into RESULT (external interface). Takes care of issues | |
934 like directive handling, token lookahead, multiple include | |
935 optimization and skipping. */ | |
936 const cpp_token * | |
937 _cpp_lex_token (cpp_reader *pfile) | |
938 { | |
939 cpp_token *result; | |
940 | |
941 for (;;) | |
942 { | |
943 if (pfile->cur_token == pfile->cur_run->limit) | |
944 { | |
945 pfile->cur_run = next_tokenrun (pfile->cur_run); | |
946 pfile->cur_token = pfile->cur_run->base; | |
947 } | |
948 /* We assume that the current token is somewhere in the current | |
949 run. */ | |
950 if (pfile->cur_token < pfile->cur_run->base | |
951 || pfile->cur_token >= pfile->cur_run->limit) | |
952 abort (); | |
953 | |
954 if (pfile->lookaheads) | |
955 { | |
956 pfile->lookaheads--; | |
957 result = pfile->cur_token++; | |
958 } | |
959 else | |
960 result = _cpp_lex_direct (pfile); | |
961 | |
962 if (result->flags & BOL) | |
963 { | |
964 /* Is this a directive. If _cpp_handle_directive returns | |
965 false, it is an assembler #. */ | |
966 if (result->type == CPP_HASH | |
967 /* 6.10.3 p 11: Directives in a list of macro arguments | |
968 gives undefined behavior. This implementation | |
969 handles the directive as normal. */ | |
970 && pfile->state.parsing_args != 1) | |
971 { | |
972 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE)) | |
973 { | |
974 if (pfile->directive_result.type == CPP_PADDING) | |
975 continue; | |
976 result = &pfile->directive_result; | |
977 } | |
978 } | |
979 else if (pfile->state.in_deferred_pragma) | |
980 result = &pfile->directive_result; | |
981 | |
982 if (pfile->cb.line_change && !pfile->state.skipping) | |
983 pfile->cb.line_change (pfile, result, pfile->state.parsing_args); | |
984 } | |
985 | |
986 /* We don't skip tokens in directives. */ | |
987 if (pfile->state.in_directive || pfile->state.in_deferred_pragma) | |
988 break; | |
989 | |
990 /* Outside a directive, invalidate controlling macros. At file | |
991 EOF, _cpp_lex_direct takes care of popping the buffer, so we never | |
992 get here and MI optimization works. */ | |
993 pfile->mi_valid = false; | |
994 | |
995 if (!pfile->state.skipping || result->type == CPP_EOF) | |
996 break; | |
997 } | |
998 | |
999 return result; | |
1000 } | |
1001 | |
1002 /* Returns true if a fresh line has been loaded. */ | |
1003 bool | |
1004 _cpp_get_fresh_line (cpp_reader *pfile) | |
1005 { | |
1006 int return_at_eof; | |
1007 | |
1008 /* We can't get a new line until we leave the current directive. */ | |
1009 if (pfile->state.in_directive) | |
1010 return false; | |
1011 | |
1012 for (;;) | |
1013 { | |
1014 cpp_buffer *buffer = pfile->buffer; | |
1015 | |
1016 if (!buffer->need_line) | |
1017 return true; | |
1018 | |
1019 if (buffer->next_line < buffer->rlimit) | |
1020 { | |
1021 _cpp_clean_line (pfile); | |
1022 return true; | |
1023 } | |
1024 | |
1025 /* First, get out of parsing arguments state. */ | |
1026 if (pfile->state.parsing_args) | |
1027 return false; | |
1028 | |
1029 /* End of buffer. Non-empty files should end in a newline. */ | |
1030 if (buffer->buf != buffer->rlimit | |
1031 && buffer->next_line > buffer->rlimit | |
1032 && !buffer->from_stage3) | |
1033 { | |
1034 /* Clip to buffer size. */ | |
1035 buffer->next_line = buffer->rlimit; | |
1036 } | |
1037 | |
1038 return_at_eof = buffer->return_at_eof; | |
1039 _cpp_pop_buffer (pfile); | |
1040 if (pfile->buffer == NULL || return_at_eof) | |
1041 return false; | |
1042 } | |
1043 } | |
1044 | |
/* Helper for _cpp_lex_direct; expects `buffer' and `result' to be in
   scope where it is expanded.  If the next input character is CHAR,
   consume it and give the token type THEN_TYPE; otherwise give it
   type ELSE_TYPE and consume nothing.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = (THEN_TYPE);           \
        }                                       \
      else                                      \
        result->type = (ELSE_TYPE);             \
    }                                           \
  while (0)
1053 | |
1054 /* Lex a token into pfile->cur_token, which is also incremented, to | |
1055 get diagnostics pointing to the correct location. | |
1056 | |
1057 Does not handle issues such as token lookahead, multiple-include | |
1058 optimization, directives, skipping etc. This function is only | |
1059 suitable for use by _cpp_lex_token, and in special cases like | |
1060 lex_expansion_token which doesn't care for any of these issues. | |
1061 | |
1062 When meeting a newline, returns CPP_EOF if parsing a directive, | |
1063 otherwise returns to the start of the token buffer if permissible. | |
1064 Returns the location of the lexed token. */ | |
1065 cpp_token * | |
1066 _cpp_lex_direct (cpp_reader *pfile) | |
1067 { | |
1068 cppchar_t c; | |
1069 cpp_buffer *buffer; | |
1070 const unsigned char *comment_start; | |
1071 cpp_token *result = pfile->cur_token++; | |
1072 | |
/* Entered on the first pass and again after every newline: clear the
   token's flags and, if the buffer needs a new line, fetch one or
   report the end of the input.  */
1073 fresh_line: | |
1074 result->flags = 0; | |
1075 buffer = pfile->buffer; | |
1076 if (buffer->need_line) | |
1077 { | |
/* The end of the line terminates a deferred pragma: return
   CPP_PRAGMA_EOL and leave deferred-pragma state.  */
1078 if (pfile->state.in_deferred_pragma) | |
1079 { | |
1080 result->type = CPP_PRAGMA_EOL; | |
1081 pfile->state.in_deferred_pragma = false; | |
1082 if (!pfile->state.pragma_allow_expansion) | |
1083 pfile->state.prevent_expansion--; | |
1084 return result; | |
1085 } | |
1086 if (!_cpp_get_fresh_line (pfile)) | |
1087 { | |
1088 result->type = CPP_EOF; | |
1089 if (!pfile->state.in_directive) | |
1090 { | |
1091 /* Tell the compiler the line number of the EOF token. */ | |
1092 result->src_loc = pfile->line_table->highest_line; | |
1093 result->flags = BOL; | |
1094 } | |
1095 return result; | |
1096 } | |
/* Unless tokens are being kept, restart at the beginning of the base
   token run so that its storage is reused for the new line.  */
1097 if (!pfile->keep_tokens) | |
1098 { | |
1099 pfile->cur_run = &pfile->base_run; | |
1100 result = pfile->base_run.base; | |
1101 pfile->cur_token = result + 1; | |
1102 } | |
1103 result->flags = BOL; | |
1104 if (pfile->state.parsing_args == 2) | |
1105 result->flags |= PREV_WHITE; | |
1106 } | |
/* _cpp_get_fresh_line may have changed pfile->buffer, so reload it.  */
1107 buffer = pfile->buffer; | |
/* Also re-entered after a discarded comment, to refresh the token's
   source location.  */
1108 update_tokens_line: | |
1109 result->src_loc = pfile->line_table->highest_line; | |
1110 | |
/* Re-entered after horizontal whitespace has been skipped.  Once the
   read position has reached the next line note, hand the pending
   notes to _cpp_process_line_notes (not done for an overlaid
   buffer).  */
1111 skipped_white: | |
1112 if (buffer->cur >= buffer->notes[buffer->cur_note].pos | |
1113 && !pfile->overlaid_buffer) | |
1114 { | |
1115 _cpp_process_line_notes (pfile, false); | |
1116 result->src_loc = pfile->line_table->highest_line; | |
1117 } | |
/* C is the token's first character; buffer->cur now points just past
   it, which is why the cases below use buffer->cur - 1 as the start
   of the token.  */
1118 c = *buffer->cur++; | |
1119 | |
1120 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, | |
1121 CPP_BUF_COLUMN (buffer, buffer->cur)); | |
1122 | |
1123 switch (c) | |
1124 { | |
1125 case ' ': case '\t': case '\f': case '\v': case '\0': | |
1126 result->flags |= PREV_WHITE; | |
1127 skip_whitespace (pfile, c); | |
1128 goto skipped_white; | |
1129 | |
1130 case '\n': | |
/* End of line: request a fresh line and start over, lexing into the
   same token slot.  */
1131 if (buffer->cur < buffer->rlimit) | |
1132 CPP_INCREMENT_LINE (pfile, 0); | |
1133 buffer->need_line = true; | |
1134 goto fresh_line; | |
1135 | |
1136 case '0': case '1': case '2': case '3': case '4': | |
1137 case '5': case '6': case '7': case '8': case '9': | |
1138 { | |
1139 struct normalize_state nst = INITIAL_NORMALIZE_STATE; | |
1140 result->type = CPP_NUMBER; | |
1141 lex_number (pfile, &result->val.str, &nst); | |
1142 warn_about_normalization (pfile, result, &nst); | |
1143 break; | |
1144 } | |
1145 | |
1146 case 'L': | |
1147 case 'u': | |
1148 case 'U': | |
1149 /* 'L', 'u' or 'U' may introduce wide characters or strings. */ | |
1150 if (c == 'L' || CPP_OPTION (pfile, uliterals)) | |
1151 { | |
1152 if (*buffer->cur == '\'' || *buffer->cur == '"') | |
1153 { | |
1154 lex_string (pfile, result, buffer->cur - 1); | |
1155 break; | |
1156 } | |
1157 } | |
1158 /* Fall through. */ | |
1159 | |
/* 'L', 'u' and 'U' are missing from the lists below because they are
   handled above and reach this point by falling through.  */
1160 case '_': | |
1161 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
1162 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
1163 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
1164 case 's': case 't': case 'v': case 'w': case 'x': | |
1165 case 'y': case 'z': | |
1166 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
1167 case 'G': case 'H': case 'I': case 'J': case 'K': | |
1168 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
1169 case 'S': case 'T': case 'V': case 'W': case 'X': | |
1170 case 'Y': case 'Z': | |
1171 result->type = CPP_NAME; | |
1172 { | |
1173 struct normalize_state nst = INITIAL_NORMALIZE_STATE; | |
1174 result->val.node = lex_identifier (pfile, buffer->cur - 1, false, | |
1175 &nst); | |
1176 warn_about_normalization (pfile, result, &nst); | |
1177 } | |
1178 | |
1179 /* Convert named operators to their proper types. */ | |
1180 if (result->val.node->flags & NODE_OPERATOR) | |
1181 { | |
1182 result->flags |= NAMED_OP; | |
1183 result->type = (enum cpp_ttype) result->val.node->directive_index; | |
1184 } | |
1185 break; | |
1186 | |
1187 case '\'': | |
1188 case '"': | |
1189 lex_string (pfile, result, buffer->cur - 1); | |
1190 break; | |
1191 | |
1192 case '/': | |
1193 /* A potential block or line comment. */ | |
1194 comment_start = buffer->cur; | |
1195 c = *buffer->cur; | |
1196 | |
1197 if (c == '*') | |
1198 { | |
1199 if (_cpp_skip_block_comment (pfile)) | |
1200 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); | |
1201 } | |
1202 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) | |
1203 || cpp_in_system_header (pfile))) | |
1204 { | |
1205 /* Warn about comments only if pedantically GNUC89, and not | |
1206 in system headers. */ | |
1207 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) | |
1208 && ! buffer->warned_cplusplus_comments) | |
1209 { | |
1210 cpp_error (pfile, CPP_DL_PEDWARN, | |
1211 "C++ style comments are not allowed in ISO C90"); | |
1212 cpp_error (pfile, CPP_DL_PEDWARN, | |
1213 "(this will be reported only once per input file)"); | |
1214 buffer->warned_cplusplus_comments = 1; | |
1215 } | |
1216 | |
1217 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) | |
1218 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); | |
1219 } | |
1220 else if (c == '=') | |
1221 { | |
1222 buffer->cur++; | |
1223 result->type = CPP_DIV_EQ; | |
1224 break; | |
1225 } | |
1226 else | |
1227 { | |
1228 result->type = CPP_DIV; | |
1229 break; | |
1230 } | |
1231 | |
/* Only a block or line comment reaches this point; the two operator
   branches above have already left the switch.  When comments are
   not being saved, the comment counts as whitespace and the next
   token is lexed into the same slot.  */
1232 if (!pfile->state.save_comments) | |
1233 { | |
1234 result->flags |= PREV_WHITE; | |
1235 goto update_tokens_line; | |
1236 } | |
1237 | |
1238 /* Save the comment as a token in its own right. */ | |
1239 save_comment (pfile, result, comment_start, c); | |
1240 break; | |
1241 | |
1242 case '<': | |
/* When an angled header name is expected, try lex_string first.  If
   it leaves the type as CPP_LESS (presumably because no closing '>'
   was found - confirm against lex_string), continue below and lex
   the '<' as an operator.  */
1243 if (pfile->state.angled_headers) | |
1244 { | |
1245 lex_string (pfile, result, buffer->cur - 1); | |
1246 if (result->type != CPP_LESS) | |
1247 break; | |
1248 } | |
1249 | |
1250 result->type = CPP_LESS; | |
1251 if (*buffer->cur == '=') | |
1252 buffer->cur++, result->type = CPP_LESS_EQ; | |
1253 else if (*buffer->cur == '<') | |
1254 { | |
1255 buffer->cur++; | |
1256 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); | |
1257 } | |
1258 else if (CPP_OPTION (pfile, digraphs)) | |
1259 { | |
1260 if (*buffer->cur == ':') | |
1261 { | |
1262 buffer->cur++; | |
1263 result->flags |= DIGRAPH; | |
1264 result->type = CPP_OPEN_SQUARE; | |
1265 } | |
1266 else if (*buffer->cur == '%') | |
1267 { | |
1268 buffer->cur++; | |
1269 result->flags |= DIGRAPH; | |
1270 result->type = CPP_OPEN_BRACE; | |
1271 } | |
1272 } | |
1273 break; | |
1274 | |
1275 case '>': | |
1276 result->type = CPP_GREATER; | |
1277 if (*buffer->cur == '=') | |
1278 buffer->cur++, result->type = CPP_GREATER_EQ; | |
1279 else if (*buffer->cur == '>') | |
1280 { | |
1281 buffer->cur++; | |
1282 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); | |
1283 } | |
1284 break; | |
1285 | |
1286 case '%': | |
1287 result->type = CPP_MOD; | |
1288 if (*buffer->cur == '=') | |
1289 buffer->cur++, result->type = CPP_MOD_EQ; | |
1290 else if (CPP_OPTION (pfile, digraphs)) | |
1291 { | |
1292 if (*buffer->cur == ':') | |
1293 { | |
1294 buffer->cur++; | |
1295 result->flags |= DIGRAPH; | |
1296 result->type = CPP_HASH; | |
/* A second "%:" directly after the first makes the four-character
   digraph spelling of the paste operator.  */
1297 if (*buffer->cur == '%' && buffer->cur[1] == ':') | |
1298 buffer->cur += 2, result->type = CPP_PASTE; | |
1299 } | |
1300 else if (*buffer->cur == '>') | |
1301 { | |
1302 buffer->cur++; | |
1303 result->flags |= DIGRAPH; | |
1304 result->type = CPP_CLOSE_BRACE; | |
1305 } | |
1306 } | |
1307 break; | |
1308 | |
1309 case '.': | |
1310 result->type = CPP_DOT; | |
1311 if (ISDIGIT (*buffer->cur)) | |
1312 { | |
1313 struct normalize_state nst = INITIAL_NORMALIZE_STATE; | |
1314 result->type = CPP_NUMBER; | |
1315 lex_number (pfile, &result->val.str, &nst); | |
1316 warn_about_normalization (pfile, result, &nst); | |
1317 } | |
1318 else if (*buffer->cur == '.' && buffer->cur[1] == '.') | |
1319 buffer->cur += 2, result->type = CPP_ELLIPSIS; | |
1320 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) | |
1321 buffer->cur++, result->type = CPP_DOT_STAR; | |
1322 break; | |
1323 | |
1324 case '+': | |
1325 result->type = CPP_PLUS; | |
1326 if (*buffer->cur == '+') | |
1327 buffer->cur++, result->type = CPP_PLUS_PLUS; | |
1328 else if (*buffer->cur == '=') | |
1329 buffer->cur++, result->type = CPP_PLUS_EQ; | |
1330 break; | |
1331 | |
1332 case '-': | |
1333 result->type = CPP_MINUS; | |
1334 if (*buffer->cur == '>') | |
1335 { | |
1336 buffer->cur++; | |
1337 result->type = CPP_DEREF; | |
1338 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) | |
1339 buffer->cur++, result->type = CPP_DEREF_STAR; | |
1340 } | |
1341 else if (*buffer->cur == '-') | |
1342 buffer->cur++, result->type = CPP_MINUS_MINUS; | |
1343 else if (*buffer->cur == '=') | |
1344 buffer->cur++, result->type = CPP_MINUS_EQ; | |
1345 break; | |
1346 | |
1347 case '&': | |
1348 result->type = CPP_AND; | |
1349 if (*buffer->cur == '&') | |
1350 buffer->cur++, result->type = CPP_AND_AND; | |
1351 else if (*buffer->cur == '=') | |
1352 buffer->cur++, result->type = CPP_AND_EQ; | |
1353 break; | |
1354 | |
1355 case '|': | |
1356 result->type = CPP_OR; | |
1357 if (*buffer->cur == '|') | |
1358 buffer->cur++, result->type = CPP_OR_OR; | |
1359 else if (*buffer->cur == '=') | |
1360 buffer->cur++, result->type = CPP_OR_EQ; | |
1361 break; | |
1362 | |
1363 case ':': | |
1364 result->type = CPP_COLON; | |
1365 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) | |
1366 buffer->cur++, result->type = CPP_SCOPE; | |
1367 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) | |
1368 { | |
1369 buffer->cur++; | |
1370 result->flags |= DIGRAPH; | |
1371 result->type = CPP_CLOSE_SQUARE; | |
1372 } | |
1373 break; | |
1374 | |
1375 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; | |
1376 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; | |
1377 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; | |
1378 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; | |
1379 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; | |
1380 | |
1381 case '?': result->type = CPP_QUERY; break; | |
1382 case '~': result->type = CPP_COMPL; break; | |
1383 case ',': result->type = CPP_COMMA; break; | |
1384 case '(': result->type = CPP_OPEN_PAREN; break; | |
1385 case ')': result->type = CPP_CLOSE_PAREN; break; | |
1386 case '[': result->type = CPP_OPEN_SQUARE; break; | |
1387 case ']': result->type = CPP_CLOSE_SQUARE; break; | |
1388 case '{': result->type = CPP_OPEN_BRACE; break; | |
1389 case '}': result->type = CPP_CLOSE_BRACE; break; | |
1390 case ';': result->type = CPP_SEMICOLON; break; | |
1391 | |
1392 /* @ is a punctuator in Objective-C. */ | |
1393 case '@': result->type = CPP_ATSIGN; break; | |
1394 | |
1395 case '$': | |
1396 case '\\': | |
1397 { | |
/* Step back onto the '$' or '\\' so that forms_identifier_p examines
   that character itself.  */
1398 const uchar *base = --buffer->cur; | |
1399 struct normalize_state nst = INITIAL_NORMALIZE_STATE; | |
1400 | |
1401 if (forms_identifier_p (pfile, true, &nst)) | |
1402 { | |
1403 result->type = CPP_NAME; | |
1404 result->val.node = lex_identifier (pfile, base, true, &nst); | |
1405 warn_about_normalization (pfile, result, &nst); | |
1406 break; | |
1407 } | |
/* Not the start of an identifier: consume the character again and
   fall through to the default case, which makes it a CPP_OTHER
   token.  */
1408 buffer->cur++; | |
1409 } | |
1410 | |
/* Fall through.  */
1411 default: | |
1412 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); | |
1413 break; | |
1414 } | |
1415 | |
1416 return result; | |
1417 } | |
1418 | |
1419 /* An upper bound on the number of bytes needed to spell TOKEN. | |
1420 Does not include preceding whitespace. */ | |
1421 unsigned int | |
1422 cpp_token_len (const cpp_token *token) | |
1423 { | |
1424 unsigned int len; | |
1425 | |
1426 switch (TOKEN_SPELL (token)) | |
1427 { | |
1428 default: len = 6; break; | |
1429 case SPELL_LITERAL: len = token->val.str.len; break; | |
1430 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break; | |
1431 } | |
1432 | |
1433 return len; | |
1434 } | |
1435 | |
/* Decode the UTF-8 sequence starting at NAME and write the matching
   \U escape (backslash, 'U', eight lower-case hexadecimal digits) to
   BUFFER.  Exactly 10 bytes are written and no terminating NUL is
   added.  Returns the number of bytes of NAME that made up the
   sequence.  Aborts if a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned char *dst = buffer;
  unsigned long code_point;
  unsigned lead_bits;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  for (lead_bits = *src; (lead_bits & 0x80) != 0; lead_bits <<= 1)
    seq_len++;

  /* The payload of the first byte is whatever follows the length
     marker.  */
  code_point = *src & (0x7F >> seq_len);

  /* Every continuation byte contributes six more bits.  */
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      src++;
      code_point = (code_point << 6) | (*src & 0x3F);

      /* Ill-formed UTF-8: a continuation byte must look like
         10xxxxxx.  */
      if ((*src & 0xC0) != 0x80)
        abort ();
    }

  *dst++ = '\\';
  *dst++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *dst++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1469 | |
1470 | |
1471 /* Write the spelling of a token TOKEN to BUFFER. The buffer must | |
1472 already contain the enough space to hold the token's spelling. | |
1473 Returns a pointer to the character after the last character written. | |
1474 FORSTRING is true if this is to be the spelling after translation | |
1475 phase 1 (this is different for UCNs). | |
1476 FIXME: Would be nice if we didn't need the PFILE argument. */ | |
1477 unsigned char * | |
1478 cpp_spell_token (cpp_reader *pfile, const cpp_token *token, | |
1479 unsigned char *buffer, bool forstring) | |
1480 { | |
1481 switch (TOKEN_SPELL (token)) | |
1482 { | |
1483 case SPELL_OPERATOR: | |
1484 { | |
1485 const unsigned char *spelling; | |
1486 unsigned char c; | |
1487 | |
1488 if (token->flags & DIGRAPH) | |
1489 spelling | |
1490 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; | |
1491 else if (token->flags & NAMED_OP) | |
1492 goto spell_ident; | |
1493 else | |
1494 spelling = TOKEN_NAME (token); | |
1495 | |
1496 while ((c = *spelling++) != '\0') | |
1497 *buffer++ = c; | |
1498 } | |
1499 break; | |
1500 | |
1501 spell_ident: | |
1502 case SPELL_IDENT: | |
1503 if (forstring) | |
1504 { | |
1505 memcpy (buffer, NODE_NAME (token->val.node), | |
1506 NODE_LEN (token->val.node)); | |
1507 buffer += NODE_LEN (token->val.node); | |
1508 } | |
1509 else | |
1510 { | |
1511 size_t i; | |
1512 const unsigned char * name = NODE_NAME (token->val.node); | |
1513 | |
1514 for (i = 0; i < NODE_LEN (token->val.node); i++) | |
1515 if (name[i] & ~0x7F) | |
1516 { | |
1517 i += utf8_to_ucn (buffer, name + i) - 1; | |
1518 buffer += 10; | |
1519 } | |
1520 else | |
1521 *buffer++ = NODE_NAME (token->val.node)[i]; | |
1522 } | |
1523 break; | |
1524 | |
1525 case SPELL_LITERAL: | |
1526 memcpy (buffer, token->val.str.text, token->val.str.len); | |
1527 buffer += token->val.str.len; | |
1528 break; | |
1529 | |
1530 case SPELL_NONE: | |
1531 cpp_error (pfile, CPP_DL_ICE, | |
1532 "unspellable token %s", TOKEN_NAME (token)); | |
1533 break; | |
1534 } | |
1535 | |
1536 return buffer; | |
1537 } | |
1538 | |
1539 /* Returns TOKEN spelt as a null-terminated string. The string is | |
1540 freed when the reader is destroyed. Useful for diagnostics. */ | |
1541 unsigned char * | |
1542 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) | |
1543 { | |
1544 unsigned int len = cpp_token_len (token) + 1; | |
1545 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; | |
1546 | |
1547 end = cpp_spell_token (pfile, token, start, false); | |
1548 end[0] = '\0'; | |
1549 | |
1550 return start; | |
1551 } | |
1552 | |
1553 /* Used by C front ends, which really should move to using | |
1554 cpp_token_as_text. */ | |
1555 const char * | |
1556 cpp_type2name (enum cpp_ttype type) | |
1557 { | |
1558 return (const char *) token_spellings[type].name; | |
1559 } | |
1560 | |
1561 /* Writes the spelling of token to FP, without any preceding space. | |
1562 Separated from cpp_spell_token for efficiency - to avoid stdio | |
1563 double-buffering. */ | |
1564 void | |
1565 cpp_output_token (const cpp_token *token, FILE *fp) | |
1566 { | |
1567 switch (TOKEN_SPELL (token)) | |
1568 { | |
1569 case SPELL_OPERATOR: | |
1570 { | |
1571 const unsigned char *spelling; | |
1572 int c; | |
1573 | |
1574 if (token->flags & DIGRAPH) | |
1575 spelling | |
1576 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; | |
1577 else if (token->flags & NAMED_OP) | |
1578 goto spell_ident; | |
1579 else | |
1580 spelling = TOKEN_NAME (token); | |
1581 | |
1582 c = *spelling; | |
1583 do | |
1584 putc (c, fp); | |
1585 while ((c = *++spelling) != '\0'); | |
1586 } | |
1587 break; | |
1588 | |
1589 spell_ident: | |
1590 case SPELL_IDENT: | |
1591 { | |
1592 size_t i; | |
1593 const unsigned char * name = NODE_NAME (token->val.node); | |
1594 | |
1595 for (i = 0; i < NODE_LEN (token->val.node); i++) | |
1596 if (name[i] & ~0x7F) | |
1597 { | |
1598 unsigned char buffer[10]; | |
1599 i += utf8_to_ucn (buffer, name + i) - 1; | |
1600 fwrite (buffer, 1, 10, fp); | |
1601 } | |
1602 else | |
1603 fputc (NODE_NAME (token->val.node)[i], fp); | |
1604 } | |
1605 break; | |
1606 | |
1607 case SPELL_LITERAL: | |
1608 fwrite (token->val.str.text, 1, token->val.str.len, fp); | |
1609 break; | |
1610 | |
1611 case SPELL_NONE: | |
1612 /* An error, most probably. */ | |
1613 break; | |
1614 } | |
1615 } | |
1616 | |
1617 /* Compare two tokens. */ | |
1618 int | |
1619 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) | |
1620 { | |
1621 if (a->type == b->type && a->flags == b->flags) | |
1622 switch (TOKEN_SPELL (a)) | |
1623 { | |
1624 default: /* Keep compiler happy. */ | |
1625 case SPELL_OPERATOR: | |
1626 return 1; | |
1627 case SPELL_NONE: | |
1628 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); | |
1629 case SPELL_IDENT: | |
1630 return a->val.node == b->val.node; | |
1631 case SPELL_LITERAL: | |
1632 return (a->val.str.len == b->val.str.len | |
1633 && !memcmp (a->val.str.text, b->val.str.text, | |
1634 a->val.str.len)); | |
1635 } | |
1636 | |
1637 return 0; | |
1638 } | |
1639 | |
1640 /* Returns nonzero if a space should be inserted to avoid an | |
1641 accidental token paste for output. For simplicity, it is | |
1642 conservative, and occasionally advises a space where one is not | |
1643 needed, e.g. "." and ".2". */ | |
1644 int | |
1645 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, | |
1646 const cpp_token *token2) | |
1647 { | |
1648 enum cpp_ttype a = token1->type, b = token2->type; | |
1649 cppchar_t c; | |
1650 | |
1651 if (token1->flags & NAMED_OP) | |
1652 a = CPP_NAME; | |
1653 if (token2->flags & NAMED_OP) | |
1654 b = CPP_NAME; | |
1655 | |
1656 c = EOF; | |
1657 if (token2->flags & DIGRAPH) | |
1658 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; | |
1659 else if (token_spellings[b].category == SPELL_OPERATOR) | |
1660 c = token_spellings[b].name[0]; | |
1661 | |
1662 /* Quickly get everything that can paste with an '='. */ | |
1663 if ((int) a <= (int) CPP_LAST_EQ && c == '=') | |
1664 return 1; | |
1665 | |
1666 switch (a) | |
1667 { | |
1668 case CPP_GREATER: return c == '>'; | |
1669 case CPP_LESS: return c == '<' || c == '%' || c == ':'; | |
1670 case CPP_PLUS: return c == '+'; | |
1671 case CPP_MINUS: return c == '-' || c == '>'; | |
1672 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ | |
1673 case CPP_MOD: return c == ':' || c == '>'; | |
1674 case CPP_AND: return c == '&'; | |
1675 case CPP_OR: return c == '|'; | |
1676 case CPP_COLON: return c == ':' || c == '>'; | |
1677 case CPP_DEREF: return c == '*'; | |
1678 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; | |
1679 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ | |
1680 case CPP_NAME: return ((b == CPP_NUMBER | |
1681 && name_p (pfile, &token2->val.str)) | |
1682 || b == CPP_NAME | |
1683 || b == CPP_CHAR || b == CPP_STRING); /* L */ | |
1684 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME | |
1685 || c == '.' || c == '+' || c == '-'); | |
1686 /* UCNs */ | |
1687 case CPP_OTHER: return ((token1->val.str.text[0] == '\\' | |
1688 && b == CPP_NAME) | |
1689 || (CPP_OPTION (pfile, objc) | |
1690 && token1->val.str.text[0] == '@' | |
1691 && (b == CPP_NAME || b == CPP_STRING))); | |
1692 default: break; | |
1693 } | |
1694 | |
1695 return 0; | |
1696 } | |
1697 | |
1698 /* Output all the remaining tokens on the current line, and a newline | |
1699 character, to FP. Leading whitespace is removed. If there are | |
1700 macros, special token padding is not performed. */ | |
1701 void | |
1702 cpp_output_line (cpp_reader *pfile, FILE *fp) | |
1703 { | |
1704 const cpp_token *token; | |
1705 | |
1706 token = cpp_get_token (pfile); | |
1707 while (token->type != CPP_EOF) | |
1708 { | |
1709 cpp_output_token (token, fp); | |
1710 token = cpp_get_token (pfile); | |
1711 if (token->flags & PREV_WHITE) | |
1712 putc (' ', fp); | |
1713 } | |
1714 | |
1715 putc ('\n', fp); | |
1716 } | |
1717 | |
1718 /* Return a string representation of all the remaining tokens on the | |
1719 current line. The result is allocated using xmalloc and must be | |
1720 freed by the caller. */ | |
1721 unsigned char * | |
1722 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name) | |
1723 { | |
1724 const cpp_token *token; | |
1725 unsigned int out = dir_name ? ustrlen (dir_name) : 0; | |
1726 unsigned int alloced = 120 + out; | |
1727 unsigned char *result = (unsigned char *) xmalloc (alloced); | |
1728 | |
1729 /* If DIR_NAME is empty, there are no initial contents. */ | |
1730 if (dir_name) | |
1731 { | |
1732 sprintf ((char *) result, "#%s ", dir_name); | |
1733 out += 2; | |
1734 } | |
1735 | |
1736 token = cpp_get_token (pfile); | |
1737 while (token->type != CPP_EOF) | |
1738 { | |
1739 unsigned char *last; | |
1740 /* Include room for a possible space and the terminating nul. */ | |
1741 unsigned int len = cpp_token_len (token) + 2; | |
1742 | |
1743 if (out + len > alloced) | |
1744 { | |
1745 alloced *= 2; | |
1746 if (out + len > alloced) | |
1747 alloced = out + len; | |
1748 result = (unsigned char *) xrealloc (result, alloced); | |
1749 } | |
1750 | |
1751 last = cpp_spell_token (pfile, token, &result[out], 0); | |
1752 out = last - result; | |
1753 | |
1754 token = cpp_get_token (pfile); | |
1755 if (token->flags & PREV_WHITE) | |
1756 result[out++] = ' '; | |
1757 } | |
1758 | |
1759 result[out] = '\0'; | |
1760 return result; | |
1761 } | |
1762 | |
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest size new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   number of bytes between BUFF's cur and limit.

   Every macro argument is parenthesized in the expansion, so an
   argument that is itself an expression (for instance a conditional
   expression) is evaluated as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1777 | |
1778 /* Create a new allocation buffer. Place the control block at the end | |
1779 of the buffer, so that buffer overflows will cause immediate chaos. */ | |
1780 static _cpp_buff * | |
1781 new_buff (size_t len) | |
1782 { | |
1783 _cpp_buff *result; | |
1784 unsigned char *base; | |
1785 | |
1786 if (len < MIN_BUFF_SIZE) | |
1787 len = MIN_BUFF_SIZE; | |
1788 len = CPP_ALIGN (len); | |
1789 | |
1790 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); | |
1791 result = (_cpp_buff *) (base + len); | |
1792 result->base = base; | |
1793 result->cur = base; | |
1794 result->limit = base + len; | |
1795 result->next = NULL; | |
1796 return result; | |
1797 } | |
1798 | |
1799 /* Place a chain of unwanted allocation buffers on the free list. */ | |
1800 void | |
1801 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) | |
1802 { | |
1803 _cpp_buff *end = buff; | |
1804 | |
1805 while (end->next) | |
1806 end = end->next; | |
1807 end->next = pfile->free_buffs; | |
1808 pfile->free_buffs = buff; | |
1809 } | |
1810 | |
1811 /* Return a free buffer of size at least MIN_SIZE. */ | |
1812 _cpp_buff * | |
1813 _cpp_get_buff (cpp_reader *pfile, size_t min_size) | |
1814 { | |
1815 _cpp_buff *result, **p; | |
1816 | |
1817 for (p = &pfile->free_buffs;; p = &(*p)->next) | |
1818 { | |
1819 size_t size; | |
1820 | |
1821 if (*p == NULL) | |
1822 return new_buff (min_size); | |
1823 result = *p; | |
1824 size = result->limit - result->base; | |
1825 /* Return a buffer that's big enough, but don't waste one that's | |
1826 way too big. */ | |
1827 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) | |
1828 break; | |
1829 } | |
1830 | |
1831 *p = result->next; | |
1832 result->next = NULL; | |
1833 result->cur = result->base; | |
1834 return result; | |
1835 } | |
1836 | |
1837 /* Creates a new buffer with enough space to hold the uncommitted | |
1838 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies | |
1839 the excess bytes to the new buffer. Chains the new buffer after | |
1840 BUFF, and returns the new buffer. */ | |
1841 _cpp_buff * | |
1842 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) | |
1843 { | |
1844 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); | |
1845 _cpp_buff *new_buff = _cpp_get_buff (pfile, size); | |
1846 | |
1847 buff->next = new_buff; | |
1848 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); | |
1849 return new_buff; | |
1850 } | |
1851 | |
1852 /* Creates a new buffer with enough space to hold the uncommitted | |
1853 remaining bytes of the buffer pointed to by BUFF, and at least | |
1854 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. | |
1855 Chains the new buffer before the buffer pointed to by BUFF, and | |
1856 updates the pointer to point to the new buffer. */ | |
1857 void | |
1858 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) | |
1859 { | |
1860 _cpp_buff *new_buff, *old_buff = *pbuff; | |
1861 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); | |
1862 | |
1863 new_buff = _cpp_get_buff (pfile, size); | |
1864 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); | |
1865 new_buff->next = old_buff; | |
1866 *pbuff = new_buff; | |
1867 } | |
1868 | |
1869 /* Free a chain of buffers starting at BUFF. */ | |
1870 void | |
1871 _cpp_free_buff (_cpp_buff *buff) | |
1872 { | |
1873 _cpp_buff *next; | |
1874 | |
1875 for (; buff; buff = next) | |
1876 { | |
1877 next = buff->next; | |
1878 free (buff->base); | |
1879 } | |
1880 } | |
1881 | |
1882 /* Allocate permanent, unaligned storage of length LEN. */ | |
1883 unsigned char * | |
1884 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len) | |
1885 { | |
1886 _cpp_buff *buff = pfile->u_buff; | |
1887 unsigned char *result = buff->cur; | |
1888 | |
1889 if (len > (size_t) (buff->limit - result)) | |
1890 { | |
1891 buff = _cpp_get_buff (pfile, len); | |
1892 buff->next = pfile->u_buff; | |
1893 pfile->u_buff = buff; | |
1894 result = buff->cur; | |
1895 } | |
1896 | |
1897 buff->cur = result + len; | |
1898 return result; | |
1899 } | |
1900 | |
1901 /* Allocate permanent, unaligned storage of length LEN from a_buff. | |
1902 That buffer is used for growing allocations when saving macro | |
1903 replacement lists in a #define, and when parsing an answer to an | |
1904 assertion in #assert, #unassert or #if (and therefore possibly | |
1905 whilst expanding macros). It therefore must not be used by any | |
1906 code that they might call: specifically the lexer and the guts of | |
1907 the macro expander. | |
1908 | |
1909 All existing other uses clearly fit this restriction: storing | |
1910 registered pragmas during initialization. */ | |
1911 unsigned char * | |
1912 _cpp_aligned_alloc (cpp_reader *pfile, size_t len) | |
1913 { | |
1914 _cpp_buff *buff = pfile->a_buff; | |
1915 unsigned char *result = buff->cur; | |
1916 | |
1917 if (len > (size_t) (buff->limit - result)) | |
1918 { | |
1919 buff = _cpp_get_buff (pfile, len); | |
1920 buff->next = pfile->a_buff; | |
1921 pfile->a_buff = buff; | |
1922 result = buff->cur; | |
1923 } | |
1924 | |
1925 buff->cur = result + len; | |
1926 return result; | |
1927 } | |
1928 | |
1929 /* Say which field of TOK is in use. */ | |
1930 | |
1931 enum cpp_token_fld_kind | |
1932 cpp_token_val_index (cpp_token *tok) | |
1933 { | |
1934 switch (TOKEN_SPELL (tok)) | |
1935 { | |
1936 case SPELL_IDENT: | |
1937 return CPP_TOKEN_FLD_NODE; | |
1938 case SPELL_LITERAL: | |
1939 return CPP_TOKEN_FLD_STR; | |
1940 case SPELL_NONE: | |
1941 if (tok->type == CPP_MACRO_ARG) | |
1942 return CPP_TOKEN_FLD_ARG_NO; | |
1943 else if (tok->type == CPP_PADDING) | |
1944 return CPP_TOKEN_FLD_SOURCE; | |
1945 else if (tok->type == CPP_PRAGMA) | |
1946 return CPP_TOKEN_FLD_PRAGMA; | |
1947 /* else fall through */ | |
1948 default: | |
1949 return CPP_TOKEN_FLD_NONE; | |
1950 } | |
1951 } |