Mercurial > hg > CbC > CbC_gcc
comparison gcc/input.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | 561a7518be6b |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Data and functions related to line maps and input files. | 1 /* Data and functions related to line maps and input files. |
2 Copyright (C) 2004, 2007, 2008, 2009, 2010 | 2 Copyright (C) 2004-2017 Free Software Foundation, Inc. |
3 Free Software Foundation, Inc. | |
4 | 3 |
5 This file is part of GCC. | 4 This file is part of GCC. |
6 | 5 |
7 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
8 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
20 | 19 |
21 #include "config.h" | 20 #include "config.h" |
22 #include "system.h" | 21 #include "system.h" |
23 #include "coretypes.h" | 22 #include "coretypes.h" |
24 #include "intl.h" | 23 #include "intl.h" |
25 #include "input.h" | 24 #include "diagnostic-core.h" |
25 #include "selftest.h" | |
26 #include "cpplib.h" | |
27 | |
28 #ifndef HAVE_ICONV | |
29 #define HAVE_ICONV 0 | |
30 #endif | |
31 | |
32 /* This is a cache used by get_next_line to store the content of a | |
33 file that is searched for source lines. */ | |
34 struct fcache | |
35 { | |
36 /* This is the information used to record a line boundary. */ | |
37 struct line_info | |
38 { | |
39 /* The line number. It starts from 1. */ | |
40 size_t line_num; | |
41 | |
42 /* The position (byte count) of the beginning of the line, | |
43 relative to the file data pointer. This starts at zero. */ | |
44 size_t start_pos; | |
45 | |
46 /* The position (byte count) of the last byte of the line. This | |
47 normally points to the '\n' character, or to one byte after the | |
48 last byte of the file, if the file doesn't contain a '\n' | |
49 character. */ | |
50 size_t end_pos; | |
51 | |
52 line_info (size_t l, size_t s, size_t e) | |
53 : line_num (l), start_pos (s), end_pos (e) | |
54 {} | |
55 | |
56 line_info () | |
57 :line_num (0), start_pos (0), end_pos (0) | |
58 {} | |
59 }; | |
60 | |
61 /* The number of times this file has been accessed. This is used | |
62 to designate which file cache to evict from the cache | |
63 array. */ | |
64 unsigned use_count; | |
65 | |
66 /* The file_path is the key for identifying a particular file in | |
67 the cache. | |
68 For libcpp-using code, the underlying buffer for this field is | |
69 owned by the corresponding _cpp_file within the cpp_reader. */ | |
70 const char *file_path; | |
71 | |
72 FILE *fp; /* The stream the content of the file is read from. */ | |
73 | |
74 /* This points to the content of the file that we've read so | |
75 far. */ | |
76 char *data; | |
77 | |
78 /* The size of the DATA array above.*/ | |
79 size_t size; | |
80 | |
81 /* The number of bytes read from the underlying file so far. This | |
82 must be less than or equal to SIZE above. */ | |
83 size_t nb_read; | |
84 | |
85 /* The index of the beginning of the current line. */ | |
86 size_t line_start_idx; | |
87 | |
88 /* The number of the previous line read. This starts at 1. Zero | |
89 means we've read no line so far. */ | |
90 size_t line_num; | |
91 | |
92 /* This is the total number of lines of the current file. At the | |
93 moment, we try to get this information from the line map | |
94 subsystem. Note that this is just a hint. When using the C++ | |
95 front-end, this hint is correct because the input file is then | |
96 completely tokenized before parsing starts; so the line map knows | |
97 the number of lines before compilation really starts. For, e.g., | |
98 the C front-end, it can happen that we start emitting diagnostics | |
99 before the line map has seen the end of the file. */ | |
100 size_t total_lines; | |
101 | |
102 /* Could this file be missing a trailing newline on its final line? | |
103 Initially true (to cope with empty files), set to true/false | |
104 as each line is read. */ | |
105 bool missing_trailing_newline; | |
106 | |
107 /* This is a record of the beginning and end of the lines we've seen | |
108 while reading the file. This is useful to avoid walking the data | |
109 from the beginning when we are asked to read a line that is | |
110 before LINE_START_IDX above. Note that the maximum size of this | |
111 record is fcache_line_record_size, so that the memory consumption | |
112 doesn't explode. We thus scale total_lines down to | |
113 fcache_line_record_size. */ | |
114 vec<line_info, va_heap> line_record; | |
115 | |
116 fcache (); | |
117 ~fcache (); | |
118 }; | |
26 | 119 |
27 /* Current position in real source file. */ | 120 /* Current position in real source file. */ |
28 | 121 |
29 location_t input_location; | 122 location_t input_location = UNKNOWN_LOCATION; |
30 | 123 |
31 struct line_maps *line_table; | 124 struct line_maps *line_table; |
32 | 125 |
33 expanded_location | 126 /* A stashed copy of "line_table" for use by selftest::line_table_test. |
34 expand_location (source_location loc) | 127 This needs to be a global so that it can be a GC root, and thus |
128 prevent the stashed copy from being garbage-collected if the GC runs | |
129 during a line_table_test. */ | |
130 | |
131 struct line_maps *saved_line_table; | |
132 | |
133 static fcache *fcache_tab; /* The table of cached files; allocated on first use by diagnostic_file_cache_init. */ | |
134 static const size_t fcache_tab_size = 16; /* Number of entries in fcache_tab. */ | |
135 static const size_t fcache_buffer_size = 4 * 1024; /* Initial size of an entry's data buffer; maybe_grow doubles it afterwards. */ | |
136 static const size_t fcache_line_record_size = 100; /* Maximum number of entries in an entry's line_record. */ | |
137 | |
138 /* Expand the source location LOC into a human readable location. If | |
139 LOC resolves to a builtin location, the file name of the readable | |
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is | |
141 TRUE and LOC is virtual, then it is resolved to the expansion | |
142 point of the involved macro. Otherwise, it is resolved to the | |
143 spelling location of the token. | |
144 | |
145 When resolving to the spelling location of the token, if the | |
146 resulting location is for a built-in location (that is, it has no | |
147 associated line/column) in the context of a macro expansion, the | |
148 returned location is the first one (while unwinding the macro | |
149 location towards its expansion point) that is in real source | |
150 code. | |
151 | |
152 ASPECT controls which part of the location to use: its caret, its start or its finish. */ | |
153 | |
154 static expanded_location | |
155 expand_location_1 (source_location loc, | |
156 bool expansion_point_p, | |
157 enum location_aspect aspect) | |
35 { | 158 { |
36 expanded_location xloc; | 159 expanded_location xloc; |
160 const line_map_ordinary *map; | |
161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT; | |
162 tree block = NULL; | |
163 | |
164 if (IS_ADHOC_LOC (loc)) | |
165 { | |
166 block = LOCATION_BLOCK (loc); | |
167 loc = LOCATION_LOCUS (loc); | |
168 } | |
169 | |
170 memset (&xloc, 0, sizeof (xloc)); | |
171 | |
172 if (loc >= RESERVED_LOCATION_COUNT) | |
173 { | |
174 if (!expansion_point_p) | |
175 { | |
176 /* We want to resolve LOC to its spelling location. | |
177 | |
178 But if that spelling location is a reserved location that | |
179 appears in the context of a macro expansion (like for a | |
180 location for a built-in token), let's consider the first | |
181 location (toward the expansion point) that is not reserved; | |
182 that is, the first location that is in real source code. */ | |
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table, | |
184 loc, NULL); | |
185 lrk = LRK_SPELLING_LOCATION; | |
186 } | |
187 loc = linemap_resolve_location (line_table, loc, lrk, &map); | |
188 | |
189 /* loc is now either in an ordinary map, or is a reserved location. | |
190 If it is a compound location, the caret is in a spelling location, | |
191 but the start/finish might still be a virtual location. | |
192 Depending on what the caller asked for, we may need to recurse | |
193 one level in order to resolve any virtual locations in the | |
194 end-points. */ | |
195 switch (aspect) | |
196 { | |
197 default: | |
198 gcc_unreachable (); | |
199 /* Fall through. */ | |
200 case LOCATION_ASPECT_CARET: | |
201 break; | |
202 case LOCATION_ASPECT_START: | |
203 { | |
204 source_location start = get_start (loc); | |
205 if (start != loc) | |
206 return expand_location_1 (start, expansion_point_p, aspect); | |
207 } | |
208 break; | |
209 case LOCATION_ASPECT_FINISH: | |
210 { | |
211 source_location finish = get_finish (loc); | |
212 if (finish != loc) | |
213 return expand_location_1 (finish, expansion_point_p, aspect); | |
214 } | |
215 break; | |
216 } | |
217 xloc = linemap_expand_location (line_table, map, loc); | |
218 } | |
219 | |
220 xloc.data = block; /* The block taken from an ad-hoc LOC, or NULL. */ | |
37 if (loc <= BUILTINS_LOCATION) | 221 if (loc <= BUILTINS_LOCATION) |
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>"); | |
223 | |
224 return xloc; | |
225 } | |
226 | |
227 /* Initialize the set of cache used for files accessed by caret | |
228 diagnostic. */ | |
229 | |
230 static void | |
231 diagnostic_file_cache_init (void) | |
232 { | |
233 if (fcache_tab == NULL) | |
234 fcache_tab = new fcache[fcache_tab_size]; | |
235 } | |
236 | |
237 /* Free the resources used by the set of caches used for files accessed | |
238 by caret diagnostics, and reset the table pointer to NULL. */ | |
239 | |
240 void | |
241 diagnostic_file_cache_fini (void) | |
242 { | |
243 if (fcache_tab) | |
38 { | 244 { |
39 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>"); | 245 delete [] (fcache_tab); |
40 xloc.line = 0; | 246 fcache_tab = NULL; |
41 xloc.column = 0; | 247 } |
42 xloc.sysp = 0; | 248 } |
249 | |
250 /* Return the total lines number that have been read so far by the | |
251 line map (in the preprocessor) so far. For languages like C++ that | |
252 entirely preprocess the input file before starting to parse, this | |
253 equals the actual number of lines of the file. */ | |
254 | |
255 static size_t | |
256 total_lines_num (const char *file_path) | |
257 { | |
258 size_t r = 0; | |
259 source_location l = 0; | |
260 if (linemap_get_file_highest_location (line_table, file_path, &l)) | |
261 { | |
262 gcc_assert (l >= RESERVED_LOCATION_COUNT); | |
263 expanded_location xloc = expand_location (l); | |
264 r = xloc.line; | |
265 } | |
266 return r; | |
267 } | |
268 | |
269 /* Lookup the cache used for the content of a given file accessed by | |
270 caret diagnostic. Return the found cached file, or NULL if no | |
271 cached file was found. */ | |
272 | |
273 static fcache* | |
274 lookup_file_in_cache_tab (const char *file_path) | |
275 { | |
276 if (file_path == NULL) | |
277 return NULL; | |
278 | |
279 diagnostic_file_cache_init (); | |
280 | |
281 /* This will contain the found cached file. */ | |
282 fcache *r = NULL; | |
283 for (unsigned i = 0; i < fcache_tab_size; ++i) | |
284 { | |
285 fcache *c = &fcache_tab[i]; | |
286 if (c->file_path && !strcmp (c->file_path, file_path)) | |
287 { | |
288 ++c->use_count; | |
289 r = c; | |
290 } | |
291 } | |
292 | |
293 if (r) | |
294 ++r->use_count; | |
295 | |
296 return r; | |
297 } | |
298 | |
299 /* Purge any mention of FILENAME from the cache of files used for | |
300 printing source code. For use in selftests when working | |
301 with tempfiles. */ | |
302 | |
303 void | |
304 diagnostics_file_cache_forcibly_evict_file (const char *file_path) | |
305 { | |
306 gcc_assert (file_path); | |
307 | |
308 fcache *r = lookup_file_in_cache_tab (file_path); | |
309 if (!r) | |
310 /* Not found. */ | |
311 return; | |
312 | |
313 r->file_path = NULL; | |
314 if (r->fp) | |
315 fclose (r->fp); | |
316 r->fp = NULL; | |
317 r->nb_read = 0; | |
318 r->line_start_idx = 0; | |
319 r->line_num = 0; | |
320 r->line_record.truncate (0); | |
321 r->use_count = 0; | |
322 r->total_lines = 0; | |
323 r->missing_trailing_newline = true; | |
324 } | |
325 | |
326 /* Return the file cache that has been less used, recently, or the | |
327 first empty one. If HIGHEST_USE_COUNT is non-null, | |
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries | |
329 in the cache table. */ | |
330 | |
331 static fcache* | |
332 evicted_cache_tab_entry (unsigned *highest_use_count) | |
333 { | |
334 diagnostic_file_cache_init (); | |
335 | |
336 fcache *to_evict = &fcache_tab[0]; | |
337 unsigned huc = to_evict->use_count; | |
338 for (unsigned i = 1; i < fcache_tab_size; ++i) | |
339 { | |
340 fcache *c = &fcache_tab[i]; | |
341 bool c_is_empty = (c->file_path == NULL); | |
342 | |
343 if (c->use_count < to_evict->use_count | |
344 || (to_evict->file_path && c_is_empty)) | |
345 /* We evict C because it's either an entry with a lower use | |
346 count or one that is empty. */ | |
347 to_evict = c; | |
348 | |
349 if (huc < c->use_count) | |
350 huc = c->use_count; | |
351 | |
352 if (c_is_empty) | |
353 /* We've reached the end of the cache; subsequent elements are | |
354 all empty. */ | |
355 break; | |
356 } | |
357 | |
358 if (highest_use_count) | |
359 *highest_use_count = huc; | |
360 | |
361 return to_evict; | |
362 } | |
363 | |
364 /* Create the cache used for the content of a given file to be | |
365 accessed by caret diagnostic. This cache is added to an array of | |
366 cache and can be retrieved by lookup_file_in_cache_tab. This | |
367 function returns the created cache. Note that only the last | |
368 fcache_tab_size files are cached. */ | |
369 | |
370 static fcache* | |
371 add_file_to_cache_tab (const char *file_path) | |
372 { | |
373 | |
374 FILE *fp = fopen (file_path, "r"); | |
375 if (fp == NULL) | |
376 return NULL; | |
377 | |
378 unsigned highest_use_count = 0; | |
379 fcache *r = evicted_cache_tab_entry (&highest_use_count); | |
380 r->file_path = file_path; | |
381 if (r->fp) | |
382 fclose (r->fp); | |
383 r->fp = fp; | |
384 r->nb_read = 0; | |
385 r->line_start_idx = 0; | |
386 r->line_num = 0; | |
387 r->line_record.truncate (0); | |
388 /* Ensure that this cache entry doesn't get evicted next time | |
389 add_file_to_cache_tab is called. */ | |
390 r->use_count = ++highest_use_count; | |
391 r->total_lines = total_lines_num (file_path); | |
392 r->missing_trailing_newline = true; | |
393 | |
394 return r; | |
395 } | |
396 | |
397 /* Lookup the cache used for the content of a given file accessed by | |
398 caret diagnostic. If no cached file was found, create a new cache | |
399 for this file, add it to the array of cached file and return | |
400 it. */ | |
401 | |
402 static fcache* | |
403 lookup_or_add_file_to_cache_tab (const char *file_path) | |
404 { | |
405 fcache *r = lookup_file_in_cache_tab (file_path); | |
406 if (r == NULL) | |
407 r = add_file_to_cache_tab (file_path); | |
408 return r; | |
409 } | |
410 | |
411 /* Default constructor for a cache of file used by caret | |
412 diagnostic. */ | |
413 | |
414 fcache::fcache () | |
415 : use_count (0), file_path (NULL), fp (NULL), data (0), | |
416 size (0), nb_read (0), line_start_idx (0), line_num (0), | |
417 total_lines (0), missing_trailing_newline (true) | |
418 { | |
419 line_record.create (0); | |
420 } | |
421 | |
422 /* Destructor for a cache of file used by caret diagnostic. */ | |
423 | |
424 fcache::~fcache () | |
425 { | |
426 if (fp) | |
427 { | |
428 fclose (fp); | |
429 fp = NULL; | |
430 } | |
431 if (data) | |
432 { | |
433 XDELETEVEC (data); | |
434 data = 0; | |
435 } | |
436 line_record.release (); | |
437 } | |
438 | |
439 /* Returns TRUE iff the cache would need to be filled with data coming | |
440 from the file. That is, either the cache is empty or full or the | |
441 current line is empty. Note that if the cache is full, it would | |
442 need to be extended and filled again. */ | |
443 | |
444 static bool | |
445 needs_read (fcache *c) | |
446 { | |
447 return (c->nb_read == 0 | |
448 || c->nb_read == c->size | |
449 || (c->line_start_idx >= c->nb_read - 1)); | |
450 } | |
451 | |
452 /* Return TRUE iff the cache is full and thus needs to be | |
453 extended. */ | |
454 | |
455 static bool | |
456 needs_grow (fcache *c) | |
457 { | |
458 return c->nb_read == c->size; | |
459 } | |
460 | |
461 /* Grow the cache if it needs to be extended. */ | |
462 | |
463 static void | |
464 maybe_grow (fcache *c) | |
465 { | |
466 if (!needs_grow (c)) | |
467 return; | |
468 | |
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2; | |
470 c->data = XRESIZEVEC (char, c->data, size); | |
471 c->size = size; | |
472 } | |
473 | |
474 /* Read more data into the cache. Extends the cache if need be. | |
475 Returns TRUE iff new data could be read. */ | |
476 | |
477 static bool | |
478 read_data (fcache *c) | |
479 { | |
480 if (feof (c->fp) || ferror (c->fp)) | |
481 return false; | |
482 | |
483 maybe_grow (c); | |
484 | |
485 char * from = c->data + c->nb_read; | |
486 size_t to_read = c->size - c->nb_read; | |
487 size_t nb_read = fread (from, 1, to_read, c->fp); | |
488 | |
489 if (ferror (c->fp)) | |
490 return false; | |
491 | |
492 c->nb_read += nb_read; | |
493 return !!nb_read; | |
494 } | |
495 | |
496 /* Read new data iff the cache needs to be filled with more data | |
497 coming from the file FP. Return TRUE iff the cache was filled with | |
498 mode data. */ | |
499 | |
500 static bool | |
501 maybe_read_data (fcache *c) | |
502 { | |
503 if (!needs_read (c)) | |
504 return false; | |
505 return read_data (c); | |
506 } | |
507 | |
508 /* Read a new line from file FP, using C as a cache for the data | |
509 coming from the file. Upon successful completion, *LINE is set to | |
510 the beginning of the line found. *LINE points directly in the | |
511 line cache and is only valid until the next call of get_next_line. | |
512 *LINE_LEN is set to the length of the line. Note that the line | |
513 does not contain any terminal delimiter. This function returns | |
514 true if some data was read or processed from the cache, false | |
515 otherwise. Note that subsequent calls to get_next_line might | |
516 make the content of *LINE invalid. */ | |
517 | |
518 static bool | |
519 get_next_line (fcache *c, char **line, ssize_t *line_len) | |
520 { | |
521 /* Fill the cache with data to process. */ | |
522 maybe_read_data (c); | |
523 | |
524 size_t remaining_size = c->nb_read - c->line_start_idx; | |
525 if (remaining_size == 0) | |
526 /* There is no more data to process. */ | |
527 return false; | |
528 | |
529 char *line_start = c->data + c->line_start_idx; | |
530 | |
531 char *next_line_start = NULL; | |
532 size_t len = 0; | |
533 char *line_end = (char *) memchr (line_start, '\n', remaining_size); | |
534 if (line_end == NULL) | |
535 { | |
536 /* We haven't found the end-of-line delimiter in the cache. | |
537 Fill the cache with more data from the file and look for the | |
538 '\n'. */ | |
539 while (maybe_read_data (c)) | |
540 { | |
541 line_start = c->data + c->line_start_idx; | |
542 remaining_size = c->nb_read - c->line_start_idx; | |
543 line_end = (char *) memchr (line_start, '\n', remaining_size); | |
544 if (line_end != NULL) | |
545 { | |
546 next_line_start = line_end + 1; | |
547 break; | |
548 } | |
549 } | |
550 if (line_end == NULL) | |
551 { | |
552 /* We've loaded all the file into the cache and still no | |
553 '\n'. Let's say the line ends up at one byte past the | |
554 end of the file. This is to stay consistent with the case | |
555 of when the line ends up with a '\n' and line_end points to | |
556 that terminal '\n'. That consistency is useful below in | |
557 the len calculation. */ | |
558 line_end = c->data + c->nb_read ; | |
559 c->missing_trailing_newline = true; | |
560 } | |
561 else | |
562 c->missing_trailing_newline = false; | |
43 } | 563 } |
44 else | 564 else |
45 { | 565 { |
46 const struct line_map *map = linemap_lookup (line_table, loc); | 566 next_line_start = line_end + 1; |
47 xloc.file = map->to_file; | 567 c->missing_trailing_newline = false; |
48 xloc.line = SOURCE_LINE (map, loc); | 568 } |
49 xloc.column = SOURCE_COLUMN (map, loc); | 569 |
50 xloc.sysp = map->sysp != 0; | 570 if (ferror (c->fp)) |
51 }; | 571 return false; |
52 return xloc; | 572 |
53 } | 573 /* At this point, we've found the end of the line. It either |
574 points to the '\n' or to one byte after the last byte of the | |
575 file. */ | |
576 gcc_assert (line_end != NULL); | |
577 | |
578 len = line_end - line_start; | |
579 | |
580 if (c->line_start_idx < c->nb_read) | |
581 *line = line_start; | |
582 | |
583 ++c->line_num; | |
584 | |
585 /* Before we update our line record, make sure the hint about the | |
586 total number of lines of the file is correct. If it's not, then | |
587 we give up recording line boundaries from now on. */ | |
588 bool update_line_record = true; | |
589 if (c->line_num > c->total_lines) | |
590 update_line_record = false; | |
591 | |
592 /* Now update our line record so that re-reading lines from | |
593 before c->line_start_idx is faster. */ | |
594 if (update_line_record | |
595 && c->line_record.length () < fcache_line_record_size) | |
596 { | |
597 /* If the file lines fits in the line record, we just record all | |
598 its lines ...*/ | |
599 if (c->total_lines <= fcache_line_record_size | |
600 && c->line_num > c->line_record.length ()) | |
601 c->line_record.safe_push (fcache::line_info (c->line_num, | |
602 c->line_start_idx, | |
603 line_end - c->data)); | |
604 else if (c->total_lines > fcache_line_record_size) | |
605 { | |
606 /* ... otherwise, we just scale total_lines down to | |
607 fcache_line_record_size lines. */ | |
608 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines; | |
609 if (c->line_record.length () == 0 | |
610 || n >= c->line_record.length ()) | |
611 c->line_record.safe_push (fcache::line_info (c->line_num, | |
612 c->line_start_idx, | |
613 line_end - c->data)); | |
614 } | |
615 } | |
616 | |
617 /* Update c->line_start_idx so that it points to the next line to be | |
618 read. */ | |
619 if (next_line_start) | |
620 c->line_start_idx = next_line_start - c->data; | |
621 else | |
622 /* We didn't find any terminal '\n'. Let's consider that the end | |
623 of line is the end of the data in the cache. The next | |
624 invocation of get_next_line will either read more data from the | |
625 underlying file or return false early because we've reached the | |
626 end of the file. */ | |
627 c->line_start_idx = c->nb_read; | |
628 | |
629 *line_len = len; | |
630 | |
631 return true; | |
632 } | |
633 | |
634 /* Consume the next bytes coming from the cache (or from its | |
635 underlying file if there are remaining unread bytes in the file) | |
636 until we reach the next end-of-line (or end-of-file). There is no | |
637 copying from the cache involved. Return TRUE upon successful | |
638 completion. */ | |
639 | |
640 static bool | |
641 goto_next_line (fcache *cache) | |
642 { | |
643 char *l; | |
644 ssize_t len; | |
645 | |
646 return get_next_line (cache, &l, &len); | |
647 } | |
648 | |
649 /* Read an arbitrary line number LINE_NUM from the file cached in C. | |
650 If the line was read successfully, *LINE points to the beginning | |
651 of the line in the file cache and *LINE_LEN is the length of the | |
652 line. *LINE is not nul-terminated, but may contain zero bytes. | |
653 *LINE is only valid until the next call of read_line_num. | |
654 This function returns true if a line was read. */ | |
655 | |
656 static bool | |
657 read_line_num (fcache *c, size_t line_num, | |
658 char **line, ssize_t *line_len) | |
659 { | |
660 gcc_assert (line_num > 0); | |
661 | |
662 if (line_num <= c->line_num) | |
663 { | |
664 /* We've been asked to read lines that are before c->line_num. | |
665 So let's use our line record (if it's not empty) to try to | |
666 avoid re-reading the file from the beginning again. */ | |
667 | |
668 if (c->line_record.is_empty ()) | |
669 { | |
670 c->line_start_idx = 0; | |
671 c->line_num = 0; | |
672 } | |
673 else | |
674 { | |
675 fcache::line_info *i = NULL; | |
676 if (c->total_lines <= fcache_line_record_size) | |
677 { | |
678 /* In languages where the input file is not totally | |
679 preprocessed up front, the c->total_lines hint | |
680 can be smaller than the number of lines of the | |
681 file. In that case, only the first | |
682 c->total_lines have been recorded. | |
683 | |
684 Otherwise, the first c->total_lines we've read have | |
685 their start/end recorded here. */ | |
686 i = (line_num <= c->total_lines) | |
687 ? &c->line_record[line_num - 1] | |
688 : &c->line_record[c->total_lines - 1]; | |
689 gcc_assert (i->line_num <= line_num); | |
690 } | |
691 else | |
692 { | |
693 /* So the file had more lines than our line record | |
694 size. Thus the number of lines we've recorded has | |
695 been scaled down to fcache_line_record_size. Let's | |
696 pick the start/end of the recorded line that is | |
697 closest to line_num. */ | |
698 size_t n = (line_num <= c->total_lines) | |
699 ? line_num * fcache_line_record_size / c->total_lines | |
700 : c ->line_record.length () - 1; | |
701 if (n < c->line_record.length ()) | |
702 { | |
703 i = &c->line_record[n]; | |
704 gcc_assert (i->line_num <= line_num); | |
705 } | |
706 } | |
707 | |
708 if (i && i->line_num == line_num) | |
709 { | |
710 /* We have the start/end of the line. */ | |
711 *line = c->data + i->start_pos; | |
712 *line_len = i->end_pos - i->start_pos; | |
713 return true; | |
714 } | |
715 | |
716 if (i) | |
717 { | |
718 c->line_start_idx = i->start_pos; | |
719 c->line_num = i->line_num - 1; | |
720 } | |
721 else | |
722 { | |
723 c->line_start_idx = 0; | |
724 c->line_num = 0; | |
725 } | |
726 } | |
727 } | |
728 | |
729 /* Let's walk from line c->line_num up to line_num - 1, without | |
730 copying any line. */ | |
731 while (c->line_num < line_num - 1) | |
732 if (!goto_next_line (c)) | |
733 return false; | |
734 | |
735 /* The line we want is the next one. Let's read it and hand it back | |
736 to the caller. */ | |
737 return get_next_line (c, line, line_len); | |
738 } | |
739 | |
740 /* Return the physical source line that corresponds to FILE_PATH/LINE. | |
741 The line is not nul-terminated. The returned pointer is only | |
742 valid until the next call of location_get_source_line. | |
743 Note that the line can contain several null characters, | |
744 so LINE_LEN, if non-null, points to the actual length of the line. | |
745 If the function fails, NULL is returned. */ | |
746 | |
747 const char * | |
748 location_get_source_line (const char *file_path, int line, | |
749 int *line_len) | |
750 { | |
751 char *buffer = NULL; | |
752 ssize_t len; | |
753 | |
754 if (line == 0) | |
755 return NULL; | |
756 | |
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path); | |
758 if (c == NULL) | |
759 return NULL; | |
760 | |
761 bool read = read_line_num (c, line, &buffer, &len); | |
762 | |
763 if (read && line_len) | |
764 *line_len = len; | |
765 | |
766 return read ? buffer : NULL; | |
767 } | |
768 | |
769 /* Determine if FILE_PATH missing a trailing newline on its final line. | |
770 Only valid to call once all of the file has been loaded, by | |
771 requesting a line number beyond the end of the file. */ | |
772 | |
773 bool | |
774 location_missing_trailing_newline (const char *file_path) | |
775 { | |
776 fcache *c = lookup_or_add_file_to_cache_tab (file_path); | |
777 if (c == NULL) | |
778 return false; | |
779 | |
780 return c->missing_trailing_newline; | |
781 } | |
782 | |
783 /* Test if the location originates from the spelling location of a | |
784 builtin-tokens. That is, return TRUE if LOC is a (possibly | |
785 virtual) location of a built-in token that appears in the expansion | |
786 list of a macro. Please note that this function also works on | |
787 tokens that result from built-in tokens. For instance, the | |
788 function would return true if passed a token "4" that is the result | |
789 of the expansion of the built-in __LINE__ macro. */ | |
790 bool | |
791 is_location_from_builtin_token (source_location loc) | |
792 { | |
793 const line_map_ordinary *map = NULL; | |
794 loc = linemap_resolve_location (line_table, loc, | |
795 LRK_SPELLING_LOCATION, &map); | |
796 return loc == BUILTINS_LOCATION; | |
797 } | |
798 | |
799 /* Expand the source location LOC into a human readable location. If | |
800 LOC is virtual, it resolves to the expansion point of the involved | |
801 macro. If LOC resolves to a builtin location, the file name of the | |
802 readable location is set to the string "<built-in>". */ | |
803 | |
804 expanded_location | |
805 expand_location (source_location loc) | |
806 { | |
807 return expand_location_1 (loc, /*expansion_point_p=*/true, | |
808 LOCATION_ASPECT_CARET); | |
809 } | |
810 | |
811 /* Expand the source location LOC into a human readable location. If | |
812 LOC is virtual, it resolves to the expansion location of the | |
813 relevant macro. If LOC resolves to a builtin location, the file | |
814 name of the readable location is set to the string | |
815 "<built-in>". */ | |
816 | |
817 expanded_location | |
818 expand_location_to_spelling_point (source_location loc) | |
819 { | |
820 return expand_location_1 (loc, /*expansion_point_p=*/false, | |
821 LOCATION_ASPECT_CARET); | |
822 } | |
823 | |
824 /* The rich_location class within libcpp requires a way to expand | |
825 source_location instances, and relies on the client code | |
826 providing a symbol named | |
827 linemap_client_expand_location_to_spelling_point | |
828 to do this. | |
829 | |
830 This is the implementation for libcommon.a (all host binaries), | |
831 which simply calls into expand_location_1. */ | |
832 | |
833 expanded_location | |
834 linemap_client_expand_location_to_spelling_point (source_location loc, | |
835 enum location_aspect aspect) | |
836 { | |
837 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect); | |
838 } | |
839 | |
840 | |
841 /* If LOCATION is in a system header and if it is a virtual location for | |
842 a token coming from the expansion of a macro, unwind it to the | |
843 location of the expansion point of the macro. Otherwise, just return | |
844 LOCATION. | |
845 | |
846 This is used for instance when we want to emit diagnostics about a | |
847 token that may be located in a macro that is itself defined in a | |
848 system header, for example, for the NULL macro. In such a case, if | |
849 LOCATION were passed directly to diagnostic functions such as | |
850 warning_at, the diagnostic would be suppressed (unless | |
851 -Wsystem-headers). */ | |
852 | |
853 source_location | |
854 expansion_point_location_if_in_system_header (source_location location) | |
855 { | |
856 if (in_system_header_at (location)) | |
857 location = linemap_resolve_location (line_table, location, | |
858 LRK_MACRO_EXPANSION_POINT, | |
859 NULL); | |
860 return location; | |
861 } | |
862 | |
863 /* If LOCATION is a virtual location for a token coming from the expansion | |
864 of a macro, unwind to the location of the expansion point of the macro. */ | |
865 | |
866 source_location | |
867 expansion_point_location (source_location location) | |
868 { | |
869 return linemap_resolve_location (line_table, location, | |
870 LRK_MACRO_EXPANSION_POINT, NULL); | |
871 } | |
872 | |
873 /* Construct a location with caret at CARET, ranging from START to | |
874 finish e.g. | |
875 | |
876 11111111112 | |
877 12345678901234567890 | |
878 522 | |
879 523 return foo + bar; | |
880 ~~~~^~~~~ | |
881 524 | |
882 | |
883 The location's caret is at the "+", line 523 column 15, but starts | |
884 earlier, at the "f" of "foo" at column 11. The finish is at the "r" | |
885 of "bar" at column 19. */ | |
886 | |
887 location_t | |
888 make_location (location_t caret, location_t start, location_t finish) | |
889 { | |
890 location_t pure_loc = get_pure_location (caret); | |
891 source_range src_range; | |
892 src_range.m_start = get_start (start); | |
893 src_range.m_finish = get_finish (finish); | |
894 location_t combined_loc = COMBINE_LOCATION_DATA (line_table, | |
895 pure_loc, | |
896 src_range, | |
897 NULL); | |
898 return combined_loc; | |
899 } | |
900 | |
901 /* Same as above, but taking a source range rather than two locations. */ | |
902 | |
903 location_t | |
904 make_location (location_t caret, source_range src_range) | |
905 { | |
906 location_t pure_loc = get_pure_location (caret); | |
907 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL); | |
908 } | |
909 | |
/* Base-2 kilo and mega multipliers.  */
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale a number for display.  The result is:
   - X itself, if X is strictly lower than 10 K (in base 2)
   - X / 1024, if X is at least 10 K but strictly lower than 10 M
   - X / (1024 * 1024), if X is at least 10 M.
   The result always has type unsigned long.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                                   ? (x) / ONE_M \
                                   : ((x) >= 10 * ONE_K \
                                      ? (x) / ONE_K \
                                      : (x))))

/* Unit suffix matching SCALE for a given integer:
   - the character ' ' if the number is strictly lower than 10 K
   - the character 'k' if the number is at least 10 K (in base 2)
     but strictly lower than 10 M (in base 2)
   - the character 'M' if the number is at least 10 M (in base 2).  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two comma-separated arguments: SIZE scaled to a multiple
   of 1K or 1M (in base 2), followed by the matching unit character
   (either ' ', 'k' or 'M').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
934 | |
935 /* Dump statistics to stderr about the memory usage of the line_table | |
936 set of line maps. This also displays some statistics about macro | |
937 expansion. */ | |
938 | |
939 void | |
940 dump_line_table_statistics (void) | |
941 { | |
942 struct linemap_stats s; | |
943 long total_used_map_size, | |
944 macro_maps_size, | |
945 total_allocated_map_size; | |
946 | |
947 memset (&s, 0, sizeof (s)); | |
948 | |
949 linemap_get_statistics (line_table, &s); | |
950 | |
951 macro_maps_size = s.macro_maps_used_size | |
952 + s.macro_maps_locations_size; | |
953 | |
954 total_allocated_map_size = s.ordinary_maps_allocated_size | |
955 + s.macro_maps_allocated_size | |
956 + s.macro_maps_locations_size; | |
957 | |
958 total_used_map_size = s.ordinary_maps_used_size | |
959 + s.macro_maps_used_size | |
960 + s.macro_maps_locations_size; | |
961 | |
962 fprintf (stderr, "Number of expanded macros: %5ld\n", | |
963 s.num_expanded_macros); | |
964 if (s.num_expanded_macros != 0) | |
965 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n", | |
966 s.num_macro_tokens / s.num_expanded_macros); | |
967 fprintf (stderr, | |
968 "\nLine Table allocations during the " | |
969 "compilation process\n"); | |
970 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n", | |
971 SCALE (s.num_ordinary_maps_used), | |
972 STAT_LABEL (s.num_ordinary_maps_used)); | |
973 fprintf (stderr, "Ordinary map used size: %5ld%c\n", | |
974 SCALE (s.ordinary_maps_used_size), | |
975 STAT_LABEL (s.ordinary_maps_used_size)); | |
976 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n", | |
977 SCALE (s.num_ordinary_maps_allocated), | |
978 STAT_LABEL (s.num_ordinary_maps_allocated)); | |
979 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n", | |
980 SCALE (s.ordinary_maps_allocated_size), | |
981 STAT_LABEL (s.ordinary_maps_allocated_size)); | |
982 fprintf (stderr, "Number of macro maps used: %5ld%c\n", | |
983 SCALE (s.num_macro_maps_used), | |
984 STAT_LABEL (s.num_macro_maps_used)); | |
985 fprintf (stderr, "Macro maps used size: %5ld%c\n", | |
986 SCALE (s.macro_maps_used_size), | |
987 STAT_LABEL (s.macro_maps_used_size)); | |
988 fprintf (stderr, "Macro maps locations size: %5ld%c\n", | |
989 SCALE (s.macro_maps_locations_size), | |
990 STAT_LABEL (s.macro_maps_locations_size)); | |
991 fprintf (stderr, "Macro maps size: %5ld%c\n", | |
992 SCALE (macro_maps_size), | |
993 STAT_LABEL (macro_maps_size)); | |
994 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n", | |
995 SCALE (s.duplicated_macro_maps_locations_size), | |
996 STAT_LABEL (s.duplicated_macro_maps_locations_size)); | |
997 fprintf (stderr, "Total allocated maps size: %5ld%c\n", | |
998 SCALE (total_allocated_map_size), | |
999 STAT_LABEL (total_allocated_map_size)); | |
1000 fprintf (stderr, "Total used maps size: %5ld%c\n", | |
1001 SCALE (total_used_map_size), | |
1002 STAT_LABEL (total_used_map_size)); | |
1003 fprintf (stderr, "Ad-hoc table size: %5ld%c\n", | |
1004 SCALE (s.adhoc_table_size), | |
1005 STAT_LABEL (s.adhoc_table_size)); | |
1006 fprintf (stderr, "Ad-hoc table entries used: %5ld\n", | |
1007 s.adhoc_table_entries_used); | |
1008 fprintf (stderr, "optimized_ranges: %i\n", | |
1009 line_table->num_optimized_ranges); | |
1010 fprintf (stderr, "unoptimized_ranges: %i\n", | |
1011 line_table->num_unoptimized_ranges); | |
1012 | |
1013 fprintf (stderr, "\n"); | |
1014 } | |
1015 | |
1016 /* Get location one beyond the final location in ordinary map IDX. */ | |
1017 | |
1018 static source_location | |
1019 get_end_location (struct line_maps *set, unsigned int idx) | |
1020 { | |
1021 if (idx == LINEMAPS_ORDINARY_USED (set) - 1) | |
1022 return set->highest_location; | |
1023 | |
1024 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1); | |
1025 return MAP_START_LOCATION (next_map); | |
1026 } | |
1027 | |
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT (i.e. DIGIT modulo 10) to
   STREAM as a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1035 | |
1036 /* Helper function for dump_location_info. | |
1037 Write a row of numbers to STREAM, numbering a source line, | |
1038 giving the units, tens, hundreds etc of the column number. */ | |
1039 | |
1040 static void | |
1041 write_digit_row (FILE *stream, int indent, | |
1042 const line_map_ordinary *map, | |
1043 source_location loc, int max_col, int divisor) | |
1044 { | |
1045 fprintf (stream, "%*c", indent, ' '); | |
1046 fprintf (stream, "|"); | |
1047 for (int column = 1; column < max_col; column++) | |
1048 { | |
1049 source_location column_loc = loc + (column << map->m_range_bits); | |
1050 write_digit (stream, column_loc / divisor); | |
1051 } | |
1052 fprintf (stream, "\n"); | |
1053 } | |
1054 | |
1055 /* Write a half-closed (START) / half-open (END) interval of | |
1056 source_location to STREAM. */ | |
1057 | |
1058 static void | |
1059 dump_location_range (FILE *stream, | |
1060 source_location start, source_location end) | |
1061 { | |
1062 fprintf (stream, | |
1063 " source_location interval: %u <= loc < %u\n", | |
1064 start, end); | |
1065 } | |
1066 | |
1067 /* Write a labelled description of a half-closed (START) / half-open (END) | |
1068 interval of source_location to STREAM. */ | |
1069 | |
1070 static void | |
1071 dump_labelled_location_range (FILE *stream, | |
1072 const char *name, | |
1073 source_location start, source_location end) | |
1074 { | |
1075 fprintf (stream, "%s\n", name); | |
1076 dump_location_range (stream, start, end); | |
1077 fprintf (stream, "\n"); | |
1078 } | |
1079 | |
1080 /* Write a visualization of the locations in the line_table to STREAM. */ | |
1081 | |
1082 void | |
1083 dump_location_info (FILE *stream) | |
1084 { | |
1085 /* Visualize the reserved locations. */ | |
1086 dump_labelled_location_range (stream, "RESERVED LOCATIONS", | |
1087 0, RESERVED_LOCATION_COUNT); | |
1088 | |
1089 /* Visualize the ordinary line_map instances, rendering the sources. */ | |
1090 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++) | |
1091 { | |
1092 source_location end_location = get_end_location (line_table, idx); | |
1093 /* half-closed: doesn't include this one. */ | |
1094 | |
1095 const line_map_ordinary *map | |
1096 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx); | |
1097 fprintf (stream, "ORDINARY MAP: %i\n", idx); | |
1098 dump_location_range (stream, | |
1099 MAP_START_LOCATION (map), end_location); | |
1100 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map)); | |
1101 fprintf (stream, " starting at line: %i\n", | |
1102 ORDINARY_MAP_STARTING_LINE_NUMBER (map)); | |
1103 fprintf (stream, " column and range bits: %i\n", | |
1104 map->m_column_and_range_bits); | |
1105 fprintf (stream, " column bits: %i\n", | |
1106 map->m_column_and_range_bits - map->m_range_bits); | |
1107 fprintf (stream, " range bits: %i\n", | |
1108 map->m_range_bits); | |
1109 | |
1110 /* Render the span of source lines that this "map" covers. */ | |
1111 for (source_location loc = MAP_START_LOCATION (map); | |
1112 loc < end_location; | |
1113 loc += (1 << map->m_range_bits) ) | |
1114 { | |
1115 gcc_assert (pure_location_p (line_table, loc) ); | |
1116 | |
1117 expanded_location exploc | |
1118 = linemap_expand_location (line_table, map, loc); | |
1119 | |
1120 if (0 == exploc.column) | |
1121 { | |
1122 /* Beginning of a new source line: draw the line. */ | |
1123 | |
1124 int line_size; | |
1125 const char *line_text = location_get_source_line (exploc.file, | |
1126 exploc.line, | |
1127 &line_size); | |
1128 if (!line_text) | |
1129 break; | |
1130 fprintf (stream, | |
1131 "%s:%3i|loc:%5i|%.*s\n", | |
1132 exploc.file, exploc.line, | |
1133 loc, | |
1134 line_size, line_text); | |
1135 | |
1136 /* "loc" is at column 0, which means "the whole line". | |
1137 Render the locations *within* the line, by underlining | |
1138 it, showing the source_location numeric values | |
1139 at each column. */ | |
1140 int max_col = (1 << map->m_column_and_range_bits) - 1; | |
1141 if (max_col > line_size) | |
1142 max_col = line_size + 1; | |
1143 | |
1144 int indent = 14 + strlen (exploc.file); | |
1145 | |
1146 /* Thousands. */ | |
1147 if (end_location > 999) | |
1148 write_digit_row (stream, indent, map, loc, max_col, 1000); | |
1149 | |
1150 /* Hundreds. */ | |
1151 if (end_location > 99) | |
1152 write_digit_row (stream, indent, map, loc, max_col, 100); | |
1153 | |
1154 /* Tens. */ | |
1155 write_digit_row (stream, indent, map, loc, max_col, 10); | |
1156 | |
1157 /* Units. */ | |
1158 write_digit_row (stream, indent, map, loc, max_col, 1); | |
1159 } | |
1160 } | |
1161 fprintf (stream, "\n"); | |
1162 } | |
1163 | |
1164 /* Visualize unallocated values. */ | |
1165 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS", | |
1166 line_table->highest_location, | |
1167 LINEMAPS_MACRO_LOWEST_LOCATION (line_table)); | |
1168 | |
1169 /* Visualize the macro line_map instances, rendering the sources. */ | |
1170 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++) | |
1171 { | |
1172 /* Each macro map that is allocated owns source_location values | |
1173 that are *lower* that the one before them. | |
1174 Hence it's meaningful to view them either in order of ascending | |
1175 source locations, or in order of ascending macro map index. */ | |
1176 const bool ascending_source_locations = true; | |
1177 unsigned int idx = (ascending_source_locations | |
1178 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1)) | |
1179 : i); | |
1180 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx); | |
1181 fprintf (stream, "MACRO %i: %s (%u tokens)\n", | |
1182 idx, | |
1183 linemap_map_get_macro_name (map), | |
1184 MACRO_MAP_NUM_MACRO_TOKENS (map)); | |
1185 dump_location_range (stream, | |
1186 map->start_location, | |
1187 (map->start_location | |
1188 + MACRO_MAP_NUM_MACRO_TOKENS (map))); | |
1189 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map), | |
1190 "expansion point is location %i", | |
1191 MACRO_MAP_EXPANSION_POINT_LOCATION (map)); | |
1192 fprintf (stream, " map->start_location: %u\n", | |
1193 map->start_location); | |
1194 | |
1195 fprintf (stream, " macro_locations:\n"); | |
1196 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++) | |
1197 { | |
1198 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i]; | |
1199 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1]; | |
1200 | |
1201 /* linemap_add_macro_token encodes token numbers in an expansion | |
1202 by putting them after MAP_START_LOCATION. */ | |
1203 | |
1204 /* I'm typically seeing 4 uninitialized entries at the end of | |
1205 0xafafafaf. | |
1206 This appears to be due to macro.c:replace_args | |
1207 adding 2 extra args for padding tokens; presumably there may | |
1208 be a leading and/or trailing padding token injected, | |
1209 each for 2 more location slots. | |
1210 This would explain there being up to 4 source_locations slots | |
1211 that may be uninitialized. */ | |
1212 | |
1213 fprintf (stream, " %u: %u, %u\n", | |
1214 i, | |
1215 x, | |
1216 y); | |
1217 if (x == y) | |
1218 { | |
1219 if (x < MAP_START_LOCATION (map)) | |
1220 inform (x, "token %u has x-location == y-location == %u", i, x); | |
1221 else | |
1222 fprintf (stream, | |
1223 "x-location == y-location == %u encodes token # %u\n", | |
1224 x, x - MAP_START_LOCATION (map)); | |
1225 } | |
1226 else | |
1227 { | |
1228 inform (x, "token %u has x-location == %u", i, x); | |
1229 inform (x, "token %u has y-location == %u", i, y); | |
1230 } | |
1231 } | |
1232 fprintf (stream, "\n"); | |
1233 } | |
1234 | |
1235 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a | |
1236 macro map, presumably due to an off-by-one error somewhere | |
1237 between the logic in linemap_enter_macro and | |
1238 LINEMAPS_MACRO_LOWEST_LOCATION. */ | |
1239 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION", | |
1240 MAX_SOURCE_LOCATION, | |
1241 MAX_SOURCE_LOCATION + 1); | |
1242 | |
1243 /* Visualize ad-hoc values. */ | |
1244 dump_labelled_location_range (stream, "AD-HOC LOCATIONS", | |
1245 MAX_SOURCE_LOCATION + 1, UINT_MAX); | |
1246 } | |
1247 | |
1248 /* string_concat's constructor. */ | |
1249 | |
1250 string_concat::string_concat (int num, location_t *locs) | |
1251 : m_num (num) | |
1252 { | |
1253 m_locs = ggc_vec_alloc <location_t> (num); | |
1254 for (int i = 0; i < num; i++) | |
1255 m_locs[i] = locs[i]; | |
1256 } | |
1257 | |
1258 /* string_concat_db's constructor. */ | |
1259 | |
1260 string_concat_db::string_concat_db () | |
1261 { | |
1262 m_table = hash_map <location_hash, string_concat *>::create_ggc (64); | |
1263 } | |
1264 | |
1265 /* Record that a string concatenation occurred, covering NUM | |
1266 string literal tokens. LOCS is an array of size NUM, containing the | |
1267 locations of the tokens. A copy of LOCS is taken. */ | |
1268 | |
1269 void | |
1270 string_concat_db::record_string_concatenation (int num, location_t *locs) | |
1271 { | |
1272 gcc_assert (num > 1); | |
1273 gcc_assert (locs); | |
1274 | |
1275 location_t key_loc = get_key_loc (locs[0]); | |
1276 | |
1277 string_concat *concat | |
1278 = new (ggc_alloc <string_concat> ()) string_concat (num, locs); | |
1279 m_table->put (key_loc, concat); | |
1280 } | |
1281 | |
1282 /* Determine if LOC was the location of the the initial token of a | |
1283 concatenation of string literal tokens. | |
1284 If so, *OUT_NUM is written to with the number of tokens, and | |
1285 *OUT_LOCS with the location of an array of locations of the | |
1286 tokens, and return true. *OUT_LOCS is a borrowed pointer to | |
1287 storage owned by the string_concat_db. | |
1288 Otherwise, return false. */ | |
1289 | |
1290 bool | |
1291 string_concat_db::get_string_concatenation (location_t loc, | |
1292 int *out_num, | |
1293 location_t **out_locs) | |
1294 { | |
1295 gcc_assert (out_num); | |
1296 gcc_assert (out_locs); | |
1297 | |
1298 location_t key_loc = get_key_loc (loc); | |
1299 | |
1300 string_concat **concat = m_table->get (key_loc); | |
1301 if (!concat) | |
1302 return false; | |
1303 | |
1304 *out_num = (*concat)->m_num; | |
1305 *out_locs =(*concat)->m_locs; | |
1306 return true; | |
1307 } | |
1308 | |
1309 /* Internal function. Canonicalize LOC into a form suitable for | |
1310 use as a key within the database, stripping away macro expansion, | |
1311 ad-hoc information, and range information, using the location of | |
1312 the start of LOC within an ordinary linemap. */ | |
1313 | |
1314 location_t | |
1315 string_concat_db::get_key_loc (location_t loc) | |
1316 { | |
1317 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION, | |
1318 NULL); | |
1319 | |
1320 loc = get_range_from_loc (line_table, loc).m_start; | |
1321 | |
1322 return loc; | |
1323 } | |
1324 | |
1325 /* Helper class for use within get_substring_ranges_for_loc. | |
1326 An vec of cpp_string with responsibility for releasing all of the | |
1327 str->text for each str in the vector. */ | |
1328 | |
1329 class auto_cpp_string_vec : public auto_vec <cpp_string> | |
1330 { | |
1331 public: | |
1332 auto_cpp_string_vec (int alloc) | |
1333 : auto_vec <cpp_string> (alloc) {} | |
1334 | |
1335 ~auto_cpp_string_vec () | |
1336 { | |
1337 /* Clean up the copies within this vec. */ | |
1338 int i; | |
1339 cpp_string *str; | |
1340 FOR_EACH_VEC_ELT (*this, i, str) | |
1341 free (const_cast <unsigned char *> (str->text)); | |
1342 } | |
1343 }; | |
1344 | |
1345 /* Attempt to populate RANGES with source location information on the | |
1346 individual characters within the string literal found at STRLOC. | |
1347 If CONCATS is non-NULL, then any string literals that the token at | |
1348 STRLOC was concatenated with are also added to RANGES. | |
1349 | |
1350 Return NULL if successful, or an error message if any errors occurred (in | |
1351 which case RANGES may be only partially populated and should not | |
1352 be used). | |
1353 | |
1354 This is implemented by re-parsing the relevant source line(s). */ | |
1355 | |
1356 static const char * | |
1357 get_substring_ranges_for_loc (cpp_reader *pfile, | |
1358 string_concat_db *concats, | |
1359 location_t strloc, | |
1360 enum cpp_ttype type, | |
1361 cpp_substring_ranges &ranges) | |
1362 { | |
1363 gcc_assert (pfile); | |
1364 | |
1365 if (strloc == UNKNOWN_LOCATION) | |
1366 return "unknown location"; | |
1367 | |
1368 /* Reparsing the strings requires accurate location information. | |
1369 If -ftrack-macro-expansion has been overridden from its default | |
1370 of 2, then we might have a location of a macro expansion point, | |
1371 rather than the location of the literal itself. | |
1372 Avoid this by requiring that we have full macro expansion tracking | |
1373 for substring locations to be available. */ | |
1374 if (cpp_get_options (pfile)->track_macro_expansion != 2) | |
1375 return "track_macro_expansion != 2"; | |
1376 | |
1377 /* If #line or # 44 "file"-style directives are present, then there's | |
1378 no guarantee that the line numbers we have can be used to locate | |
1379 the strings. For example, we might have a .i file with # directives | |
1380 pointing back to lines within a .c file, but the .c file might | |
1381 have been edited since the .i file was created. | |
1382 In such a case, the safest course is to disable on-demand substring | |
1383 locations. */ | |
1384 if (line_table->seen_line_directive) | |
1385 return "seen line directive"; | |
1386 | |
1387 /* If string concatenation has occurred at STRLOC, get the locations | |
1388 of all of the literal tokens making up the compound string. | |
1389 Otherwise, just use STRLOC. */ | |
1390 int num_locs = 1; | |
1391 location_t *strlocs = &strloc; | |
1392 if (concats) | |
1393 concats->get_string_concatenation (strloc, &num_locs, &strlocs); | |
1394 | |
1395 auto_cpp_string_vec strs (num_locs); | |
1396 auto_vec <cpp_string_location_reader> loc_readers (num_locs); | |
1397 for (int i = 0; i < num_locs; i++) | |
1398 { | |
1399 /* Get range of strloc. We will use it to locate the start and finish | |
1400 of the literal token within the line. */ | |
1401 source_range src_range = get_range_from_loc (line_table, strlocs[i]); | |
1402 | |
1403 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table)) | |
1404 /* If the string is within a macro expansion, we can't get at the | |
1405 end location. */ | |
1406 return "macro expansion"; | |
1407 | |
1408 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1409 /* If so, we can't reliably determine where the token started within | |
1410 its line. */ | |
1411 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS"; | |
1412 | |
1413 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1414 /* If so, we can't reliably determine where the token finished within | |
1415 its line. */ | |
1416 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS"; | |
1417 | |
1418 expanded_location start | |
1419 = expand_location_to_spelling_point (src_range.m_start); | |
1420 expanded_location finish | |
1421 = expand_location_to_spelling_point (src_range.m_finish); | |
1422 if (start.file != finish.file) | |
1423 return "range endpoints are in different files"; | |
1424 if (start.line != finish.line) | |
1425 return "range endpoints are on different lines"; | |
1426 if (start.column > finish.column) | |
1427 return "range endpoints are reversed"; | |
1428 | |
1429 int line_width; | |
1430 const char *line = location_get_source_line (start.file, start.line, | |
1431 &line_width); | |
1432 if (line == NULL) | |
1433 return "unable to read source line"; | |
1434 | |
1435 /* Determine the location of the literal (including quotes | |
1436 and leading prefix chars, such as the 'u' in a u"" | |
1437 token). */ | |
1438 const char *literal = line + start.column - 1; | |
1439 int literal_length = finish.column - start.column + 1; | |
1440 | |
1441 /* Ensure that we don't crash if we got the wrong location. */ | |
1442 if (line_width < (start.column - 1 + literal_length)) | |
1443 return "line is not wide enough"; | |
1444 | |
1445 cpp_string from; | |
1446 from.len = literal_length; | |
1447 /* Make a copy of the literal, to avoid having to rely on | |
1448 the lifetime of the copy of the line within the cache. | |
1449 This will be released by the auto_cpp_string_vec dtor. */ | |
1450 from.text = XDUPVEC (unsigned char, literal, literal_length); | |
1451 strs.safe_push (from); | |
1452 | |
1453 /* For very long lines, a new linemap could have started | |
1454 halfway through the token. | |
1455 Ensure that the loc_reader uses the linemap of the | |
1456 *end* of the token for its start location. */ | |
1457 const line_map_ordinary *final_ord_map; | |
1458 linemap_resolve_location (line_table, src_range.m_finish, | |
1459 LRK_MACRO_EXPANSION_POINT, &final_ord_map); | |
1460 location_t start_loc | |
1461 = linemap_position_for_line_and_column (line_table, final_ord_map, | |
1462 start.line, start.column); | |
1463 | |
1464 cpp_string_location_reader loc_reader (start_loc, line_table); | |
1465 loc_readers.safe_push (loc_reader); | |
1466 } | |
1467 | |
1468 /* Rerun cpp_interpret_string, or rather, a modified version of it. */ | |
1469 const char *err = cpp_interpret_string_ranges (pfile, strs.address (), | |
1470 loc_readers.address (), | |
1471 num_locs, &ranges, type); | |
1472 if (err) | |
1473 return err; | |
1474 | |
1475 /* Success: "ranges" should now contain information on the string. */ | |
1476 return NULL; | |
1477 } | |
1478 | |
1479 /* Attempt to populate *OUT_LOC with source location information on the | |
1480 given characters within the string literal found at STRLOC. | |
1481 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution | |
1482 character set. | |
1483 | |
1484 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7 | |
1485 and string literal "012345\n789" | |
1486 *OUT_LOC is written to with: | |
1487 "012345\n789" | |
1488 ~^~~~~ | |
1489 | |
1490 If CONCATS is non-NULL, then any string literals that the token at | |
1491 STRLOC was concatenated with are also considered. | |
1492 | |
1493 This is implemented by re-parsing the relevant source line(s). | |
1494 | |
1495 Return NULL if successful, or an error message if any errors occurred. | |
1496 Error messages are intended for GCC developers (to help debugging) rather | |
1497 than for end-users. */ | |
1498 | |
1499 const char * | |
1500 get_source_location_for_substring (cpp_reader *pfile, | |
1501 string_concat_db *concats, | |
1502 location_t strloc, | |
1503 enum cpp_ttype type, | |
1504 int caret_idx, int start_idx, int end_idx, | |
1505 source_location *out_loc) | |
1506 { | |
1507 gcc_checking_assert (caret_idx >= 0); | |
1508 gcc_checking_assert (start_idx >= 0); | |
1509 gcc_checking_assert (end_idx >= 0); | |
1510 gcc_assert (out_loc); | |
1511 | |
1512 cpp_substring_ranges ranges; | |
1513 const char *err | |
1514 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); | |
1515 if (err) | |
1516 return err; | |
1517 | |
1518 if (caret_idx >= ranges.get_num_ranges ()) | |
1519 return "caret_idx out of range"; | |
1520 if (start_idx >= ranges.get_num_ranges ()) | |
1521 return "start_idx out of range"; | |
1522 if (end_idx >= ranges.get_num_ranges ()) | |
1523 return "end_idx out of range"; | |
1524 | |
1525 *out_loc = make_location (ranges.get_range (caret_idx).m_start, | |
1526 ranges.get_range (start_idx).m_start, | |
1527 ranges.get_range (end_idx).m_finish); | |
1528 return NULL; | |
1529 } | |
1530 | |
1531 #if CHECKING_P | |
1532 | |
1533 namespace selftest { | |
1534 | |
1535 /* Selftests of location handling. */ | |
1536 | |
1537 /* Attempt to populate *OUT_RANGE with source location information on the | |
1538 given character within the string literal found at STRLOC. | |
1539 CHAR_IDX refers to an offset within the execution character set. | |
1540 If CONCATS is non-NULL, then any string literals that the token at | |
1541 STRLOC was concatenated with are also considered. | |
1542 | |
1543 This is implemented by re-parsing the relevant source line(s). | |
1544 | |
1545 Return NULL if successful, or an error message if any errors occurred. | |
1546 Error messages are intended for GCC developers (to help debugging) rather | |
1547 than for end-users. */ | |
1548 | |
1549 static const char * | |
1550 get_source_range_for_char (cpp_reader *pfile, | |
1551 string_concat_db *concats, | |
1552 location_t strloc, | |
1553 enum cpp_ttype type, | |
1554 int char_idx, | |
1555 source_range *out_range) | |
1556 { | |
1557 gcc_checking_assert (char_idx >= 0); | |
1558 gcc_assert (out_range); | |
1559 | |
1560 cpp_substring_ranges ranges; | |
1561 const char *err | |
1562 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); | |
1563 if (err) | |
1564 return err; | |
1565 | |
1566 if (char_idx >= ranges.get_num_ranges ()) | |
1567 return "char_idx out of range"; | |
1568 | |
1569 *out_range = ranges.get_range (char_idx); | |
1570 return NULL; | |
1571 } | |
1572 | |
1573 /* As get_source_range_for_char, but write to *OUT the number | |
1574 of ranges that are available. */ | |
1575 | |
1576 static const char * | |
1577 get_num_source_ranges_for_substring (cpp_reader *pfile, | |
1578 string_concat_db *concats, | |
1579 location_t strloc, | |
1580 enum cpp_ttype type, | |
1581 int *out) | |
1582 { | |
1583 gcc_assert (out); | |
1584 | |
1585 cpp_substring_ranges ranges; | |
1586 const char *err | |
1587 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); | |
1588 | |
1589 if (err) | |
1590 return err; | |
1591 | |
1592 *out = ranges.get_num_ranges (); | |
1593 return NULL; | |
1594 } | |
1595 | |
1596 /* Selftests of location handling. */ | |
1597 | |
1598 /* Helper function for verifying location data: when location_t | |
1599 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated | |
1600 as having column 0. */ | |
1601 | |
1602 static bool | |
1603 should_have_column_data_p (location_t loc) | |
1604 { | |
1605 if (IS_ADHOC_LOC (loc)) | |
1606 loc = get_location_from_adhoc_loc (line_table, loc); | |
1607 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1608 return false; | |
1609 return true; | |
1610 } | |
1611 | |
1612 /* Selftest for should_have_column_data_p. */ | |
1613 | |
1614 static void | |
1615 test_should_have_column_data_p () | |
1616 { | |
1617 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT)); | |
1618 ASSERT_TRUE | |
1619 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS)); | |
1620 ASSERT_FALSE | |
1621 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1)); | |
1622 } | |
1623 | |
1624 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN | |
1625 on LOC. */ | |
1626 | |
1627 static void | |
1628 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum, | |
1629 location_t loc) | |
1630 { | |
1631 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc)); | |
1632 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc)); | |
1633 /* If location_t values are sufficiently high, then column numbers | |
1634 will be unavailable and LOCATION_COLUMN (loc) will be 0. | |
1635 When close to the threshold, column numbers *may* be present: if | |
1636 the final linemap before the threshold contains a line that straddles | |
1637 the threshold, locations in that line have column information. */ | |
1638 if (should_have_column_data_p (loc)) | |
1639 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc)); | |
1640 } | |
1641 | |
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Location at which the line_table should start; zero leaves the
     line_table's starting point alone.  */
  int m_base_location;
};
1668 | |
1669 /* Constructor. Store the old value of line_table, and create a new | |
1670 one, using sane defaults. */ | |
1671 | |
1672 line_table_test::line_table_test () | |
1673 { | |
1674 gcc_assert (saved_line_table == NULL); | |
1675 saved_line_table = line_table; | |
1676 line_table = ggc_alloc<line_maps> (); | |
1677 linemap_init (line_table, BUILTINS_LOCATION); | |
1678 gcc_assert (saved_line_table->reallocator); | |
1679 line_table->reallocator = saved_line_table->reallocator; | |
1680 gcc_assert (saved_line_table->round_alloc_size); | |
1681 line_table->round_alloc_size = saved_line_table->round_alloc_size; | |
1682 line_table->default_range_bits = 0; | |
1683 } | |
1684 | |
1685 /* Constructor. Store the old value of line_table, and create a new | |
1686 one, using the sitation described in CASE_. */ | |
1687 | |
1688 line_table_test::line_table_test (const line_table_case &case_) | |
1689 { | |
1690 gcc_assert (saved_line_table == NULL); | |
1691 saved_line_table = line_table; | |
1692 line_table = ggc_alloc<line_maps> (); | |
1693 linemap_init (line_table, BUILTINS_LOCATION); | |
1694 gcc_assert (saved_line_table->reallocator); | |
1695 line_table->reallocator = saved_line_table->reallocator; | |
1696 gcc_assert (saved_line_table->round_alloc_size); | |
1697 line_table->round_alloc_size = saved_line_table->round_alloc_size; | |
1698 line_table->default_range_bits = case_.m_default_range_bits; | |
1699 if (case_.m_base_location) | |
1700 { | |
1701 line_table->highest_location = case_.m_base_location; | |
1702 line_table->highest_line = case_.m_base_location; | |
1703 } | |
1704 } | |
1705 | |
1706 /* Destructor. Restore the old value of line_table. */ | |
1707 | |
1708 line_table_test::~line_table_test () | |
1709 { | |
1710 gcc_assert (saved_line_table != NULL); | |
1711 line_table = saved_line_table; | |
1712 saved_line_table = NULL; | |
1713 } | |
1714 | |
1715 /* Verify basic operation of ordinary linemaps. */ | |
1716 | |
1717 static void | |
1718 test_accessing_ordinary_linemaps (const line_table_case &case_) | |
1719 { | |
1720 line_table_test ltt (case_); | |
1721 | |
1722 /* Build a simple linemap describing some locations. */ | |
1723 linemap_add (line_table, LC_ENTER, false, "foo.c", 0); | |
1724 | |
1725 linemap_line_start (line_table, 1, 100); | |
1726 location_t loc_a = linemap_position_for_column (line_table, 1); | |
1727 location_t loc_b = linemap_position_for_column (line_table, 23); | |
1728 | |
1729 linemap_line_start (line_table, 2, 100); | |
1730 location_t loc_c = linemap_position_for_column (line_table, 1); | |
1731 location_t loc_d = linemap_position_for_column (line_table, 17); | |
1732 | |
1733 /* Example of a very long line. */ | |
1734 linemap_line_start (line_table, 3, 2000); | |
1735 location_t loc_e = linemap_position_for_column (line_table, 700); | |
1736 | |
1737 /* Transitioning back to a short line. */ | |
1738 linemap_line_start (line_table, 4, 0); | |
1739 location_t loc_back_to_short = linemap_position_for_column (line_table, 100); | |
1740 | |
1741 if (should_have_column_data_p (loc_back_to_short)) | |
1742 { | |
1743 /* Verify that we switched to short lines in the linemap. */ | |
1744 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table); | |
1745 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits); | |
1746 } | |
1747 | |
1748 /* Example of a line that will eventually be seen to be longer | |
1749 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is | |
1750 below that. */ | |
1751 linemap_line_start (line_table, 5, 2000); | |
1752 | |
1753 location_t loc_start_of_very_long_line | |
1754 = linemap_position_for_column (line_table, 2000); | |
1755 location_t loc_too_wide | |
1756 = linemap_position_for_column (line_table, 4097); | |
1757 location_t loc_too_wide_2 | |
1758 = linemap_position_for_column (line_table, 4098); | |
1759 | |
1760 /* ...and back to a sane line length. */ | |
1761 linemap_line_start (line_table, 6, 100); | |
1762 location_t loc_sane_again = linemap_position_for_column (line_table, 10); | |
1763 | |
1764 linemap_add (line_table, LC_LEAVE, false, NULL, 0); | |
1765 | |
1766 /* Multiple files. */ | |
1767 linemap_add (line_table, LC_ENTER, false, "bar.c", 0); | |
1768 linemap_line_start (line_table, 1, 200); | |
1769 location_t loc_f = linemap_position_for_column (line_table, 150); | |
1770 linemap_add (line_table, LC_LEAVE, false, NULL, 0); | |
1771 | |
1772 /* Verify that we can recover the location info. */ | |
1773 assert_loceq ("foo.c", 1, 1, loc_a); | |
1774 assert_loceq ("foo.c", 1, 23, loc_b); | |
1775 assert_loceq ("foo.c", 2, 1, loc_c); | |
1776 assert_loceq ("foo.c", 2, 17, loc_d); | |
1777 assert_loceq ("foo.c", 3, 700, loc_e); | |
1778 assert_loceq ("foo.c", 4, 100, loc_back_to_short); | |
1779 | |
1780 /* In the very wide line, the initial location should be fully tracked. */ | |
1781 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line); | |
1782 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should | |
1783 be disabled. */ | |
1784 assert_loceq ("foo.c", 5, 0, loc_too_wide); | |
1785 assert_loceq ("foo.c", 5, 0, loc_too_wide_2); | |
1786 /*...and column-tracking should be re-enabled for subsequent lines. */ | |
1787 assert_loceq ("foo.c", 6, 10, loc_sane_again); | |
1788 | |
1789 assert_loceq ("bar.c", 1, 150, loc_f); | |
1790 | |
1791 ASSERT_FALSE (is_location_from_builtin_token (loc_a)); | |
1792 ASSERT_TRUE (pure_location_p (line_table, loc_a)); | |
1793 | |
1794 /* Verify using make_location to build a range, and extracting data | |
1795 back from it. */ | |
1796 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d); | |
1797 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d)); | |
1798 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d)); | |
1799 source_range src_range = get_range_from_loc (line_table, range_c_b_d); | |
1800 ASSERT_EQ (loc_b, src_range.m_start); | |
1801 ASSERT_EQ (loc_d, src_range.m_finish); | |
1802 } | |
1803 | |
/* Verify various properties of UNKNOWN_LOCATION: it must expand to a
   NULL filename, line zero and column zero.  */

static void
test_unknown_location ()
{
  ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
}
1813 | |
1814 /* Verify various properties of BUILTINS_LOCATION. */ | |
1815 | |
1816 static void | |
1817 test_builtins () | |
1818 { | |
1819 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION); | |
1820 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION); | |
1821 } | |
1822 | |
1823 /* Regression test for make_location. | |
1824 Ensure that we use pure locations for the start/finish of the range, | |
1825 rather than storing a packed or ad-hoc range as the start/finish. */ | |
1826 | |
1827 static void | |
1828 test_make_location_nonpure_range_endpoints (const line_table_case &case_) | |
1829 { | |
1830 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c | |
1831 with C++ frontend. | |
1832 ....................0000000001111111111222. | |
1833 ....................1234567890123456789012. */ | |
1834 const char *content = " r += !aaa == bbb;\n"; | |
1835 temp_source_file tmp (SELFTEST_LOCATION, ".C", content); | |
1836 line_table_test ltt (case_); | |
1837 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1); | |
1838 | |
1839 const location_t c11 = linemap_position_for_column (line_table, 11); | |
1840 const location_t c12 = linemap_position_for_column (line_table, 12); | |
1841 const location_t c13 = linemap_position_for_column (line_table, 13); | |
1842 const location_t c14 = linemap_position_for_column (line_table, 14); | |
1843 const location_t c21 = linemap_position_for_column (line_table, 21); | |
1844 | |
1845 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1846 return; | |
1847 | |
1848 /* Use column 13 for the caret location, arbitrarily, to verify that we | |
1849 handle start != caret. */ | |
1850 const location_t aaa = make_location (c13, c12, c14); | |
1851 ASSERT_EQ (c13, get_pure_location (aaa)); | |
1852 ASSERT_EQ (c12, get_start (aaa)); | |
1853 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa))); | |
1854 ASSERT_EQ (c14, get_finish (aaa)); | |
1855 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa))); | |
1856 | |
1857 /* Make a location using a location with a range as the start-point. */ | |
1858 const location_t not_aaa = make_location (c11, aaa, c14); | |
1859 ASSERT_EQ (c11, get_pure_location (not_aaa)); | |
1860 /* It should use the start location of the range, not store the range | |
1861 itself. */ | |
1862 ASSERT_EQ (c12, get_start (not_aaa)); | |
1863 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa))); | |
1864 ASSERT_EQ (c14, get_finish (not_aaa)); | |
1865 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa))); | |
1866 | |
1867 /* Similarly, make a location with a range as the end-point. */ | |
1868 const location_t aaa_eq_bbb = make_location (c12, c12, c21); | |
1869 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb)); | |
1870 ASSERT_EQ (c12, get_start (aaa_eq_bbb)); | |
1871 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb))); | |
1872 ASSERT_EQ (c21, get_finish (aaa_eq_bbb)); | |
1873 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb))); | |
1874 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb); | |
1875 /* It should use the finish location of the range, not store the range | |
1876 itself. */ | |
1877 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb)); | |
1878 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb)); | |
1879 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb))); | |
1880 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb)); | |
1881 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb))); | |
1882 } | |
1883 | |
1884 /* Verify reading of input files (e.g. for caret-based diagnostics). */ | |
1885 | |
1886 static void | |
1887 test_reading_source_line () | |
1888 { | |
1889 /* Create a tempfile and write some text to it. */ | |
1890 temp_source_file tmp (SELFTEST_LOCATION, ".txt", | |
1891 "01234567890123456789\n" | |
1892 "This is the test text\n" | |
1893 "This is the 3rd line"); | |
1894 | |
1895 /* Read back a specific line from the tempfile. */ | |
1896 int line_size; | |
1897 const char *source_line = location_get_source_line (tmp.get_filename (), | |
1898 3, &line_size); | |
1899 ASSERT_TRUE (source_line != NULL); | |
1900 ASSERT_EQ (20, line_size); | |
1901 ASSERT_TRUE (!strncmp ("This is the 3rd line", | |
1902 source_line, line_size)); | |
1903 | |
1904 source_line = location_get_source_line (tmp.get_filename (), | |
1905 2, &line_size); | |
1906 ASSERT_TRUE (source_line != NULL); | |
1907 ASSERT_EQ (21, line_size); | |
1908 ASSERT_TRUE (!strncmp ("This is the test text", | |
1909 source_line, line_size)); | |
1910 | |
1911 source_line = location_get_source_line (tmp.get_filename (), | |
1912 4, &line_size); | |
1913 ASSERT_TRUE (source_line == NULL); | |
1914 } | |
1915 | |
/* Tests of lexing.  */

/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.  The text returned by cpp_token_as_text is
   compared as a plain char string using ASSERT_STREQ.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1926 | |
/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
   and ranges from EXP_START_COL to EXP_FINISH_COL.
   Use LOC as the effective location of the selftest.

   The filename and line number are always checked; the column checks
   are skipped when should_have_column_data_p rejects the token's
   location.  */

static void
assert_token_loc_eq (const location &loc,
                     const cpp_token *tok,
                     const char *exp_filename, int exp_linenum,
                     int exp_start_col, int exp_finish_col)
{
  location_t tok_loc = tok->src_loc;
  ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
  ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));

  /* If location_t values are sufficiently high, then column numbers
     will be unavailable.  */
  if (!should_have_column_data_p (tok_loc))
    return;

  /* The token's own column is expected to be the start column, and the
     range stored for it must run from the start to the finish column.  */
  ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
  source_range tok_range = get_range_from_loc (line_table, tok_loc);
  ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
  ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
}

/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest, so that
   a failure is reported at the line of the macro invocation.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1959 | |
1960 /* Test of lexing a file using libcpp, verifying tokens and their | |
1961 location information. */ | |
1962 | |
1963 static void | |
1964 test_lexer (const line_table_case &case_) | |
1965 { | |
1966 /* Create a tempfile and write some text to it. */ | |
1967 const char *content = | |
1968 /*00000000011111111112222222222333333.3333444444444.455555555556 | |
1969 12345678901234567890123456789012345.6789012345678.901234567890. */ | |
1970 ("test_name /* c-style comment */\n" | |
1971 " \"test literal\"\n" | |
1972 " // test c++-style comment\n" | |
1973 " 42\n"); | |
1974 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content); | |
1975 | |
1976 line_table_test ltt (case_); | |
1977 | |
1978 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table); | |
1979 | |
1980 const char *fname = cpp_read_main_file (parser, tmp.get_filename ()); | |
1981 ASSERT_NE (fname, NULL); | |
1982 | |
1983 /* Verify that we get the expected tokens back, with the correct | |
1984 location information. */ | |
1985 | |
1986 location_t loc; | |
1987 const cpp_token *tok; | |
1988 tok = cpp_get_token_with_location (parser, &loc); | |
1989 ASSERT_NE (tok, NULL); | |
1990 ASSERT_EQ (tok->type, CPP_NAME); | |
1991 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name"); | |
1992 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9); | |
1993 | |
1994 tok = cpp_get_token_with_location (parser, &loc); | |
1995 ASSERT_NE (tok, NULL); | |
1996 ASSERT_EQ (tok->type, CPP_STRING); | |
1997 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\""); | |
1998 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48); | |
1999 | |
2000 tok = cpp_get_token_with_location (parser, &loc); | |
2001 ASSERT_NE (tok, NULL); | |
2002 ASSERT_EQ (tok->type, CPP_NUMBER); | |
2003 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42"); | |
2004 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5); | |
2005 | |
2006 tok = cpp_get_token_with_location (parser, &loc); | |
2007 ASSERT_NE (tok, NULL); | |
2008 ASSERT_EQ (tok->type, CPP_EOF); | |
2009 | |
2010 cpp_finish (parser, NULL); | |
2011 cpp_destroy (parser); | |
2012 } | |
2013 | |
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  /* Virtual, since this class is a polymorphic base: destroying a
     subclass through a pointer to this class must be well-defined.  */
  virtual ~lexer_test_options () {}

  /* Configure the given lexer_test.  Subclasses must implement this.  */
  virtual void apply (lexer_test &) = 0;
};
2027 | |
2028 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy | |
2029 in its dtor. | |
2030 | |
2031 This is needed by struct lexer_test to ensure that the cleanup of the | |
2032 cpp_reader happens *after* the cleanup of the temp_source_file. */ | |
2033 | |
2034 class cpp_reader_ptr | |
2035 { | |
2036 public: | |
2037 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {} | |
2038 | |
2039 ~cpp_reader_ptr () | |
2040 { | |
2041 cpp_finish (m_ptr, NULL); | |
2042 cpp_destroy (m_ptr); | |
2043 } | |
2044 | |
2045 operator cpp_reader * () const { return m_ptr; } | |
2046 | |
2047 private: | |
2048 cpp_reader *m_ptr; | |
2049 }; | |
2050 | |
/* A struct for writing lexer tests.  It bundles a line-table override,
   a cpp_reader, the temporary source file being lexed, and a string
   concatenation database.  */

struct lexer_test
{
  /* Set up lexing of CONTENT under the line-table configuration CASE_.
     OPTIONS may be NULL; otherwise its "apply" hook is run on this
     object during construction.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  /* Fetch the next token from m_parser.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.c's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  /* String concatenation database handed to the substring-location
     helpers.  */
  string_concat_db m_concats;
  /* When true, the destructor asserts that the next token is CPP_EOF.  */
  bool m_implicitly_expect_EOF;
};
2074 | |
2075 /* Use an EBCDIC encoding for the execution charset, specifically | |
2076 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). | |
2077 | |
2078 This exercises iconv integration within libcpp. | |
2079 Not every build of iconv supports the given charset, | |
2080 so we need to flag this error and handle it gracefully. */ | |
2081 | |
2082 class ebcdic_execution_charset : public lexer_test_options | |
2083 { | |
2084 public: | |
2085 ebcdic_execution_charset () : m_num_iconv_errors (0) | |
2086 { | |
2087 gcc_assert (s_singleton == NULL); | |
2088 s_singleton = this; | |
2089 } | |
2090 ~ebcdic_execution_charset () | |
2091 { | |
2092 gcc_assert (s_singleton == this); | |
2093 s_singleton = NULL; | |
2094 } | |
2095 | |
2096 void apply (lexer_test &test) FINAL OVERRIDE | |
2097 { | |
2098 cpp_options *cpp_opts = cpp_get_options (test.m_parser); | |
2099 cpp_opts->narrow_charset = "IBM1047"; | |
2100 | |
2101 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); | |
2102 callbacks->error = on_error; | |
2103 } | |
2104 | |
2105 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED, | |
2106 int level ATTRIBUTE_UNUSED, | |
2107 int reason ATTRIBUTE_UNUSED, | |
2108 rich_location *richloc ATTRIBUTE_UNUSED, | |
2109 const char *msgid, va_list *ap ATTRIBUTE_UNUSED) | |
2110 ATTRIBUTE_FPTR_PRINTF(5,0) | |
2111 { | |
2112 gcc_assert (s_singleton); | |
2113 /* Avoid exgettext from picking this up, it is translated in libcpp. */ | |
2114 const char *msg = "conversion from %s to %s not supported by iconv"; | |
2115 #ifdef ENABLE_NLS | |
2116 msg = dgettext ("cpplib", msg); | |
2117 #endif | |
2118 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc | |
2119 when the local iconv build doesn't support the conversion. */ | |
2120 if (strcmp (msgid, msg) == 0) | |
2121 { | |
2122 s_singleton->m_num_iconv_errors++; | |
2123 return true; | |
2124 } | |
2125 | |
2126 /* Otherwise, we have an unexpected error. */ | |
2127 abort (); | |
2128 } | |
2129 | |
2130 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; } | |
2131 | |
2132 private: | |
2133 static ebcdic_execution_charset *s_singleton; | |
2134 int m_num_iconv_errors; | |
2135 }; | |
2136 | |
2137 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; | |
2138 | |
2139 /* A lexer_test_options subclass that records a list of error | |
2140 messages emitted by the lexer. */ | |
2141 | |
2142 class lexer_error_sink : public lexer_test_options | |
2143 { | |
2144 public: | |
2145 lexer_error_sink () | |
2146 { | |
2147 gcc_assert (s_singleton == NULL); | |
2148 s_singleton = this; | |
2149 } | |
2150 ~lexer_error_sink () | |
2151 { | |
2152 gcc_assert (s_singleton == this); | |
2153 s_singleton = NULL; | |
2154 | |
2155 int i; | |
2156 char *str; | |
2157 FOR_EACH_VEC_ELT (m_errors, i, str) | |
2158 free (str); | |
2159 } | |
2160 | |
2161 void apply (lexer_test &test) FINAL OVERRIDE | |
2162 { | |
2163 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); | |
2164 callbacks->error = on_error; | |
2165 } | |
2166 | |
2167 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED, | |
2168 int level ATTRIBUTE_UNUSED, | |
2169 int reason ATTRIBUTE_UNUSED, | |
2170 rich_location *richloc ATTRIBUTE_UNUSED, | |
2171 const char *msgid, va_list *ap) | |
2172 ATTRIBUTE_FPTR_PRINTF(5,0) | |
2173 { | |
2174 char *msg = xvasprintf (msgid, *ap); | |
2175 s_singleton->m_errors.safe_push (msg); | |
2176 return true; | |
2177 } | |
2178 | |
2179 auto_vec<char *> m_errors; | |
2180 | |
2181 private: | |
2182 static lexer_error_sink *s_singleton; | |
2183 }; | |
2184 | |
2185 lexer_error_sink *lexer_error_sink::s_singleton; | |
2186 | |
2187 /* Constructor. Override line_table with a new instance based on CASE_, | |
2188 and write CONTENT to a tempfile. Create a cpp_reader, and use it to | |
2189 start parsing the tempfile. */ | |
2190 | |
2191 lexer_test::lexer_test (const line_table_case &case_, const char *content, | |
2192 lexer_test_options *options) | |
2193 : m_ltt (case_), | |
2194 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), | |
2195 /* Create a tempfile and write the text to it. */ | |
2196 m_tempfile (SELFTEST_LOCATION, ".c", content), | |
2197 m_concats (), | |
2198 m_implicitly_expect_EOF (true) | |
2199 { | |
2200 if (options) | |
2201 options->apply (*this); | |
2202 | |
2203 cpp_init_iconv (m_parser); | |
2204 | |
2205 /* Parse the file. */ | |
2206 const char *fname = cpp_read_main_file (m_parser, | |
2207 m_tempfile.get_filename ()); | |
2208 ASSERT_NE (fname, NULL); | |
2209 } | |
2210 | |
2211 /* Destructor. By default, verify that the next token in m_parser is EOF. */ | |
2212 | |
2213 lexer_test::~lexer_test () | |
2214 { | |
2215 location_t loc; | |
2216 const cpp_token *tok; | |
2217 | |
2218 if (m_implicitly_expect_EOF) | |
2219 { | |
2220 tok = cpp_get_token_with_location (m_parser, &loc); | |
2221 ASSERT_NE (tok, NULL); | |
2222 ASSERT_EQ (tok->type, CPP_EOF); | |
2223 } | |
2224 } | |
2225 | |
2226 /* Get the next token from m_parser. */ | |
2227 | |
2228 const cpp_token * | |
2229 lexer_test::get_token () | |
2230 { | |
2231 location_t loc; | |
2232 const cpp_token *tok; | |
2233 | |
2234 tok = cpp_get_token_with_location (m_parser, &loc); | |
2235 ASSERT_NE (tok, NULL); | |
2236 return tok; | |
2237 } | |
2238 | |
2239 /* Verify that locations within string literals are correctly handled. */ | |
2240 | |
2241 /* Verify get_source_range_for_substring for token(s) at STRLOC, | |
2242 using the string concatenation database for TEST. | |
2243 | |
2244 Assert that the character at index IDX is on EXPECTED_LINE, | |
2245 and that it begins at column EXPECTED_START_COL and ends at | |
2246 EXPECTED_FINISH_COL (unless the locations are beyond | |
2247 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their | |
2248 columns). */ | |
2249 | |
2250 static void | |
2251 assert_char_at_range (const location &loc, | |
2252 lexer_test& test, | |
2253 location_t strloc, enum cpp_ttype type, int idx, | |
2254 int expected_line, int expected_start_col, | |
2255 int expected_finish_col) | |
2256 { | |
2257 cpp_reader *pfile = test.m_parser; | |
2258 string_concat_db *concats = &test.m_concats; | |
2259 | |
2260 source_range actual_range = source_range(); | |
2261 const char *err | |
2262 = get_source_range_for_char (pfile, concats, strloc, type, idx, | |
2263 &actual_range); | |
2264 if (should_have_column_data_p (strloc)) | |
2265 ASSERT_EQ_AT (loc, NULL, err); | |
2266 else | |
2267 { | |
2268 ASSERT_STREQ_AT (loc, | |
2269 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2270 err); | |
2271 return; | |
2272 } | |
2273 | |
2274 int actual_start_line = LOCATION_LINE (actual_range.m_start); | |
2275 ASSERT_EQ_AT (loc, expected_line, actual_start_line); | |
2276 int actual_finish_line = LOCATION_LINE (actual_range.m_finish); | |
2277 ASSERT_EQ_AT (loc, expected_line, actual_finish_line); | |
2278 | |
2279 if (should_have_column_data_p (actual_range.m_start)) | |
2280 { | |
2281 int actual_start_col = LOCATION_COLUMN (actual_range.m_start); | |
2282 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col); | |
2283 } | |
2284 if (should_have_column_data_p (actual_range.m_finish)) | |
2285 { | |
2286 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish); | |
2287 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col); | |
2288 } | |
2289 } | |
2290 | |
2291 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for | |
2292 the effective location of any errors. */ | |
2293 | |
2294 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \ | |
2295 EXPECTED_START_COL, EXPECTED_FINISH_COL) \ | |
2296 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \ | |
2297 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \ | |
2298 (EXPECTED_FINISH_COL)) | |
2299 | |
2300 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC, | |
2301 using the string concatenation database for TEST. | |
2302 | |
2303 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */ | |
2304 | |
2305 static void | |
2306 assert_num_substring_ranges (const location &loc, | |
2307 lexer_test& test, | |
2308 location_t strloc, | |
2309 enum cpp_ttype type, | |
2310 int expected_num_ranges) | |
2311 { | |
2312 cpp_reader *pfile = test.m_parser; | |
2313 string_concat_db *concats = &test.m_concats; | |
2314 | |
2315 int actual_num_ranges = -1; | |
2316 const char *err | |
2317 = get_num_source_ranges_for_substring (pfile, concats, strloc, type, | |
2318 &actual_num_ranges); | |
2319 if (should_have_column_data_p (strloc)) | |
2320 ASSERT_EQ_AT (loc, NULL, err); | |
2321 else | |
2322 { | |
2323 ASSERT_STREQ_AT (loc, | |
2324 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2325 err); | |
2326 return; | |
2327 } | |
2328 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges); | |
2329 } | |
2330 | |
2331 /* Macro for calling assert_num_substring_ranges, supplying | |
2332 SELFTEST_LOCATION for the effective location of any errors. */ | |
2333 | |
2334 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \ | |
2335 EXPECTED_NUM_RANGES) \ | |
2336 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \ | |
2337 (TYPE), (EXPECTED_NUM_RANGES)) | |
2338 | |
2339 | |
2340 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC | |
2341 returns an error (using the string concatenation database for TEST). */ | |
2342 | |
2343 static void | |
2344 assert_has_no_substring_ranges (const location &loc, | |
2345 lexer_test& test, | |
2346 location_t strloc, | |
2347 enum cpp_ttype type, | |
2348 const char *expected_err) | |
2349 { | |
2350 cpp_reader *pfile = test.m_parser; | |
2351 string_concat_db *concats = &test.m_concats; | |
2352 cpp_substring_ranges ranges; | |
2353 const char *actual_err | |
2354 = get_substring_ranges_for_loc (pfile, concats, strloc, | |
2355 type, ranges); | |
2356 if (should_have_column_data_p (strloc)) | |
2357 ASSERT_STREQ_AT (loc, expected_err, actual_err); | |
2358 else | |
2359 ASSERT_STREQ_AT (loc, | |
2360 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2361 actual_err); | |
2362 } | |
2363 | |
2364 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \ | |
2365 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \ | |
2366 (STRLOC), (TYPE), (ERR)) | |
2367 | |
2368 /* Lex a simple string literal. Verify the substring location data, before | |
2369 and after running cpp_interpret_string on it. */ | |
2370 | |
2371 static void | |
2372 test_lexer_string_locations_simple (const line_table_case &case_) | |
2373 { | |
2374 /* Digits 0-9 (with 0 at column 10), the simple way. | |
2375 ....................000000000.11111111112.2222222223333333333 | |
2376 ....................123456789.01234567890.1234567890123456789 | |
2377 We add a trailing comment to ensure that we correctly locate | |
2378 the end of the string literal token. */ | |
2379 const char *content = " \"0123456789\" /* not a string */\n"; | |
2380 lexer_test test (case_, content, NULL); | |
2381 | |
2382 /* Verify that we get the expected token back, with the correct | |
2383 location information. */ | |
2384 const cpp_token *tok = test.get_token (); | |
2385 ASSERT_EQ (tok->type, CPP_STRING); | |
2386 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
2387 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); | |
2388 | |
2389 /* At this point in lexing, the quote characters are treated as part of | |
2390 the string (they are stripped off by cpp_interpret_string). */ | |
2391 | |
2392 ASSERT_EQ (tok->val.str.len, 12); | |
2393 | |
2394 /* Verify that cpp_interpret_string works. */ | |
2395 cpp_string dst_string; | |
2396 const enum cpp_ttype type = CPP_STRING; | |
2397 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2398 &dst_string, type); | |
2399 ASSERT_TRUE (result); | |
2400 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
2401 free (const_cast <unsigned char *> (dst_string.text)); | |
2402 | |
2403 /* Verify ranges of individual characters. This no longer includes the | |
2404 opening quote, but does include the closing quote. */ | |
2405 for (int i = 0; i <= 10; i++) | |
2406 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, | |
2407 10 + i, 10 + i); | |
2408 | |
2409 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); | |
2410 } | |
2411 | |
2412 /* As test_lexer_string_locations_simple, but use an EBCDIC execution | |
2413 encoding. */ | |
2414 | |
2415 static void | |
2416 test_lexer_string_locations_ebcdic (const line_table_case &case_) | |
2417 { | |
2418 /* EBCDIC support requires iconv. */ | |
2419 if (!HAVE_ICONV) | |
2420 return; | |
2421 | |
2422 /* Digits 0-9 (with 0 at column 10), the simple way. | |
2423 ....................000000000.11111111112.2222222223333333333 | |
2424 ....................123456789.01234567890.1234567890123456789 | |
2425 We add a trailing comment to ensure that we correctly locate | |
2426 the end of the string literal token. */ | |
2427 const char *content = " \"0123456789\" /* not a string */\n"; | |
2428 ebcdic_execution_charset use_ebcdic; | |
2429 lexer_test test (case_, content, &use_ebcdic); | |
2430 | |
2431 /* Verify that we get the expected token back, with the correct | |
2432 location information. */ | |
2433 const cpp_token *tok = test.get_token (); | |
2434 ASSERT_EQ (tok->type, CPP_STRING); | |
2435 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
2436 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); | |
2437 | |
2438 /* At this point in lexing, the quote characters are treated as part of | |
2439 the string (they are stripped off by cpp_interpret_string). */ | |
2440 | |
2441 ASSERT_EQ (tok->val.str.len, 12); | |
2442 | |
2443 /* The remainder of the test requires an iconv implementation that | |
2444 can convert from UTF-8 to the EBCDIC encoding requested above. */ | |
2445 if (use_ebcdic.iconv_errors_occurred_p ()) | |
2446 return; | |
2447 | |
2448 /* Verify that cpp_interpret_string works. */ | |
2449 cpp_string dst_string; | |
2450 const enum cpp_ttype type = CPP_STRING; | |
2451 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2452 &dst_string, type); | |
2453 ASSERT_TRUE (result); | |
2454 /* We should now have EBCDIC-encoded text, specifically | |
2455 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). | |
2456 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */ | |
2457 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", | |
2458 (const char *)dst_string.text); | |
2459 free (const_cast <unsigned char *> (dst_string.text)); | |
2460 | |
2461 /* Verify that we don't attempt to record substring location information | |
2462 for such cases. */ | |
2463 ASSERT_HAS_NO_SUBSTRING_RANGES | |
2464 (test, tok->src_loc, type, | |
2465 "execution character set != source character set"); | |
2466 } | |
2467 | |
2468 /* Lex a string literal containing a hex-escaped character. | |
2469 Verify the substring location data, before and after running | |
2470 cpp_interpret_string on it. */ | |
2471 | |
2472 static void | |
2473 test_lexer_string_locations_hex (const line_table_case &case_) | |
2474 { | |
2475 /* Digits 0-9, expressing digit 5 in ASCII as "\x35" | |
2476 and with a space in place of digit 6, to terminate the escaped | |
2477 hex code. | |
2478 ....................000000000.111111.11112222. | |
2479 ....................123456789.012345.67890123. */ | |
2480 const char *content = " \"01234\\x35 789\"\n"; | |
2481 lexer_test test (case_, content, NULL); | |
2482 | |
2483 /* Verify that we get the expected token back, with the correct | |
2484 location information. */ | |
2485 const cpp_token *tok = test.get_token (); | |
2486 ASSERT_EQ (tok->type, CPP_STRING); | |
2487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\""); | |
2488 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23); | |
2489 | |
2490 /* At this point in lexing, the quote characters are treated as part of | |
2491 the string (they are stripped off by cpp_interpret_string). */ | |
2492 ASSERT_EQ (tok->val.str.len, 15); | |
2493 | |
2494 /* Verify that cpp_interpret_string works. */ | |
2495 cpp_string dst_string; | |
2496 const enum cpp_ttype type = CPP_STRING; | |
2497 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2498 &dst_string, type); | |
2499 ASSERT_TRUE (result); | |
2500 ASSERT_STREQ ("012345 789", (const char *)dst_string.text); | |
2501 free (const_cast <unsigned char *> (dst_string.text)); | |
2502 | |
2503 /* Verify ranges of individual characters. This no longer includes the | |
2504 opening quote, but does include the closing quote. */ | |
2505 for (int i = 0; i <= 4; i++) | |
2506 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2507 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); | |
2508 for (int i = 6; i <= 10; i++) | |
2509 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); | |
2510 | |
2511 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); | |
2512 } | |
2513 | |
2514 /* Lex a string literal containing an octal-escaped character. | |
2515 Verify the substring location data after running cpp_interpret_string | |
2516 on it. */ | |
2517 | |
2518 static void | |
2519 test_lexer_string_locations_oct (const line_table_case &case_) | |
2520 { | |
2521 /* Digits 0-9, expressing digit 5 in ASCII as "\065" | |
2522 and with a space in place of digit 6, to terminate the escaped | |
2523 octal code. | |
2524 ....................000000000.111111.11112222.2222223333333333444 | |
2525 ....................123456789.012345.67890123.4567890123456789012 */ | |
2526 const char *content = " \"01234\\065 789\" /* not a string */\n"; | |
2527 lexer_test test (case_, content, NULL); | |
2528 | |
2529 /* Verify that we get the expected token back, with the correct | |
2530 location information. */ | |
2531 const cpp_token *tok = test.get_token (); | |
2532 ASSERT_EQ (tok->type, CPP_STRING); | |
2533 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\""); | |
2534 | |
2535 /* Verify that cpp_interpret_string works. */ | |
2536 cpp_string dst_string; | |
2537 const enum cpp_ttype type = CPP_STRING; | |
2538 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2539 &dst_string, type); | |
2540 ASSERT_TRUE (result); | |
2541 ASSERT_STREQ ("012345 789", (const char *)dst_string.text); | |
2542 free (const_cast <unsigned char *> (dst_string.text)); | |
2543 | |
2544 /* Verify ranges of individual characters. This no longer includes the | |
2545 opening quote, but does include the closing quote. */ | |
2546 for (int i = 0; i < 5; i++) | |
2547 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2548 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); | |
2549 for (int i = 6; i <= 10; i++) | |
2550 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); | |
2551 | |
2552 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); | |
2553 } | |
2554 | |
2555 /* Test of string literal containing letter escapes. */ | |
2556 | |
2557 static void | |
2558 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_) | |
2559 { | |
2560 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar. | |
2561 .....................000000000.1.11111.1.1.11222.22222223333333 | |
2562 .....................123456789.0.12345.6.7.89012.34567890123456. */ | |
2563 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n"); | |
2564 lexer_test test (case_, content, NULL); | |
2565 | |
2566 /* Verify that we get the expected tokens back. */ | |
2567 const cpp_token *tok = test.get_token (); | |
2568 ASSERT_EQ (tok->type, CPP_STRING); | |
2569 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\""); | |
2570 | |
2571 /* Verify ranges of individual characters. */ | |
2572 /* "\t". */ | |
2573 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2574 0, 1, 10, 11); | |
2575 /* "foo". */ | |
2576 for (int i = 1; i <= 3; i++) | |
2577 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2578 i, 1, 11 + i, 11 + i); | |
2579 /* "\\" and "\n". */ | |
2580 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2581 4, 1, 15, 16); | |
2582 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2583 5, 1, 17, 18); | |
2584 | |
2585 /* "bar" and closing quote for nul-terminator. */ | |
2586 for (int i = 6; i <= 9; i++) | |
2587 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2588 i, 1, 13 + i, 13 + i); | |
2589 | |
2590 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); | |
2591 } | |
2592 | |
2593 /* Another test of a string literal containing a letter escape. | |
2594 Based on string seen in | |
2595 printf ("%-%\n"); | |
2596 in gcc.dg/format/c90-printf-1.c. */ | |
2597 | |
2598 static void | |
2599 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_) | |
2600 { | |
2601 /* .....................000000000.1111.11.1111.22222222223. | |
2602 .....................123456789.0123.45.6789.01234567890. */ | |
2603 const char *content = (" \"%-%\\n\" /* non-str */\n"); | |
2604 lexer_test test (case_, content, NULL); | |
2605 | |
2606 /* Verify that we get the expected tokens back. */ | |
2607 const cpp_token *tok = test.get_token (); | |
2608 ASSERT_EQ (tok->type, CPP_STRING); | |
2609 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\""); | |
2610 | |
2611 /* Verify ranges of individual characters. */ | |
2612 /* "%-%". */ | |
2613 for (int i = 0; i < 3; i++) | |
2614 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2615 i, 1, 10 + i, 10 + i); | |
2616 /* "\n". */ | |
2617 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2618 3, 1, 13, 14); | |
2619 | |
2620 /* Closing quote for nul-terminator. */ | |
2621 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2622 4, 1, 15, 15); | |
2623 | |
2624 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5); | |
2625 } | |
2626 | |
2627 /* Lex a string literal containing UCN 4 characters. | |
2628 Verify the substring location data after running cpp_interpret_string | |
2629 on it. */ | |
2630 | |
2631 static void | |
2632 test_lexer_string_locations_ucn4 (const line_table_case &case_) | |
2633 { | |
2634 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed | |
2635 as UCN 4. | |
2636 ....................000000000.111111.111122.222222223.33333333344444 | |
2637 ....................123456789.012345.678901.234567890.12345678901234 */ | |
2638 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n"; | |
2639 lexer_test test (case_, content, NULL); | |
2640 | |
2641 /* Verify that we get the expected token back, with the correct | |
2642 location information. */ | |
2643 const cpp_token *tok = test.get_token (); | |
2644 ASSERT_EQ (tok->type, CPP_STRING); | |
2645 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\""); | |
2646 | |
2647 /* Verify that cpp_interpret_string works. | |
2648 The string should be encoded in the execution character | |
2649 set. Assuming that that is UTF-8, we should have the following: | |
2650 ----------- ---- ----- ------- ---------------- | |
2651 Byte offset Byte Octal Unicode Source Column(s) | |
2652 ----------- ---- ----- ------- ---------------- | |
2653 0 0x30 '0' 10 | |
2654 1 0x31 '1' 11 | |
2655 2 0x32 '2' 12 | |
2656 3 0x33 '3' 13 | |
2657 4 0x34 '4' 14 | |
2658 5 0xE2 \342 U+2174 15-20 | |
2659 6 0x85 \205 (cont) 15-20 | |
2660 7 0xB4 \264 (cont) 15-20 | |
2661 8 0xE2 \342 U+2175 21-26 | |
2662 9 0x85 \205 (cont) 21-26 | |
2663 10 0xB5 \265 (cont) 21-26 | |
2664 11 0x37 '7' 27 | |
2665 12 0x38 '8' 28 | |
2666 13 0x39 '9' 29 | |
2667 14 0x00 30 (closing quote) | |
2668 ----------- ---- ----- ------- ---------------. */ | |
2669 | |
2670 cpp_string dst_string; | |
2671 const enum cpp_ttype type = CPP_STRING; | |
2672 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2673 &dst_string, type); | |
2674 ASSERT_TRUE (result); | |
2675 ASSERT_STREQ ("01234\342\205\264\342\205\265789", | |
2676 (const char *)dst_string.text); | |
2677 free (const_cast <unsigned char *> (dst_string.text)); | |
2678 | |
2679 /* Verify ranges of individual characters. This no longer includes the | |
2680 opening quote, but does include the closing quote. | |
2681 '01234'. */ | |
2682 for (int i = 0; i <= 4; i++) | |
2683 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2684 /* U+2174. */ | |
2685 for (int i = 5; i <= 7; i++) | |
2686 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20); | |
2687 /* U+2175. */ | |
2688 for (int i = 8; i <= 10; i++) | |
2689 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26); | |
2690 /* '789' and nul terminator */ | |
2691 for (int i = 11; i <= 14; i++) | |
2692 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i); | |
2693 | |
2694 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); | |
2695 } | |
2696 | |
2697 /* Lex a string literal containing UCN 8 characters. | |
2698 Verify the substring location data after running cpp_interpret_string | |
2699 on it. */ | |
2700 | |
2701 static void | |
2702 test_lexer_string_locations_ucn8 (const line_table_case &case_) | |
2703 { | |
2704 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8. | |
2705 ....................000000000.111111.1111222222.2222333333333.344444 | |
2706 ....................123456789.012345.6789012345.6789012345678.901234 */ | |
2707 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n"; | |
2708 lexer_test test (case_, content, NULL); | |
2709 | |
2710 /* Verify that we get the expected token back, with the correct | |
2711 location information. */ | |
2712 const cpp_token *tok = test.get_token (); | |
2713 ASSERT_EQ (tok->type, CPP_STRING); | |
2714 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, | |
2715 "\"01234\\U00002174\\U00002175789\""); | |
2716 | |
2717 /* Verify that cpp_interpret_string works. | |
2718 The UTF-8 encoding of the string is identical to that from | |
2719 the ucn4 testcase above; the only difference is the column | |
2720 locations. */ | |
2721 cpp_string dst_string; | |
2722 const enum cpp_ttype type = CPP_STRING; | |
2723 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2724 &dst_string, type); | |
2725 ASSERT_TRUE (result); | |
2726 ASSERT_STREQ ("01234\342\205\264\342\205\265789", | |
2727 (const char *)dst_string.text); | |
2728 free (const_cast <unsigned char *> (dst_string.text)); | |
2729 | |
2730 /* Verify ranges of individual characters. This no longer includes the | |
2731 opening quote, but does include the closing quote. | |
2732 '01234'. */ | |
2733 for (int i = 0; i <= 4; i++) | |
2734 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2735 /* U+2174. */ | |
2736 for (int i = 5; i <= 7; i++) | |
2737 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24); | |
2738 /* U+2175. */ | |
2739 for (int i = 8; i <= 10; i++) | |
2740 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34); | |
2741 /* '789' at columns 35-37 */ | |
2742 for (int i = 11; i <= 13; i++) | |
2743 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i); | |
2744 /* Closing quote/nul-terminator at column 38. */ | |
2745 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38); | |
2746 | |
2747 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); | |
2748 } | |
2749 | |
/* Read the four bytes that PTR_BE_VALUE points at as a big-endian
   32-bit quantity and return it as a host-order value.  The bytes are
   read one at a time, most significant first.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Shift each successive byte in below the ones already read.  */
  for (unsigned int idx = 0; idx < 4; idx++)
    value = (value << 8) | (uint32_t) bytes[idx];

  return value;
}
2761 | |
2762 /* Lex a wide string literal and verify that attempts to read substring | |
2763 location data from it fail gracefully. */ | |
2764 | |
2765 static void | |
2766 test_lexer_string_locations_wide_string (const line_table_case &case_) | |
2767 { | |
2768 /* Digits 0-9. | |
2769 ....................000000000.11111111112.22222222233333 | |
2770 ....................123456789.01234567890.12345678901234 */ | |
2771 const char *content = " L\"0123456789\" /* non-str */\n"; | |
2772 lexer_test test (case_, content, NULL); | |
2773 | |
2774 /* Verify that we get the expected token back, with the correct | |
2775 location information. */ | |
2776 const cpp_token *tok = test.get_token (); | |
2777 ASSERT_EQ (tok->type, CPP_WSTRING); | |
2778 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\""); | |
2779 | |
2780 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */ | |
2781 cpp_string dst_string; | |
2782 const enum cpp_ttype type = CPP_WSTRING; | |
2783 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2784 &dst_string, type); | |
2785 ASSERT_TRUE (result); | |
2786 /* The cpp_reader defaults to big-endian with | |
2787 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should | |
2788 now be encoded as UTF-32BE. */ | |
2789 const uint32_t *be32_chars = (const uint32_t *)dst_string.text; | |
2790 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); | |
2791 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); | |
2792 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); | |
2793 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); | |
2794 free (const_cast <unsigned char *> (dst_string.text)); | |
2795 | |
2796 /* We don't yet support generating substring location information | |
2797 for L"" strings. */ | |
2798 ASSERT_HAS_NO_SUBSTRING_RANGES | |
2799 (test, tok->src_loc, type, | |
2800 "execution character set != source character set"); | |
2801 } | |
2802 | |
/* Read the two bytes that PTR_BE_VALUE points at as a big-endian
   16-bit quantity and return it as a host-order value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  /* The first byte is the most significant one.  */
  return (uint16_t) ((high << 8) | low);
}
2811 | |
2812 /* Lex a u"" string literal and verify that attempts to read substring | |
2813 location data from it fail gracefully. */ | |
2814 | |
2815 static void | |
2816 test_lexer_string_locations_string16 (const line_table_case &case_) | |
2817 { | |
2818 /* Digits 0-9. | |
2819 ....................000000000.11111111112.22222222233333 | |
2820 ....................123456789.01234567890.12345678901234 */ | |
2821 const char *content = " u\"0123456789\" /* non-str */\n"; | |
2822 lexer_test test (case_, content, NULL); | |
2823 | |
2824 /* Verify that we get the expected token back, with the correct | |
2825 location information. */ | |
2826 const cpp_token *tok = test.get_token (); | |
2827 ASSERT_EQ (tok->type, CPP_STRING16); | |
2828 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\""); | |
2829 | |
2830 /* Verify that cpp_interpret_string works, using CPP_STRING16. */ | |
2831 cpp_string dst_string; | |
2832 const enum cpp_ttype type = CPP_STRING16; | |
2833 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2834 &dst_string, type); | |
2835 ASSERT_TRUE (result); | |
2836 | |
2837 /* The cpp_reader defaults to big-endian, so dst_string should | |
2838 now be encoded as UTF-16BE. */ | |
2839 const uint16_t *be16_chars = (const uint16_t *)dst_string.text; | |
2840 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0])); | |
2841 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5])); | |
2842 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9])); | |
2843 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10])); | |
2844 free (const_cast <unsigned char *> (dst_string.text)); | |
2845 | |
2846 /* We don't yet support generating substring location information | |
2847 for L"" strings. */ | |
2848 ASSERT_HAS_NO_SUBSTRING_RANGES | |
2849 (test, tok->src_loc, type, | |
2850 "execution character set != source character set"); | |
2851 } | |
2852 | |
2853 /* Lex a U"" string literal and verify that attempts to read substring | |
2854 location data from it fail gracefully. */ | |
2855 | |
2856 static void | |
2857 test_lexer_string_locations_string32 (const line_table_case &case_) | |
2858 { | |
2859 /* Digits 0-9. | |
2860 ....................000000000.11111111112.22222222233333 | |
2861 ....................123456789.01234567890.12345678901234 */ | |
2862 const char *content = " U\"0123456789\" /* non-str */\n"; | |
2863 lexer_test test (case_, content, NULL); | |
2864 | |
2865 /* Verify that we get the expected token back, with the correct | |
2866 location information. */ | |
2867 const cpp_token *tok = test.get_token (); | |
2868 ASSERT_EQ (tok->type, CPP_STRING32); | |
2869 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\""); | |
2870 | |
2871 /* Verify that cpp_interpret_string works, using CPP_STRING32. */ | |
2872 cpp_string dst_string; | |
2873 const enum cpp_ttype type = CPP_STRING32; | |
2874 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2875 &dst_string, type); | |
2876 ASSERT_TRUE (result); | |
2877 | |
2878 /* The cpp_reader defaults to big-endian, so dst_string should | |
2879 now be encoded as UTF-32BE. */ | |
2880 const uint32_t *be32_chars = (const uint32_t *)dst_string.text; | |
2881 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); | |
2882 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); | |
2883 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); | |
2884 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); | |
2885 free (const_cast <unsigned char *> (dst_string.text)); | |
2886 | |
2887 /* We don't yet support generating substring location information | |
2888 for L"" strings. */ | |
2889 ASSERT_HAS_NO_SUBSTRING_RANGES | |
2890 (test, tok->src_loc, type, | |
2891 "execution character set != source character set"); | |
2892 } | |
2893 | |
2894 /* Lex a u8-string literal. | |
2895 Verify the substring location data after running cpp_interpret_string | |
2896 on it. */ | |
2897 | |
2898 static void | |
2899 test_lexer_string_locations_u8 (const line_table_case &case_) | |
2900 { | |
2901 /* Digits 0-9. | |
2902 ....................000000000.11111111112.22222222233333 | |
2903 ....................123456789.01234567890.12345678901234 */ | |
2904 const char *content = " u8\"0123456789\" /* non-str */\n"; | |
2905 lexer_test test (case_, content, NULL); | |
2906 | |
2907 /* Verify that we get the expected token back, with the correct | |
2908 location information. */ | |
2909 const cpp_token *tok = test.get_token (); | |
2910 ASSERT_EQ (tok->type, CPP_UTF8STRING); | |
2911 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\""); | |
2912 | |
2913 /* Verify that cpp_interpret_string works. */ | |
2914 cpp_string dst_string; | |
2915 const enum cpp_ttype type = CPP_STRING; | |
2916 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2917 &dst_string, type); | |
2918 ASSERT_TRUE (result); | |
2919 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
2920 free (const_cast <unsigned char *> (dst_string.text)); | |
2921 | |
2922 /* Verify ranges of individual characters. This no longer includes the | |
2923 opening quote, but does include the closing quote. */ | |
2924 for (int i = 0; i <= 10; i++) | |
2925 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2926 } | |
2927 | |
2928 /* Lex a string literal containing UTF-8 source characters. | |
2929 Verify the substring location data after running cpp_interpret_string | |
2930 on it. */ | |
2931 | |
2932 static void | |
2933 test_lexer_string_locations_utf8_source (const line_table_case &case_) | |
2934 { | |
2935 /* This string literal is written out to the source file as UTF-8, | |
2936 and is of the form "before mojibake after", where "mojibake" | |
2937 is written as the following four unicode code points: | |
2938 U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
2939 U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
2940 U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
2941 U+3051 HIRAGANA LETTER KE. | |
2942 Each of these is 3 bytes wide when encoded in UTF-8, whereas the | |
2943 "before" and "after" are 1 byte per unicode character. | |
2944 | |
2945 The numbering shown are "columns", which are *byte* numbers within | |
2946 the line, rather than unicode character numbers. | |
2947 | |
2948 .................... 000000000.1111111. | |
2949 .................... 123456789.0123456. */ | |
2950 const char *content = (" \"before " | |
2951 /* U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
2952 UTF-8: 0xE6 0x96 0x87 | |
2953 C octal escaped UTF-8: \346\226\207 | |
2954 "column" numbers: 17-19. */ | |
2955 "\346\226\207" | |
2956 | |
2957 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
2958 UTF-8: 0xE5 0xAD 0x97 | |
2959 C octal escaped UTF-8: \345\255\227 | |
2960 "column" numbers: 20-22. */ | |
2961 "\345\255\227" | |
2962 | |
2963 /* U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
2964 UTF-8: 0xE5 0x8C 0x96 | |
2965 C octal escaped UTF-8: \345\214\226 | |
2966 "column" numbers: 23-25. */ | |
2967 "\345\214\226" | |
2968 | |
2969 /* U+3051 HIRAGANA LETTER KE | |
2970 UTF-8: 0xE3 0x81 0x91 | |
2971 C octal escaped UTF-8: \343\201\221 | |
2972 "column" numbers: 26-28. */ | |
2973 "\343\201\221" | |
2974 | |
2975 /* column numbers 29 onwards | |
2976 2333333.33334444444444 | |
2977 9012345.67890123456789. */ | |
2978 " after\" /* non-str */\n"); | |
2979 lexer_test test (case_, content, NULL); | |
2980 | |
2981 /* Verify that we get the expected token back, with the correct | |
2982 location information. */ | |
2983 const cpp_token *tok = test.get_token (); | |
2984 ASSERT_EQ (tok->type, CPP_STRING); | |
2985 ASSERT_TOKEN_AS_TEXT_EQ | |
2986 (test.m_parser, tok, | |
2987 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\""); | |
2988 | |
2989 /* Verify that cpp_interpret_string works. */ | |
2990 cpp_string dst_string; | |
2991 const enum cpp_ttype type = CPP_STRING; | |
2992 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2993 &dst_string, type); | |
2994 ASSERT_TRUE (result); | |
2995 ASSERT_STREQ | |
2996 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after", | |
2997 (const char *)dst_string.text); | |
2998 free (const_cast <unsigned char *> (dst_string.text)); | |
2999 | |
3000 /* Verify ranges of individual characters. This no longer includes the | |
3001 opening quote, but does include the closing quote. | |
3002 Assuming that both source and execution encodings are UTF-8, we have | |
3003 a run of 25 octets in each, plus the NUL terminator. */ | |
3004 for (int i = 0; i < 25; i++) | |
3005 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
3006 /* NUL-terminator should use the closing quote at column 35. */ | |
3007 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35); | |
3008 | |
3009 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26); | |
3010 } | |
3011 | |
3012 /* Test of string literal concatenation. */ | |
3013 | |
3014 static void | |
3015 test_lexer_string_locations_concatenation_1 (const line_table_case &case_) | |
3016 { | |
3017 /* Digits 0-9. | |
3018 .....................000000000.111111.11112222222222 | |
3019 .....................123456789.012345.67890123456789. */ | |
3020 const char *content = (" \"01234\" /* non-str */\n" | |
3021 " \"56789\" /* non-str */\n"); | |
3022 lexer_test test (case_, content, NULL); | |
3023 | |
3024 location_t input_locs[2]; | |
3025 | |
3026 /* Verify that we get the expected tokens back. */ | |
3027 auto_vec <cpp_string> input_strings; | |
3028 const cpp_token *tok_a = test.get_token (); | |
3029 ASSERT_EQ (tok_a->type, CPP_STRING); | |
3030 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\""); | |
3031 input_strings.safe_push (tok_a->val.str); | |
3032 input_locs[0] = tok_a->src_loc; | |
3033 | |
3034 const cpp_token *tok_b = test.get_token (); | |
3035 ASSERT_EQ (tok_b->type, CPP_STRING); | |
3036 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\""); | |
3037 input_strings.safe_push (tok_b->val.str); | |
3038 input_locs[1] = tok_b->src_loc; | |
3039 | |
3040 /* Verify that cpp_interpret_string works. */ | |
3041 cpp_string dst_string; | |
3042 const enum cpp_ttype type = CPP_STRING; | |
3043 bool result = cpp_interpret_string (test.m_parser, | |
3044 input_strings.address (), 2, | |
3045 &dst_string, type); | |
3046 ASSERT_TRUE (result); | |
3047 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3048 free (const_cast <unsigned char *> (dst_string.text)); | |
3049 | |
3050 /* Simulate c-lex.c's lex_string in order to record concatenation. */ | |
3051 test.m_concats.record_string_concatenation (2, input_locs); | |
3052 | |
3053 location_t initial_loc = input_locs[0]; | |
3054 | |
3055 /* "01234" on line 1. */ | |
3056 for (int i = 0; i <= 4; i++) | |
3057 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); | |
3058 /* "56789" in line 2, plus its closing quote for the nul terminator. */ | |
3059 for (int i = 5; i <= 10; i++) | |
3060 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i); | |
3061 | |
3062 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
3063 } | |
3064 | |
3065 /* Another test of string literal concatenation. */ | |
3066 | |
3067 static void | |
3068 test_lexer_string_locations_concatenation_2 (const line_table_case &case_) | |
3069 { | |
3070 /* Digits 0-9. | |
3071 .....................000000000.111.11111112222222 | |
3072 .....................123456789.012.34567890123456. */ | |
3073 const char *content = (" \"01\" /* non-str */\n" | |
3074 " \"23\" /* non-str */\n" | |
3075 " \"45\" /* non-str */\n" | |
3076 " \"67\" /* non-str */\n" | |
3077 " \"89\" /* non-str */\n"); | |
3078 lexer_test test (case_, content, NULL); | |
3079 | |
3080 auto_vec <cpp_string> input_strings; | |
3081 location_t input_locs[5]; | |
3082 | |
3083 /* Verify that we get the expected tokens back. */ | |
3084 for (int i = 0; i < 5; i++) | |
3085 { | |
3086 const cpp_token *tok = test.get_token (); | |
3087 ASSERT_EQ (tok->type, CPP_STRING); | |
3088 input_strings.safe_push (tok->val.str); | |
3089 input_locs[i] = tok->src_loc; | |
3090 } | |
3091 | |
3092 /* Verify that cpp_interpret_string works. */ | |
3093 cpp_string dst_string; | |
3094 const enum cpp_ttype type = CPP_STRING; | |
3095 bool result = cpp_interpret_string (test.m_parser, | |
3096 input_strings.address (), 5, | |
3097 &dst_string, type); | |
3098 ASSERT_TRUE (result); | |
3099 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3100 free (const_cast <unsigned char *> (dst_string.text)); | |
3101 | |
3102 /* Simulate c-lex.c's lex_string in order to record concatenation. */ | |
3103 test.m_concats.record_string_concatenation (5, input_locs); | |
3104 | |
3105 location_t initial_loc = input_locs[0]; | |
3106 | |
3107 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can | |
3108 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS | |
3109 and expect get_source_range_for_substring to fail. | |
3110 However, for a string concatenation test, we can have a case | |
3111 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS, | |
3112 but subsequent strings can be after it. | |
3113 Attempting to detect this within assert_char_at_range | |
3114 would overcomplicate the logic for the common test cases, so | |
3115 we detect it here. */ | |
3116 if (should_have_column_data_p (input_locs[0]) | |
3117 && !should_have_column_data_p (input_locs[4])) | |
3118 { | |
3119 /* Verify that get_source_range_for_substring gracefully rejects | |
3120 this case. */ | |
3121 source_range actual_range; | |
3122 const char *err | |
3123 = get_source_range_for_char (test.m_parser, &test.m_concats, | |
3124 initial_loc, type, 0, &actual_range); | |
3125 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err); | |
3126 return; | |
3127 } | |
3128 | |
3129 for (int i = 0; i < 5; i++) | |
3130 for (int j = 0; j < 2; j++) | |
3131 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j, | |
3132 i + 1, 10 + j, 10 + j); | |
3133 | |
3134 /* NUL-terminator should use the final closing quote at line 5 column 12. */ | |
3135 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12); | |
3136 | |
3137 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
3138 } | |
3139 | |
3140 /* Another test of string literal concatenation, this time combined with | |
3141 various kinds of escaped characters. */ | |
3142 | |
3143 static void | |
3144 test_lexer_string_locations_concatenation_3 (const line_table_case &case_) | |
3145 { | |
3146 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" | |
3147 digit 6 in ASCII as octal "\066", concatenating multiple strings. */ | |
3148 const char *content | |
3149 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555 | |
3150 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */ | |
3151 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n"); | |
3152 lexer_test test (case_, content, NULL); | |
3153 | |
3154 auto_vec <cpp_string> input_strings; | |
3155 location_t input_locs[4]; | |
3156 | |
3157 /* Verify that we get the expected tokens back. */ | |
3158 for (int i = 0; i < 4; i++) | |
3159 { | |
3160 const cpp_token *tok = test.get_token (); | |
3161 ASSERT_EQ (tok->type, CPP_STRING); | |
3162 input_strings.safe_push (tok->val.str); | |
3163 input_locs[i] = tok->src_loc; | |
3164 } | |
3165 | |
3166 /* Verify that cpp_interpret_string works. */ | |
3167 cpp_string dst_string; | |
3168 const enum cpp_ttype type = CPP_STRING; | |
3169 bool result = cpp_interpret_string (test.m_parser, | |
3170 input_strings.address (), 4, | |
3171 &dst_string, type); | |
3172 ASSERT_TRUE (result); | |
3173 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3174 free (const_cast <unsigned char *> (dst_string.text)); | |
3175 | |
3176 /* Simulate c-lex.c's lex_string in order to record concatenation. */ | |
3177 test.m_concats.record_string_concatenation (4, input_locs); | |
3178 | |
3179 location_t initial_loc = input_locs[0]; | |
3180 | |
3181 for (int i = 0; i <= 4; i++) | |
3182 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); | |
3183 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22); | |
3184 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30); | |
3185 for (int i = 7; i <= 9; i++) | |
3186 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i); | |
3187 | |
3188 /* NUL-terminator should use the location of the final closing quote. */ | |
3189 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38); | |
3190 | |
3191 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
3192 } | |
3193 | |
3194 /* Test of string literal in a macro. */ | |
3195 | |
3196 static void | |
3197 test_lexer_string_locations_macro (const line_table_case &case_) | |
3198 { | |
3199 /* Digits 0-9. | |
3200 .....................0000000001111111111.22222222223. | |
3201 .....................1234567890123456789.01234567890. */ | |
3202 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n" | |
3203 " MACRO"); | |
3204 lexer_test test (case_, content, NULL); | |
3205 | |
3206 /* Verify that we get the expected tokens back. */ | |
3207 const cpp_token *tok = test.get_token (); | |
3208 ASSERT_EQ (tok->type, CPP_PADDING); | |
3209 | |
3210 tok = test.get_token (); | |
3211 ASSERT_EQ (tok->type, CPP_STRING); | |
3212 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
3213 | |
3214 /* Verify ranges of individual characters. We ought to | |
3215 see columns within the macro definition. */ | |
3216 for (int i = 0; i <= 10; i++) | |
3217 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3218 i, 1, 20 + i, 20 + i); | |
3219 | |
3220 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); | |
3221 | |
3222 tok = test.get_token (); | |
3223 ASSERT_EQ (tok->type, CPP_PADDING); | |
3224 } | |
3225 | |
3226 /* Test of stringification of a macro argument. */ | |
3227 | |
3228 static void | |
3229 test_lexer_string_locations_stringified_macro_argument | |
3230 (const line_table_case &case_) | |
3231 { | |
3232 /* .....................000000000111111111122222222223. | |
3233 .....................123456789012345678901234567890. */ | |
3234 const char *content = ("#define MACRO(X) #X /* non-str */\n" | |
3235 "MACRO(foo)\n"); | |
3236 lexer_test test (case_, content, NULL); | |
3237 | |
3238 /* Verify that we get the expected token back. */ | |
3239 const cpp_token *tok = test.get_token (); | |
3240 ASSERT_EQ (tok->type, CPP_PADDING); | |
3241 | |
3242 tok = test.get_token (); | |
3243 ASSERT_EQ (tok->type, CPP_STRING); | |
3244 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\""); | |
3245 | |
3246 /* We don't support getting the location of a stringified macro | |
3247 argument. Verify that it fails gracefully. */ | |
3248 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, | |
3249 "cpp_interpret_string_1 failed"); | |
3250 | |
3251 tok = test.get_token (); | |
3252 ASSERT_EQ (tok->type, CPP_PADDING); | |
3253 | |
3254 tok = test.get_token (); | |
3255 ASSERT_EQ (tok->type, CPP_PADDING); | |
3256 } | |
3257 | |
3258 /* Ensure that we are fail gracefully if something attempts to pass | |
3259 in a location that isn't a string literal token. Seen on this code: | |
3260 | |
3261 const char a[] = " %d "; | |
3262 __builtin_printf (a, 0.5); | |
3263 ^ | |
3264 | |
3265 when c-format.c erroneously used the indicated one-character | |
3266 location as the format string location, leading to a read past the | |
3267 end of a string buffer in cpp_interpret_string_1. */ | |
3268 | |
3269 static void | |
3270 test_lexer_string_locations_non_string (const line_table_case &case_) | |
3271 { | |
3272 /* .....................000000000111111111122222222223. | |
3273 .....................123456789012345678901234567890. */ | |
3274 const char *content = (" a\n"); | |
3275 lexer_test test (case_, content, NULL); | |
3276 | |
3277 /* Verify that we get the expected token back. */ | |
3278 const cpp_token *tok = test.get_token (); | |
3279 ASSERT_EQ (tok->type, CPP_NAME); | |
3280 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a"); | |
3281 | |
3282 /* At this point, libcpp is attempting to interpret the name as a | |
3283 string literal, despite it not starting with a quote. We don't detect | |
3284 that, but we should at least fail gracefully. */ | |
3285 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, | |
3286 "cpp_interpret_string_1 failed"); | |
3287 } | |
3288 | |
3289 /* Ensure that we can read substring information for a token which | |
3290 starts in one linemap and ends in another . Adapted from | |
3291 gcc.dg/cpp/pr69985.c. */ | |
3292 | |
3293 static void | |
3294 test_lexer_string_locations_long_line (const line_table_case &case_) | |
3295 { | |
3296 /* .....................000000.000111111111 | |
3297 .....................123456.789012346789. */ | |
3298 const char *content = ("/* A very long line, so that we start a new line map. */\n" | |
3299 " \"0123456789012345678901234567890123456789" | |
3300 "0123456789012345678901234567890123456789" | |
3301 "0123456789012345678901234567890123456789" | |
3302 "0123456789\"\n"); | |
3303 | |
3304 lexer_test test (case_, content, NULL); | |
3305 | |
3306 /* Verify that we get the expected token back. */ | |
3307 const cpp_token *tok = test.get_token (); | |
3308 ASSERT_EQ (tok->type, CPP_STRING); | |
3309 | |
3310 if (!should_have_column_data_p (line_table->highest_location)) | |
3311 return; | |
3312 | |
3313 /* Verify ranges of individual characters. */ | |
3314 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131); | |
3315 for (int i = 0; i < 131; i++) | |
3316 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3317 i, 2, 7 + i, 7 + i); | |
3318 } | |
3319 | |
3320 /* Test of locations within a raw string that doesn't contain a newline. */ | |
3321 | |
3322 static void | |
3323 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_) | |
3324 { | |
3325 /* .....................00.0000000111111111122. | |
3326 .....................12.3456789012345678901. */ | |
3327 const char *content = ("R\"foo(0123456789)foo\"\n"); | |
3328 lexer_test test (case_, content, NULL); | |
3329 | |
3330 /* Verify that we get the expected token back. */ | |
3331 const cpp_token *tok = test.get_token (); | |
3332 ASSERT_EQ (tok->type, CPP_STRING); | |
3333 | |
3334 /* Verify that cpp_interpret_string works. */ | |
3335 cpp_string dst_string; | |
3336 const enum cpp_ttype type = CPP_STRING; | |
3337 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3338 &dst_string, type); | |
3339 ASSERT_TRUE (result); | |
3340 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3341 free (const_cast <unsigned char *> (dst_string.text)); | |
3342 | |
3343 if (!should_have_column_data_p (line_table->highest_location)) | |
3344 return; | |
3345 | |
3346 /* 0-9, plus the nil terminator. */ | |
3347 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); | |
3348 for (int i = 0; i < 11; i++) | |
3349 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3350 i, 1, 7 + i, 7 + i); | |
3351 } | |
3352 | |
3353 /* Test of locations within a raw string that contains a newline. */ | |
3354 | |
3355 static void | |
3356 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) | |
3357 { | |
3358 /* .....................00.0000. | |
3359 .....................12.3456. */ | |
3360 const char *content = ("R\"foo(\n" | |
3361 /* .....................00000. | |
3362 .....................12345. */ | |
3363 "hello\n" | |
3364 "world\n" | |
3365 /* .....................00000. | |
3366 .....................12345. */ | |
3367 ")foo\"\n"); | |
3368 lexer_test test (case_, content, NULL); | |
3369 | |
3370 /* Verify that we get the expected token back. */ | |
3371 const cpp_token *tok = test.get_token (); | |
3372 ASSERT_EQ (tok->type, CPP_STRING); | |
3373 | |
3374 /* Verify that cpp_interpret_string works. */ | |
3375 cpp_string dst_string; | |
3376 const enum cpp_ttype type = CPP_STRING; | |
3377 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3378 &dst_string, type); | |
3379 ASSERT_TRUE (result); | |
3380 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text); | |
3381 free (const_cast <unsigned char *> (dst_string.text)); | |
3382 | |
3383 if (!should_have_column_data_p (line_table->highest_location)) | |
3384 return; | |
3385 | |
3386 /* Currently we don't support locations within raw strings that | |
3387 contain newlines. */ | |
3388 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type, | |
3389 "range endpoints are on different lines"); | |
3390 } | |
3391 | |
3392 /* Test of parsing an unterminated raw string. */ | |
3393 | |
3394 static void | |
3395 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_) | |
3396 { | |
3397 const char *content = "R\"ouch()ouCh\" /* etc */"; | |
3398 | |
3399 lexer_error_sink errors; | |
3400 lexer_test test (case_, content, &errors); | |
3401 test.m_implicitly_expect_EOF = false; | |
3402 | |
3403 /* Attempt to parse the raw string. */ | |
3404 const cpp_token *tok = test.get_token (); | |
3405 ASSERT_EQ (tok->type, CPP_EOF); | |
3406 | |
3407 ASSERT_EQ (1, errors.m_errors.length ()); | |
3408 /* We expect the message "unterminated raw string" | |
3409 in the "cpplib" translation domain. | |
3410 It's not clear that dgettext is available on all supported hosts, | |
3411 so this assertion is commented-out for now. | |
3412 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"), | |
3413 errors.m_errors[0]); | |
3414 */ | |
3415 } | |
3416 | |
3417 /* Test of lexing char constants. */ | |
3418 | |
3419 static void | |
3420 test_lexer_char_constants (const line_table_case &case_) | |
3421 { | |
3422 /* Various char constants. | |
3423 .....................0000000001111111111.22222222223. | |
3424 .....................1234567890123456789.01234567890. */ | |
3425 const char *content = (" 'a'\n" | |
3426 " u'a'\n" | |
3427 " U'a'\n" | |
3428 " L'a'\n" | |
3429 " 'abc'\n"); | |
3430 lexer_test test (case_, content, NULL); | |
3431 | |
3432 /* Verify that we get the expected tokens back. */ | |
3433 /* 'a'. */ | |
3434 const cpp_token *tok = test.get_token (); | |
3435 ASSERT_EQ (tok->type, CPP_CHAR); | |
3436 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'"); | |
3437 | |
3438 unsigned int chars_seen; | |
3439 int unsignedp; | |
3440 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok, | |
3441 &chars_seen, &unsignedp); | |
3442 ASSERT_EQ (cc, 'a'); | |
3443 ASSERT_EQ (chars_seen, 1); | |
3444 | |
3445 /* u'a'. */ | |
3446 tok = test.get_token (); | |
3447 ASSERT_EQ (tok->type, CPP_CHAR16); | |
3448 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'"); | |
3449 | |
3450 /* U'a'. */ | |
3451 tok = test.get_token (); | |
3452 ASSERT_EQ (tok->type, CPP_CHAR32); | |
3453 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'"); | |
3454 | |
3455 /* L'a'. */ | |
3456 tok = test.get_token (); | |
3457 ASSERT_EQ (tok->type, CPP_WCHAR); | |
3458 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'"); | |
3459 | |
3460 /* 'abc' (c-char-sequence). */ | |
3461 tok = test.get_token (); | |
3462 ASSERT_EQ (tok->type, CPP_CHAR); | |
3463 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'"); | |
3464 } | |
3465 /* A table of interesting location_t values, giving one axis of our test | |
3466 matrix. */ | |
3467 | |
3468 static const location_t boundary_locations[] = { | |
3469 /* Zero means "don't override the default values for a new line_table". */ | |
3470 0, | |
3471 | |
3472 /* An arbitrary non-zero value that isn't close to one of | |
3473 the boundary values below. */ | |
3474 0x10000, | |
3475 | |
3476 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */ | |
3477 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100, | |
3478 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1, | |
3479 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES, | |
3480 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1, | |
3481 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100, | |
3482 | |
3483 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */ | |
3484 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100, | |
3485 LINE_MAP_MAX_LOCATION_WITH_COLS - 1, | |
3486 LINE_MAP_MAX_LOCATION_WITH_COLS, | |
3487 LINE_MAP_MAX_LOCATION_WITH_COLS + 1, | |
3488 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100, | |
3489 }; | |
3490 | |
3491 /* Run TESTCASE multiple times, once for each case in our test matrix. */ | |
3492 | |
3493 void | |
3494 for_each_line_table_case (void (*testcase) (const line_table_case &)) | |
3495 { | |
3496 /* As noted above in the description of struct line_table_case, | |
3497 we want to explore a test matrix of interesting line_table | |
3498 situations, running various selftests for each case within the | |
3499 matrix. */ | |
3500 | |
3501 /* Run all tests with: | |
3502 (a) line_table->default_range_bits == 0, and | |
3503 (b) line_table->default_range_bits == 5. */ | |
3504 int num_cases_tested = 0; | |
3505 for (int default_range_bits = 0; default_range_bits <= 5; | |
3506 default_range_bits += 5) | |
3507 { | |
3508 /* ...and use each of the "interesting" location values as | |
3509 the starting location within line_table. */ | |
3510 const int num_boundary_locations | |
3511 = sizeof (boundary_locations) / sizeof (boundary_locations[0]); | |
3512 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++) | |
3513 { | |
3514 line_table_case c (default_range_bits, boundary_locations[loc_idx]); | |
3515 | |
3516 testcase (c); | |
3517 | |
3518 num_cases_tested++; | |
3519 } | |
3520 } | |
3521 | |
3522 /* Verify that we fully covered the test matrix. */ | |
3523 ASSERT_EQ (num_cases_tested, 2 * 12); | |
3524 } | |
3525 | |
3526 /* Run all of the selftests within this file. */ | |
3527 | |
3528 void | |
3529 input_c_tests () | |
3530 { | |
3531 test_should_have_column_data_p (); | |
3532 test_unknown_location (); | |
3533 test_builtins (); | |
3534 for_each_line_table_case (test_make_location_nonpure_range_endpoints); | |
3535 | |
3536 for_each_line_table_case (test_accessing_ordinary_linemaps); | |
3537 for_each_line_table_case (test_lexer); | |
3538 for_each_line_table_case (test_lexer_string_locations_simple); | |
3539 for_each_line_table_case (test_lexer_string_locations_ebcdic); | |
3540 for_each_line_table_case (test_lexer_string_locations_hex); | |
3541 for_each_line_table_case (test_lexer_string_locations_oct); | |
3542 for_each_line_table_case (test_lexer_string_locations_letter_escape_1); | |
3543 for_each_line_table_case (test_lexer_string_locations_letter_escape_2); | |
3544 for_each_line_table_case (test_lexer_string_locations_ucn4); | |
3545 for_each_line_table_case (test_lexer_string_locations_ucn8); | |
3546 for_each_line_table_case (test_lexer_string_locations_wide_string); | |
3547 for_each_line_table_case (test_lexer_string_locations_string16); | |
3548 for_each_line_table_case (test_lexer_string_locations_string32); | |
3549 for_each_line_table_case (test_lexer_string_locations_u8); | |
3550 for_each_line_table_case (test_lexer_string_locations_utf8_source); | |
3551 for_each_line_table_case (test_lexer_string_locations_concatenation_1); | |
3552 for_each_line_table_case (test_lexer_string_locations_concatenation_2); | |
3553 for_each_line_table_case (test_lexer_string_locations_concatenation_3); | |
3554 for_each_line_table_case (test_lexer_string_locations_macro); | |
3555 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument); | |
3556 for_each_line_table_case (test_lexer_string_locations_non_string); | |
3557 for_each_line_table_case (test_lexer_string_locations_long_line); | |
3558 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); | |
3559 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); | |
3560 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated); | |
3561 for_each_line_table_case (test_lexer_char_constants); | |
3562 | |
3563 test_reading_source_line (); | |
3564 } | |
3565 | |
3566 } // namespace selftest | |
3567 | |
3568 #endif /* CHECKING_P */ |