68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 /* Data and functions related to line maps and input files.
|
131
|
2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 This file is part of GCC.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 GCC is free software; you can redistribute it and/or modify it under
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 the terms of the GNU General Public License as published by the Free
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 Software Foundation; either version 3, or (at your option) any later
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 version.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 for more details.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 You should have received a copy of the GNU General Public License
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 along with GCC; see the file COPYING3. If not see
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 <http://www.gnu.org/licenses/>. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 #include "config.h"
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 #include "system.h"
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 #include "coretypes.h"
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 #include "intl.h"
|
111
|
24 #include "diagnostic-core.h"
|
|
25 #include "selftest.h"
|
|
26 #include "cpplib.h"
|
|
27
|
|
28 #ifndef HAVE_ICONV
|
|
29 #define HAVE_ICONV 0
|
|
30 #endif
|
|
31
|
|
32 /* This is a cache used by get_next_line to store the content of a
|
|
33 file to be searched for file lines. */
|
|
34 struct fcache
|
|
35 {
|
|
36 /* These are information used to store a line boundary. */
|
|
37 struct line_info
|
|
38 {
|
|
39 /* The line number. It starts from 1. */
|
|
40 size_t line_num;
|
|
41
|
|
42 /* The position (byte count) of the beginning of the line,
|
|
43 relative to the file data pointer. This starts at zero. */
|
|
44 size_t start_pos;
|
|
45
|
|
46 /* The position (byte count) of the last byte of the line. This
|
|
47 normally points to the '\n' character, or to one byte after the
|
|
48 last byte of the file, if the file doesn't contain a '\n'
|
|
49 character. */
|
|
50 size_t end_pos;
|
|
51
|
|
52 line_info (size_t l, size_t s, size_t e)
|
|
53 : line_num (l), start_pos (s), end_pos (e)
|
|
54 {}
|
|
55
|
|
56 line_info ()
|
|
57 :line_num (0), start_pos (0), end_pos (0)
|
|
58 {}
|
|
59 };
|
|
60
|
|
61 /* The number of time this file has been accessed. This is used
|
|
62 to designate which file cache to evict from the cache
|
|
63 array. */
|
|
64 unsigned use_count;
|
|
65
|
|
66 /* The file_path is the key for identifying a particular file in
|
|
67 the cache.
|
|
68 For libcpp-using code, the underlying buffer for this field is
|
|
69 owned by the corresponding _cpp_file within the cpp_reader. */
|
|
70 const char *file_path;
|
|
71
|
|
72 FILE *fp;
|
|
73
|
|
74 /* This points to the content of the file that we've read so
|
|
75 far. */
|
|
76 char *data;
|
|
77
|
|
78 /* The size of the DATA array above.*/
|
|
79 size_t size;
|
|
80
|
|
81 /* The number of bytes read from the underlying file so far. This
|
|
82 must be less (or equal) than SIZE above. */
|
|
83 size_t nb_read;
|
|
84
|
|
85 /* The index of the beginning of the current line. */
|
|
86 size_t line_start_idx;
|
|
87
|
|
88 /* The number of the previous line read. This starts at 1. Zero
|
|
89 means we've read no line so far. */
|
|
90 size_t line_num;
|
|
91
|
|
92 /* This is the total number of lines of the current file. At the
|
|
93 moment, we try to get this information from the line map
|
|
94 subsystem. Note that this is just a hint. When using the C++
|
|
95 front-end, this hint is correct because the input file is then
|
|
96 completely tokenized before parsing starts; so the line map knows
|
|
97 the number of lines before compilation really starts. For e.g,
|
|
98 the C front-end, it can happen that we start emitting diagnostics
|
|
99 before the line map has seen the end of the file. */
|
|
100 size_t total_lines;
|
|
101
|
|
102 /* Could this file be missing a trailing newline on its final line?
|
|
103 Initially true (to cope with empty files), set to true/false
|
|
104 as each line is read. */
|
|
105 bool missing_trailing_newline;
|
|
106
|
|
107 /* This is a record of the beginning and end of the lines we've seen
|
|
108 while reading the file. This is useful to avoid walking the data
|
|
109 from the beginning when we are asked to read a line that is
|
|
110 before LINE_START_IDX above. Note that the maximum size of this
|
|
111 record is fcache_line_record_size, so that the memory consumption
|
|
112 doesn't explode. We thus scale total_lines down to
|
|
113 fcache_line_record_size. */
|
|
114 vec<line_info, va_heap> line_record;
|
|
115
|
|
116 fcache ();
|
|
117 ~fcache ();
|
|
118 };
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 /* Current position in real source file. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121
|
111
|
122 location_t input_location = UNKNOWN_LOCATION;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 struct line_maps *line_table;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125
|
111
|
126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
|
|
127 This needs to be a global so that it can be a GC root, and thus
|
|
128 prevent the stashed copy from being garbage-collected if the GC runs
|
|
129 during a line_table_test. */
|
|
130
|
|
131 struct line_maps *saved_line_table;
|
|
132
|
|
133 static fcache *fcache_tab;
|
|
134 static const size_t fcache_tab_size = 16;
|
|
135 static const size_t fcache_buffer_size = 4 * 1024;
|
|
136 static const size_t fcache_line_record_size = 100;
|
|
137
|
|
138 /* Expand the source location LOC into a human readable location. If
|
|
139 LOC resolves to a builtin location, the file name of the readable
|
|
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
|
|
141 TRUE and LOC is virtual, then it is resolved to the expansion
|
|
142 point of the involved macro. Otherwise, it is resolved to the
|
|
143 spelling location of the token.
|
|
144
|
|
145 When resolving to the spelling location of the token, if the
|
|
146 resulting location is for a built-in location (that is, it has no
|
|
147 associated line/column) in the context of a macro expansion, the
|
|
148 returned location is the first one (while unwinding the macro
|
|
149 location towards its expansion point) that is in real source
|
|
150 code.
|
|
151
|
|
152 ASPECT controls which part of the location to use. */
|
|
153
|
|
154 static expanded_location
|
|
155 expand_location_1 (source_location loc,
|
|
156 bool expansion_point_p,
|
|
157 enum location_aspect aspect)
|
|
158 {
|
|
159 expanded_location xloc;
|
|
160 const line_map_ordinary *map;
|
|
161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
|
|
162 tree block = NULL;
|
|
163
|
|
164 if (IS_ADHOC_LOC (loc))
|
|
165 {
|
|
166 block = LOCATION_BLOCK (loc);
|
|
167 loc = LOCATION_LOCUS (loc);
|
|
168 }
|
|
169
|
|
170 memset (&xloc, 0, sizeof (xloc));
|
|
171
|
|
172 if (loc >= RESERVED_LOCATION_COUNT)
|
|
173 {
|
|
174 if (!expansion_point_p)
|
|
175 {
|
|
176 /* We want to resolve LOC to its spelling location.
|
|
177
|
|
178 But if that spelling location is a reserved location that
|
|
179 appears in the context of a macro expansion (like for a
|
|
180 location for a built-in token), let's consider the first
|
|
181 location (toward the expansion point) that is not reserved;
|
|
182 that is, the first location that is in real source code. */
|
|
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
|
|
184 loc, NULL);
|
|
185 lrk = LRK_SPELLING_LOCATION;
|
|
186 }
|
|
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
|
|
188
|
|
189 /* loc is now either in an ordinary map, or is a reserved location.
|
|
190 If it is a compound location, the caret is in a spelling location,
|
|
191 but the start/finish might still be a virtual location.
|
|
192 Depending of what the caller asked for, we may need to recurse
|
|
193 one level in order to resolve any virtual locations in the
|
|
194 end-points. */
|
|
195 switch (aspect)
|
|
196 {
|
|
197 default:
|
|
198 gcc_unreachable ();
|
|
199 /* Fall through. */
|
|
200 case LOCATION_ASPECT_CARET:
|
|
201 break;
|
|
202 case LOCATION_ASPECT_START:
|
|
203 {
|
|
204 source_location start = get_start (loc);
|
|
205 if (start != loc)
|
|
206 return expand_location_1 (start, expansion_point_p, aspect);
|
|
207 }
|
|
208 break;
|
|
209 case LOCATION_ASPECT_FINISH:
|
|
210 {
|
|
211 source_location finish = get_finish (loc);
|
|
212 if (finish != loc)
|
|
213 return expand_location_1 (finish, expansion_point_p, aspect);
|
|
214 }
|
|
215 break;
|
|
216 }
|
|
217 xloc = linemap_expand_location (line_table, map, loc);
|
|
218 }
|
|
219
|
|
220 xloc.data = block;
|
|
221 if (loc <= BUILTINS_LOCATION)
|
|
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
|
|
223
|
|
224 return xloc;
|
|
225 }
|
|
226
|
|
227 /* Initialize the set of cache used for files accessed by caret
|
|
228 diagnostic. */
|
|
229
|
|
230 static void
|
|
231 diagnostic_file_cache_init (void)
|
|
232 {
|
|
233 if (fcache_tab == NULL)
|
|
234 fcache_tab = new fcache[fcache_tab_size];
|
|
235 }
|
|
236
|
|
237 /* Free the resources used by the set of cache used for files accessed
|
|
238 by caret diagnostic. */
|
|
239
|
|
240 void
|
|
241 diagnostic_file_cache_fini (void)
|
|
242 {
|
|
243 if (fcache_tab)
|
|
244 {
|
|
245 delete [] (fcache_tab);
|
|
246 fcache_tab = NULL;
|
|
247 }
|
|
248 }
|
|
249
|
|
250 /* Return the total lines number that have been read so far by the
|
|
251 line map (in the preprocessor) so far. For languages like C++ that
|
|
252 entirely preprocess the input file before starting to parse, this
|
|
253 equals the actual number of lines of the file. */
|
|
254
|
|
255 static size_t
|
|
256 total_lines_num (const char *file_path)
|
|
257 {
|
|
258 size_t r = 0;
|
|
259 source_location l = 0;
|
|
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
|
|
261 {
|
|
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
|
|
263 expanded_location xloc = expand_location (l);
|
|
264 r = xloc.line;
|
|
265 }
|
|
266 return r;
|
|
267 }
|
|
268
|
|
269 /* Lookup the cache used for the content of a given file accessed by
|
|
270 caret diagnostic. Return the found cached file, or NULL if no
|
|
271 cached file was found. */
|
|
272
|
|
273 static fcache*
|
|
274 lookup_file_in_cache_tab (const char *file_path)
|
|
275 {
|
|
276 if (file_path == NULL)
|
|
277 return NULL;
|
|
278
|
|
279 diagnostic_file_cache_init ();
|
|
280
|
|
281 /* This will contain the found cached file. */
|
|
282 fcache *r = NULL;
|
|
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
|
|
284 {
|
|
285 fcache *c = &fcache_tab[i];
|
|
286 if (c->file_path && !strcmp (c->file_path, file_path))
|
|
287 {
|
|
288 ++c->use_count;
|
|
289 r = c;
|
|
290 }
|
|
291 }
|
|
292
|
|
293 if (r)
|
|
294 ++r->use_count;
|
|
295
|
|
296 return r;
|
|
297 }
|
|
298
|
|
299 /* Purge any mention of FILENAME from the cache of files used for
|
|
300 printing source code. For use in selftests when working
|
|
301 with tempfiles. */
|
|
302
|
|
303 void
|
|
304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
|
|
305 {
|
|
306 gcc_assert (file_path);
|
|
307
|
|
308 fcache *r = lookup_file_in_cache_tab (file_path);
|
|
309 if (!r)
|
|
310 /* Not found. */
|
|
311 return;
|
|
312
|
|
313 r->file_path = NULL;
|
|
314 if (r->fp)
|
|
315 fclose (r->fp);
|
|
316 r->fp = NULL;
|
|
317 r->nb_read = 0;
|
|
318 r->line_start_idx = 0;
|
|
319 r->line_num = 0;
|
|
320 r->line_record.truncate (0);
|
|
321 r->use_count = 0;
|
|
322 r->total_lines = 0;
|
|
323 r->missing_trailing_newline = true;
|
|
324 }
|
|
325
|
|
326 /* Return the file cache that has been less used, recently, or the
|
|
327 first empty one. If HIGHEST_USE_COUNT is non-null,
|
|
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
|
|
329 in the cache table. */
|
|
330
|
|
331 static fcache*
|
|
332 evicted_cache_tab_entry (unsigned *highest_use_count)
|
|
333 {
|
|
334 diagnostic_file_cache_init ();
|
|
335
|
|
336 fcache *to_evict = &fcache_tab[0];
|
|
337 unsigned huc = to_evict->use_count;
|
|
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
|
|
339 {
|
|
340 fcache *c = &fcache_tab[i];
|
|
341 bool c_is_empty = (c->file_path == NULL);
|
|
342
|
|
343 if (c->use_count < to_evict->use_count
|
|
344 || (to_evict->file_path && c_is_empty))
|
|
345 /* We evict C because it's either an entry with a lower use
|
|
346 count or one that is empty. */
|
|
347 to_evict = c;
|
|
348
|
|
349 if (huc < c->use_count)
|
|
350 huc = c->use_count;
|
|
351
|
|
352 if (c_is_empty)
|
|
353 /* We've reached the end of the cache; subsequent elements are
|
|
354 all empty. */
|
|
355 break;
|
|
356 }
|
|
357
|
|
358 if (highest_use_count)
|
|
359 *highest_use_count = huc;
|
|
360
|
|
361 return to_evict;
|
|
362 }
|
|
363
|
|
364 /* Create the cache used for the content of a given file to be
|
|
365 accessed by caret diagnostic. This cache is added to an array of
|
|
366 cache and can be retrieved by lookup_file_in_cache_tab. This
|
|
367 function returns the created cache. Note that only the last
|
|
368 fcache_tab_size files are cached. */
|
|
369
|
|
370 static fcache*
|
|
371 add_file_to_cache_tab (const char *file_path)
|
|
372 {
|
|
373
|
|
374 FILE *fp = fopen (file_path, "r");
|
|
375 if (fp == NULL)
|
|
376 return NULL;
|
|
377
|
|
378 unsigned highest_use_count = 0;
|
|
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
|
|
380 r->file_path = file_path;
|
|
381 if (r->fp)
|
|
382 fclose (r->fp);
|
|
383 r->fp = fp;
|
|
384 r->nb_read = 0;
|
|
385 r->line_start_idx = 0;
|
|
386 r->line_num = 0;
|
|
387 r->line_record.truncate (0);
|
|
388 /* Ensure that this cache entry doesn't get evicted next time
|
|
389 add_file_to_cache_tab is called. */
|
|
390 r->use_count = ++highest_use_count;
|
|
391 r->total_lines = total_lines_num (file_path);
|
|
392 r->missing_trailing_newline = true;
|
|
393
|
|
394 return r;
|
|
395 }
|
|
396
|
|
397 /* Lookup the cache used for the content of a given file accessed by
|
|
398 caret diagnostic. If no cached file was found, create a new cache
|
|
399 for this file, add it to the array of cached file and return
|
|
400 it. */
|
|
401
|
|
402 static fcache*
|
|
403 lookup_or_add_file_to_cache_tab (const char *file_path)
|
|
404 {
|
|
405 fcache *r = lookup_file_in_cache_tab (file_path);
|
|
406 if (r == NULL)
|
|
407 r = add_file_to_cache_tab (file_path);
|
|
408 return r;
|
|
409 }
|
|
410
|
|
411 /* Default constructor for a cache of file used by caret
|
|
412 diagnostic. */
|
|
413
|
|
414 fcache::fcache ()
|
|
415 : use_count (0), file_path (NULL), fp (NULL), data (0),
|
|
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
|
|
417 total_lines (0), missing_trailing_newline (true)
|
|
418 {
|
|
419 line_record.create (0);
|
|
420 }
|
|
421
|
|
422 /* Destructor for a cache of file used by caret diagnostic. */
|
|
423
|
|
424 fcache::~fcache ()
|
|
425 {
|
|
426 if (fp)
|
|
427 {
|
|
428 fclose (fp);
|
|
429 fp = NULL;
|
|
430 }
|
|
431 if (data)
|
|
432 {
|
|
433 XDELETEVEC (data);
|
|
434 data = 0;
|
|
435 }
|
|
436 line_record.release ();
|
|
437 }
|
|
438
|
|
439 /* Returns TRUE iff the cache would need to be filled with data coming
|
|
440 from the file. That is, either the cache is empty or full or the
|
|
441 current line is empty. Note that if the cache is full, it would
|
|
442 need to be extended and filled again. */
|
|
443
|
|
444 static bool
|
|
445 needs_read (fcache *c)
|
|
446 {
|
|
447 return (c->nb_read == 0
|
|
448 || c->nb_read == c->size
|
|
449 || (c->line_start_idx >= c->nb_read - 1));
|
|
450 }
|
|
451
|
|
452 /* Return TRUE iff the cache is full and thus needs to be
|
|
453 extended. */
|
|
454
|
|
455 static bool
|
|
456 needs_grow (fcache *c)
|
|
457 {
|
|
458 return c->nb_read == c->size;
|
|
459 }
|
|
460
|
|
461 /* Grow the cache if it needs to be extended. */
|
|
462
|
|
463 static void
|
|
464 maybe_grow (fcache *c)
|
|
465 {
|
|
466 if (!needs_grow (c))
|
|
467 return;
|
|
468
|
|
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
|
|
470 c->data = XRESIZEVEC (char, c->data, size);
|
|
471 c->size = size;
|
|
472 }
|
|
473
|
|
474 /* Read more data into the cache. Extends the cache if need be.
|
|
475 Returns TRUE iff new data could be read. */
|
|
476
|
|
477 static bool
|
|
478 read_data (fcache *c)
|
|
479 {
|
|
480 if (feof (c->fp) || ferror (c->fp))
|
|
481 return false;
|
|
482
|
|
483 maybe_grow (c);
|
|
484
|
|
485 char * from = c->data + c->nb_read;
|
|
486 size_t to_read = c->size - c->nb_read;
|
|
487 size_t nb_read = fread (from, 1, to_read, c->fp);
|
|
488
|
|
489 if (ferror (c->fp))
|
|
490 return false;
|
|
491
|
|
492 c->nb_read += nb_read;
|
|
493 return !!nb_read;
|
|
494 }
|
|
495
|
|
496 /* Read new data iff the cache needs to be filled with more data
|
|
497 coming from the file FP. Return TRUE iff the cache was filled with
|
|
498 mode data. */
|
|
499
|
|
500 static bool
|
|
501 maybe_read_data (fcache *c)
|
|
502 {
|
|
503 if (!needs_read (c))
|
|
504 return false;
|
|
505 return read_data (c);
|
|
506 }
|
|
507
|
|
508 /* Read a new line from file FP, using C as a cache for the data
|
|
509 coming from the file. Upon successful completion, *LINE is set to
|
|
510 the beginning of the line found. *LINE points directly in the
|
|
511 line cache and is only valid until the next call of get_next_line.
|
|
512 *LINE_LEN is set to the length of the line. Note that the line
|
|
513 does not contain any terminal delimiter. This function returns
|
|
514 true if some data was read or process from the cache, false
|
|
515 otherwise. Note that subsequent calls to get_next_line might
|
|
516 make the content of *LINE invalid. */
|
|
517
|
|
518 static bool
|
|
519 get_next_line (fcache *c, char **line, ssize_t *line_len)
|
|
520 {
|
|
521 /* Fill the cache with data to process. */
|
|
522 maybe_read_data (c);
|
|
523
|
|
524 size_t remaining_size = c->nb_read - c->line_start_idx;
|
|
525 if (remaining_size == 0)
|
|
526 /* There is no more data to process. */
|
|
527 return false;
|
|
528
|
|
529 char *line_start = c->data + c->line_start_idx;
|
|
530
|
|
531 char *next_line_start = NULL;
|
|
532 size_t len = 0;
|
|
533 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
|
|
534 if (line_end == NULL)
|
|
535 {
|
|
536 /* We haven't found the end-of-line delimiter in the cache.
|
|
537 Fill the cache with more data from the file and look for the
|
|
538 '\n'. */
|
|
539 while (maybe_read_data (c))
|
|
540 {
|
|
541 line_start = c->data + c->line_start_idx;
|
|
542 remaining_size = c->nb_read - c->line_start_idx;
|
|
543 line_end = (char *) memchr (line_start, '\n', remaining_size);
|
|
544 if (line_end != NULL)
|
|
545 {
|
|
546 next_line_start = line_end + 1;
|
|
547 break;
|
|
548 }
|
|
549 }
|
|
550 if (line_end == NULL)
|
|
551 {
|
|
552 /* We've loadded all the file into the cache and still no
|
|
553 '\n'. Let's say the line ends up at one byte passed the
|
|
554 end of the file. This is to stay consistent with the case
|
|
555 of when the line ends up with a '\n' and line_end points to
|
|
556 that terminal '\n'. That consistency is useful below in
|
|
557 the len calculation. */
|
|
558 line_end = c->data + c->nb_read ;
|
|
559 c->missing_trailing_newline = true;
|
|
560 }
|
|
561 else
|
|
562 c->missing_trailing_newline = false;
|
|
563 }
|
|
564 else
|
|
565 {
|
|
566 next_line_start = line_end + 1;
|
|
567 c->missing_trailing_newline = false;
|
|
568 }
|
|
569
|
|
570 if (ferror (c->fp))
|
|
571 return false;
|
|
572
|
|
573 /* At this point, we've found the end of the of line. It either
|
|
574 points to the '\n' or to one byte after the last byte of the
|
|
575 file. */
|
|
576 gcc_assert (line_end != NULL);
|
|
577
|
|
578 len = line_end - line_start;
|
|
579
|
|
580 if (c->line_start_idx < c->nb_read)
|
|
581 *line = line_start;
|
|
582
|
|
583 ++c->line_num;
|
|
584
|
|
585 /* Before we update our line record, make sure the hint about the
|
|
586 total number of lines of the file is correct. If it's not, then
|
|
587 we give up recording line boundaries from now on. */
|
|
588 bool update_line_record = true;
|
|
589 if (c->line_num > c->total_lines)
|
|
590 update_line_record = false;
|
|
591
|
|
592 /* Now update our line record so that re-reading lines from the
|
|
593 before c->line_start_idx is faster. */
|
|
594 if (update_line_record
|
|
595 && c->line_record.length () < fcache_line_record_size)
|
|
596 {
|
|
597 /* If the file lines fits in the line record, we just record all
|
|
598 its lines ...*/
|
|
599 if (c->total_lines <= fcache_line_record_size
|
|
600 && c->line_num > c->line_record.length ())
|
|
601 c->line_record.safe_push (fcache::line_info (c->line_num,
|
|
602 c->line_start_idx,
|
|
603 line_end - c->data));
|
|
604 else if (c->total_lines > fcache_line_record_size)
|
|
605 {
|
|
606 /* ... otherwise, we just scale total_lines down to
|
|
607 (fcache_line_record_size lines. */
|
|
608 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
|
|
609 if (c->line_record.length () == 0
|
|
610 || n >= c->line_record.length ())
|
|
611 c->line_record.safe_push (fcache::line_info (c->line_num,
|
|
612 c->line_start_idx,
|
|
613 line_end - c->data));
|
|
614 }
|
|
615 }
|
|
616
|
|
617 /* Update c->line_start_idx so that it points to the next line to be
|
|
618 read. */
|
|
619 if (next_line_start)
|
|
620 c->line_start_idx = next_line_start - c->data;
|
|
621 else
|
|
622 /* We didn't find any terminal '\n'. Let's consider that the end
|
|
623 of line is the end of the data in the cache. The next
|
|
624 invocation of get_next_line will either read more data from the
|
|
625 underlying file or return false early because we've reached the
|
|
626 end of the file. */
|
|
627 c->line_start_idx = c->nb_read;
|
|
628
|
|
629 *line_len = len;
|
|
630
|
|
631 return true;
|
|
632 }
|
|
633
|
|
634 /* Consume the next bytes coming from the cache (or from its
|
|
635 underlying file if there are remaining unread bytes in the file)
|
|
636 until we reach the next end-of-line (or end-of-file). There is no
|
|
637 copying from the cache involved. Return TRUE upon successful
|
|
638 completion. */
|
|
639
|
|
640 static bool
|
|
641 goto_next_line (fcache *cache)
|
|
642 {
|
|
643 char *l;
|
|
644 ssize_t len;
|
|
645
|
|
646 return get_next_line (cache, &l, &len);
|
|
647 }
|
|
648
|
|
649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
|
|
650 If the line was read successfully, *LINE points to the beginning
|
|
651 of the line in the file cache and *LINE_LEN is the length of the
|
|
652 line. *LINE is not nul-terminated, but may contain zero bytes.
|
|
653 *LINE is only valid until the next call of read_line_num.
|
|
654 This function returns bool if a line was read. */
|
|
655
|
|
656 static bool
|
|
657 read_line_num (fcache *c, size_t line_num,
|
|
658 char **line, ssize_t *line_len)
|
|
659 {
|
|
660 gcc_assert (line_num > 0);
|
|
661
|
|
662 if (line_num <= c->line_num)
|
|
663 {
|
|
664 /* We've been asked to read lines that are before c->line_num.
|
|
665 So lets use our line record (if it's not empty) to try to
|
|
666 avoid re-reading the file from the beginning again. */
|
|
667
|
|
668 if (c->line_record.is_empty ())
|
|
669 {
|
|
670 c->line_start_idx = 0;
|
|
671 c->line_num = 0;
|
|
672 }
|
|
673 else
|
|
674 {
|
|
675 fcache::line_info *i = NULL;
|
|
676 if (c->total_lines <= fcache_line_record_size)
|
|
677 {
|
|
678 /* In languages where the input file is not totally
|
|
679 preprocessed up front, the c->total_lines hint
|
|
680 can be smaller than the number of lines of the
|
|
681 file. In that case, only the first
|
|
682 c->total_lines have been recorded.
|
|
683
|
|
684 Otherwise, the first c->total_lines we've read have
|
|
685 their start/end recorded here. */
|
|
686 i = (line_num <= c->total_lines)
|
|
687 ? &c->line_record[line_num - 1]
|
|
688 : &c->line_record[c->total_lines - 1];
|
|
689 gcc_assert (i->line_num <= line_num);
|
|
690 }
|
|
691 else
|
|
692 {
|
|
693 /* So the file had more lines than our line record
|
|
694 size. Thus the number of lines we've recorded has
|
|
695 been scaled down to fcache_line_reacord_size. Let's
|
|
696 pick the start/end of the recorded line that is
|
|
697 closest to line_num. */
|
|
698 size_t n = (line_num <= c->total_lines)
|
|
699 ? line_num * fcache_line_record_size / c->total_lines
|
|
700 : c ->line_record.length () - 1;
|
|
701 if (n < c->line_record.length ())
|
|
702 {
|
|
703 i = &c->line_record[n];
|
|
704 gcc_assert (i->line_num <= line_num);
|
|
705 }
|
|
706 }
|
|
707
|
|
708 if (i && i->line_num == line_num)
|
|
709 {
|
|
710 /* We have the start/end of the line. */
|
|
711 *line = c->data + i->start_pos;
|
|
712 *line_len = i->end_pos - i->start_pos;
|
|
713 return true;
|
|
714 }
|
|
715
|
|
716 if (i)
|
|
717 {
|
|
718 c->line_start_idx = i->start_pos;
|
|
719 c->line_num = i->line_num - 1;
|
|
720 }
|
|
721 else
|
|
722 {
|
|
723 c->line_start_idx = 0;
|
|
724 c->line_num = 0;
|
|
725 }
|
|
726 }
|
|
727 }
|
|
728
|
|
729 /* Let's walk from line c->line_num up to line_num - 1, without
|
|
730 copying any line. */
|
|
731 while (c->line_num < line_num - 1)
|
|
732 if (!goto_next_line (c))
|
|
733 return false;
|
|
734
|
|
735 /* The line we want is the next one. Let's read and copy it back to
|
|
736 the caller. */
|
|
737 return get_next_line (c, line, line_len);
|
|
738 }
|
|
739
|
|
740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
|
|
741 The line is not nul-terminated. The returned pointer is only
|
|
742 valid until the next call of location_get_source_line.
|
|
743 Note that the line can contain several null characters,
|
131
|
744 so the returned value's length has the actual length of the line.
|
|
745 If the function fails, a NULL char_span is returned. */
|
|
746
|
|
747 char_span
|
|
748 location_get_source_line (const char *file_path, int line)
|
111
|
749 {
|
|
750 char *buffer = NULL;
|
|
751 ssize_t len;
|
|
752
|
|
753 if (line == 0)
|
131
|
754 return char_span (NULL, 0);
|
111
|
755
|
|
756 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
|
|
757 if (c == NULL)
|
131
|
758 return char_span (NULL, 0);
|
111
|
759
|
|
760 bool read = read_line_num (c, line, &buffer, &len);
|
131
|
761 if (!read)
|
|
762 return char_span (NULL, 0);
|
|
763
|
|
764 return char_span (buffer, len);
|
111
|
765 }
|
|
766
|
|
767 /* Determine if FILE_PATH missing a trailing newline on its final line.
|
|
768 Only valid to call once all of the file has been loaded, by
|
|
769 requesting a line number beyond the end of the file. */
|
|
770
|
|
771 bool
|
|
772 location_missing_trailing_newline (const char *file_path)
|
|
773 {
|
|
774 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
|
|
775 if (c == NULL)
|
|
776 return false;
|
|
777
|
|
778 return c->missing_trailing_newline;
|
|
779 }
|
|
780
|
|
781 /* Test if the location originates from the spelling location of a
|
|
782 builtin-tokens. That is, return TRUE if LOC is a (possibly
|
|
783 virtual) location of a built-in token that appears in the expansion
|
|
784 list of a macro. Please note that this function also works on
|
|
785 tokens that result from built-in tokens. For instance, the
|
|
786 function would return true if passed a token "4" that is the result
|
|
787 of the expansion of the built-in __LINE__ macro. */
|
|
788 bool
|
|
789 is_location_from_builtin_token (source_location loc)
|
|
790 {
|
|
791 const line_map_ordinary *map = NULL;
|
|
792 loc = linemap_resolve_location (line_table, loc,
|
|
793 LRK_SPELLING_LOCATION, &map);
|
|
794 return loc == BUILTINS_LOCATION;
|
|
795 }
|
|
796
|
|
797 /* Expand the source location LOC into a human readable location. If
|
|
798 LOC is virtual, it resolves to the expansion point of the involved
|
|
799 macro. If LOC resolves to a builtin location, the file name of the
|
|
800 readable location is set to the string "<built-in>". */
|
|
801
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
802 expanded_location
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
803 expand_location (source_location loc)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
804 {
|
111
|
805 return expand_location_1 (loc, /*expansion_point_p=*/true,
|
|
806 LOCATION_ASPECT_CARET);
|
|
807 }
|
|
808
|
|
809 /* Expand the source location LOC into a human readable location. If
|
|
810 LOC is virtual, it resolves to the expansion location of the
|
|
811 relevant macro. If LOC resolves to a builtin location, the file
|
|
812 name of the readable location is set to the string
|
|
813 "<built-in>". */
|
|
814
|
|
815 expanded_location
|
131
|
816 expand_location_to_spelling_point (source_location loc,
|
|
817 enum location_aspect aspect)
|
111
|
818 {
|
131
|
819 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
|
111
|
820 }
|
|
821
|
|
822 /* The rich_location class within libcpp requires a way to expand
|
|
823 source_location instances, and relies on the client code
|
|
824 providing a symbol named
|
|
825 linemap_client_expand_location_to_spelling_point
|
|
826 to do this.
|
|
827
|
|
828 This is the implementation for libcommon.a (all host binaries),
|
|
829 which simply calls into expand_location_1. */
|
|
830
|
|
831 expanded_location
|
|
832 linemap_client_expand_location_to_spelling_point (source_location loc,
|
|
833 enum location_aspect aspect)
|
|
834 {
|
|
835 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
|
|
836 }
|
|
837
|
|
838
|
|
839 /* If LOCATION is in a system header and if it is a virtual location for
|
|
840 a token coming from the expansion of a macro, unwind it to the
|
|
841 location of the expansion point of the macro. Otherwise, just return
|
|
842 LOCATION.
|
|
843
|
|
844 This is used for instance when we want to emit diagnostics about a
|
|
845 token that may be located in a macro that is itself defined in a
|
|
846 system header, for example, for the NULL macro. In such a case, if
|
|
847 LOCATION were passed directly to diagnostic functions such as
|
|
848 warning_at, the diagnostic would be suppressed (unless
|
|
849 -Wsystem-headers). */
|
|
850
|
|
851 source_location
|
|
852 expansion_point_location_if_in_system_header (source_location location)
|
|
853 {
|
|
854 if (in_system_header_at (location))
|
|
855 location = linemap_resolve_location (line_table, location,
|
|
856 LRK_MACRO_EXPANSION_POINT,
|
|
857 NULL);
|
|
858 return location;
|
|
859 }
|
|
860
|
|
861 /* If LOCATION is a virtual location for a token coming from the expansion
|
|
862 of a macro, unwind to the location of the expansion point of the macro. */
|
|
863
|
|
864 source_location
|
|
865 expansion_point_location (source_location location)
|
|
866 {
|
|
867 return linemap_resolve_location (line_table, location,
|
|
868 LRK_MACRO_EXPANSION_POINT, NULL);
|
|
869 }
|
|
870
|
|
871 /* Construct a location with caret at CARET, ranging from START to
|
|
872 finish e.g.
|
|
873
|
|
874 11111111112
|
|
875 12345678901234567890
|
|
876 522
|
|
877 523 return foo + bar;
|
|
878 ~~~~^~~~~
|
|
879 524
|
|
880
|
|
881 The location's caret is at the "+", line 523 column 15, but starts
|
|
882 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
|
|
883 of "bar" at column 19. */
|
|
884
|
|
885 location_t
|
|
886 make_location (location_t caret, location_t start, location_t finish)
|
|
887 {
|
|
888 location_t pure_loc = get_pure_location (caret);
|
|
889 source_range src_range;
|
|
890 src_range.m_start = get_start (start);
|
|
891 src_range.m_finish = get_finish (finish);
|
|
892 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
|
|
893 pure_loc,
|
|
894 src_range,
|
|
895 NULL);
|
|
896 return combined_loc;
|
|
897 }
|
|
898
|
|
899 /* Same as above, but taking a source range rather than two locations. */
|
|
900
|
|
901 location_t
|
|
902 make_location (location_t caret, source_range src_range)
|
|
903 {
|
|
904 location_t pure_loc = get_pure_location (caret);
|
|
905 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
|
|
906 }
|
|
907
|
|
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the number X for display, yielding an unsigned long:
     - X itself, if X is below 10K (10 * 1024);
     - X / 1024, if X is at least 10K but below 10M (10 * 1024 * 1024);
     - X / (1024 * 1024), if X is at least 10M.
   X is evaluated more than once.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                                   ? (x) / ONE_M \
                                   : ((x) >= 10 * ONE_K \
                                      ? (x) / ONE_K \
                                      : (x))))

/* Yield the unit character matching SCALE (X):
     - ' ' if X is below 10K;
     - 'k' if X is at least 10K but below 10M;
     - 'M' if X is at least 10M.
   X is evaluated more than once.  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two comma-separated arguments: the amount SIZE scaled by
   SCALE, followed by the matching unit character (' ', 'k' or 'M').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
|
|
932
|
|
933 /* Dump statistics to stderr about the memory usage of the line_table
|
|
934 set of line maps. This also displays some statistics about macro
|
|
935 expansion. */
|
|
936
|
|
937 void
|
|
938 dump_line_table_statistics (void)
|
|
939 {
|
|
940 struct linemap_stats s;
|
|
941 long total_used_map_size,
|
|
942 macro_maps_size,
|
|
943 total_allocated_map_size;
|
|
944
|
|
945 memset (&s, 0, sizeof (s));
|
|
946
|
|
947 linemap_get_statistics (line_table, &s);
|
|
948
|
|
949 macro_maps_size = s.macro_maps_used_size
|
|
950 + s.macro_maps_locations_size;
|
|
951
|
|
952 total_allocated_map_size = s.ordinary_maps_allocated_size
|
|
953 + s.macro_maps_allocated_size
|
|
954 + s.macro_maps_locations_size;
|
|
955
|
|
956 total_used_map_size = s.ordinary_maps_used_size
|
|
957 + s.macro_maps_used_size
|
|
958 + s.macro_maps_locations_size;
|
|
959
|
|
960 fprintf (stderr, "Number of expanded macros: %5ld\n",
|
|
961 s.num_expanded_macros);
|
|
962 if (s.num_expanded_macros != 0)
|
|
963 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
|
|
964 s.num_macro_tokens / s.num_expanded_macros);
|
|
965 fprintf (stderr,
|
|
966 "\nLine Table allocations during the "
|
|
967 "compilation process\n");
|
|
968 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
|
|
969 SCALE (s.num_ordinary_maps_used),
|
|
970 STAT_LABEL (s.num_ordinary_maps_used));
|
|
971 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
|
|
972 SCALE (s.ordinary_maps_used_size),
|
|
973 STAT_LABEL (s.ordinary_maps_used_size));
|
|
974 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
|
|
975 SCALE (s.num_ordinary_maps_allocated),
|
|
976 STAT_LABEL (s.num_ordinary_maps_allocated));
|
|
977 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
|
|
978 SCALE (s.ordinary_maps_allocated_size),
|
|
979 STAT_LABEL (s.ordinary_maps_allocated_size));
|
|
980 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
|
|
981 SCALE (s.num_macro_maps_used),
|
|
982 STAT_LABEL (s.num_macro_maps_used));
|
|
983 fprintf (stderr, "Macro maps used size: %5ld%c\n",
|
|
984 SCALE (s.macro_maps_used_size),
|
|
985 STAT_LABEL (s.macro_maps_used_size));
|
|
986 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
|
|
987 SCALE (s.macro_maps_locations_size),
|
|
988 STAT_LABEL (s.macro_maps_locations_size));
|
|
989 fprintf (stderr, "Macro maps size: %5ld%c\n",
|
|
990 SCALE (macro_maps_size),
|
|
991 STAT_LABEL (macro_maps_size));
|
|
992 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
|
|
993 SCALE (s.duplicated_macro_maps_locations_size),
|
|
994 STAT_LABEL (s.duplicated_macro_maps_locations_size));
|
|
995 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
|
|
996 SCALE (total_allocated_map_size),
|
|
997 STAT_LABEL (total_allocated_map_size));
|
|
998 fprintf (stderr, "Total used maps size: %5ld%c\n",
|
|
999 SCALE (total_used_map_size),
|
|
1000 STAT_LABEL (total_used_map_size));
|
|
1001 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
|
|
1002 SCALE (s.adhoc_table_size),
|
|
1003 STAT_LABEL (s.adhoc_table_size));
|
|
1004 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
|
|
1005 s.adhoc_table_entries_used);
|
|
1006 fprintf (stderr, "optimized_ranges: %i\n",
|
|
1007 line_table->num_optimized_ranges);
|
|
1008 fprintf (stderr, "unoptimized_ranges: %i\n",
|
|
1009 line_table->num_unoptimized_ranges);
|
|
1010
|
|
1011 fprintf (stderr, "\n");
|
|
1012 }
|
|
1013
|
|
1014 /* Get location one beyond the final location in ordinary map IDX. */
|
|
1015
|
|
1016 static source_location
|
|
1017 get_end_location (struct line_maps *set, unsigned int idx)
|
|
1018 {
|
|
1019 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
|
|
1020 return set->highest_location;
|
|
1021
|
|
1022 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
|
|
1023 return MAP_START_LOCATION (next_map);
|
|
1024 }
|
|
1025
|
|
/* Helper function for write_digit_row.
   Write the units digit of DIGIT (i.e. DIGIT modulo 10) to STREAM as
   a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
|
|
1033
|
|
1034 /* Helper function for dump_location_info.
|
|
1035 Write a row of numbers to STREAM, numbering a source line,
|
|
1036 giving the units, tens, hundreds etc of the column number. */
|
|
1037
|
|
1038 static void
|
|
1039 write_digit_row (FILE *stream, int indent,
|
|
1040 const line_map_ordinary *map,
|
|
1041 source_location loc, int max_col, int divisor)
|
|
1042 {
|
|
1043 fprintf (stream, "%*c", indent, ' ');
|
|
1044 fprintf (stream, "|");
|
|
1045 for (int column = 1; column < max_col; column++)
|
|
1046 {
|
|
1047 source_location column_loc = loc + (column << map->m_range_bits);
|
|
1048 write_digit (stream, column_loc / divisor);
|
|
1049 }
|
|
1050 fprintf (stream, "\n");
|
|
1051 }
|
|
1052
|
|
1053 /* Write a half-closed (START) / half-open (END) interval of
|
|
1054 source_location to STREAM. */
|
|
1055
|
|
1056 static void
|
|
1057 dump_location_range (FILE *stream,
|
|
1058 source_location start, source_location end)
|
|
1059 {
|
|
1060 fprintf (stream,
|
|
1061 " source_location interval: %u <= loc < %u\n",
|
|
1062 start, end);
|
|
1063 }
|
|
1064
|
|
1065 /* Write a labelled description of a half-closed (START) / half-open (END)
|
|
1066 interval of source_location to STREAM. */
|
|
1067
|
|
1068 static void
|
|
1069 dump_labelled_location_range (FILE *stream,
|
|
1070 const char *name,
|
|
1071 source_location start, source_location end)
|
|
1072 {
|
|
1073 fprintf (stream, "%s\n", name);
|
|
1074 dump_location_range (stream, start, end);
|
|
1075 fprintf (stream, "\n");
|
|
1076 }
|
|
1077
|
|
1078 /* Write a visualization of the locations in the line_table to STREAM. */
|
|
1079
|
|
1080 void
|
|
1081 dump_location_info (FILE *stream)
|
|
1082 {
|
|
1083 /* Visualize the reserved locations. */
|
|
1084 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
|
|
1085 0, RESERVED_LOCATION_COUNT);
|
|
1086
|
|
1087 /* Visualize the ordinary line_map instances, rendering the sources. */
|
|
1088 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
|
|
1089 {
|
|
1090 source_location end_location = get_end_location (line_table, idx);
|
|
1091 /* half-closed: doesn't include this one. */
|
|
1092
|
|
1093 const line_map_ordinary *map
|
|
1094 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
|
|
1095 fprintf (stream, "ORDINARY MAP: %i\n", idx);
|
|
1096 dump_location_range (stream,
|
|
1097 MAP_START_LOCATION (map), end_location);
|
|
1098 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
|
|
1099 fprintf (stream, " starting at line: %i\n",
|
|
1100 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
|
|
1101 fprintf (stream, " column and range bits: %i\n",
|
|
1102 map->m_column_and_range_bits);
|
|
1103 fprintf (stream, " column bits: %i\n",
|
|
1104 map->m_column_and_range_bits - map->m_range_bits);
|
|
1105 fprintf (stream, " range bits: %i\n",
|
|
1106 map->m_range_bits);
|
|
1107
|
|
1108 /* Render the span of source lines that this "map" covers. */
|
|
1109 for (source_location loc = MAP_START_LOCATION (map);
|
|
1110 loc < end_location;
|
|
1111 loc += (1 << map->m_range_bits) )
|
|
1112 {
|
|
1113 gcc_assert (pure_location_p (line_table, loc) );
|
|
1114
|
|
1115 expanded_location exploc
|
|
1116 = linemap_expand_location (line_table, map, loc);
|
|
1117
|
131
|
1118 if (exploc.column == 0)
|
111
|
1119 {
|
|
1120 /* Beginning of a new source line: draw the line. */
|
|
1121
|
131
|
1122 char_span line_text = location_get_source_line (exploc.file,
|
|
1123 exploc.line);
|
111
|
1124 if (!line_text)
|
|
1125 break;
|
|
1126 fprintf (stream,
|
|
1127 "%s:%3i|loc:%5i|%.*s\n",
|
|
1128 exploc.file, exploc.line,
|
|
1129 loc,
|
131
|
1130 (int)line_text.length (), line_text.get_buffer ());
|
111
|
1131
|
|
1132 /* "loc" is at column 0, which means "the whole line".
|
|
1133 Render the locations *within* the line, by underlining
|
|
1134 it, showing the source_location numeric values
|
|
1135 at each column. */
|
131
|
1136 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
|
|
1137 if (max_col > line_text.length ())
|
|
1138 max_col = line_text.length () + 1;
|
111
|
1139
|
|
1140 int indent = 14 + strlen (exploc.file);
|
|
1141
|
|
1142 /* Thousands. */
|
|
1143 if (end_location > 999)
|
|
1144 write_digit_row (stream, indent, map, loc, max_col, 1000);
|
|
1145
|
|
1146 /* Hundreds. */
|
|
1147 if (end_location > 99)
|
|
1148 write_digit_row (stream, indent, map, loc, max_col, 100);
|
|
1149
|
|
1150 /* Tens. */
|
|
1151 write_digit_row (stream, indent, map, loc, max_col, 10);
|
|
1152
|
|
1153 /* Units. */
|
|
1154 write_digit_row (stream, indent, map, loc, max_col, 1);
|
|
1155 }
|
|
1156 }
|
|
1157 fprintf (stream, "\n");
|
|
1158 }
|
|
1159
|
|
1160 /* Visualize unallocated values. */
|
|
1161 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
|
|
1162 line_table->highest_location,
|
|
1163 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
|
|
1164
|
|
1165 /* Visualize the macro line_map instances, rendering the sources. */
|
|
1166 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
|
|
1167 {
|
|
1168 /* Each macro map that is allocated owns source_location values
|
|
1169 that are *lower* that the one before them.
|
|
1170 Hence it's meaningful to view them either in order of ascending
|
|
1171 source locations, or in order of ascending macro map index. */
|
|
1172 const bool ascending_source_locations = true;
|
|
1173 unsigned int idx = (ascending_source_locations
|
|
1174 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
|
|
1175 : i);
|
|
1176 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
|
|
1177 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
|
|
1178 idx,
|
|
1179 linemap_map_get_macro_name (map),
|
|
1180 MACRO_MAP_NUM_MACRO_TOKENS (map));
|
|
1181 dump_location_range (stream,
|
|
1182 map->start_location,
|
|
1183 (map->start_location
|
|
1184 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
|
|
1185 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
|
|
1186 "expansion point is location %i",
|
|
1187 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
|
|
1188 fprintf (stream, " map->start_location: %u\n",
|
|
1189 map->start_location);
|
|
1190
|
|
1191 fprintf (stream, " macro_locations:\n");
|
|
1192 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
|
|
1193 {
|
|
1194 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
|
|
1195 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
|
|
1196
|
|
1197 /* linemap_add_macro_token encodes token numbers in an expansion
|
|
1198 by putting them after MAP_START_LOCATION. */
|
|
1199
|
|
1200 /* I'm typically seeing 4 uninitialized entries at the end of
|
|
1201 0xafafafaf.
|
|
1202 This appears to be due to macro.c:replace_args
|
|
1203 adding 2 extra args for padding tokens; presumably there may
|
|
1204 be a leading and/or trailing padding token injected,
|
|
1205 each for 2 more location slots.
|
|
1206 This would explain there being up to 4 source_locations slots
|
|
1207 that may be uninitialized. */
|
|
1208
|
|
1209 fprintf (stream, " %u: %u, %u\n",
|
|
1210 i,
|
|
1211 x,
|
|
1212 y);
|
|
1213 if (x == y)
|
|
1214 {
|
|
1215 if (x < MAP_START_LOCATION (map))
|
|
1216 inform (x, "token %u has x-location == y-location == %u", i, x);
|
|
1217 else
|
|
1218 fprintf (stream,
|
|
1219 "x-location == y-location == %u encodes token # %u\n",
|
|
1220 x, x - MAP_START_LOCATION (map));
|
|
1221 }
|
|
1222 else
|
|
1223 {
|
|
1224 inform (x, "token %u has x-location == %u", i, x);
|
|
1225 inform (x, "token %u has y-location == %u", i, y);
|
|
1226 }
|
|
1227 }
|
|
1228 fprintf (stream, "\n");
|
|
1229 }
|
|
1230
|
|
1231 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
|
|
1232 macro map, presumably due to an off-by-one error somewhere
|
|
1233 between the logic in linemap_enter_macro and
|
|
1234 LINEMAPS_MACRO_LOWEST_LOCATION. */
|
|
1235 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
|
|
1236 MAX_SOURCE_LOCATION,
|
|
1237 MAX_SOURCE_LOCATION + 1);
|
|
1238
|
|
1239 /* Visualize ad-hoc values. */
|
|
1240 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
|
|
1241 MAX_SOURCE_LOCATION + 1, UINT_MAX);
|
|
1242 }
|
|
1243
|
|
1244 /* string_concat's constructor. */
|
|
1245
|
|
1246 string_concat::string_concat (int num, location_t *locs)
|
|
1247 : m_num (num)
|
|
1248 {
|
|
1249 m_locs = ggc_vec_alloc <location_t> (num);
|
|
1250 for (int i = 0; i < num; i++)
|
|
1251 m_locs[i] = locs[i];
|
|
1252 }
|
|
1253
|
|
1254 /* string_concat_db's constructor. */
|
|
1255
|
|
1256 string_concat_db::string_concat_db ()
|
|
1257 {
|
|
1258 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
|
|
1259 }
|
|
1260
|
|
1261 /* Record that a string concatenation occurred, covering NUM
|
|
1262 string literal tokens. LOCS is an array of size NUM, containing the
|
|
1263 locations of the tokens. A copy of LOCS is taken. */
|
|
1264
|
|
1265 void
|
|
1266 string_concat_db::record_string_concatenation (int num, location_t *locs)
|
|
1267 {
|
|
1268 gcc_assert (num > 1);
|
|
1269 gcc_assert (locs);
|
|
1270
|
|
1271 location_t key_loc = get_key_loc (locs[0]);
|
|
1272
|
|
1273 string_concat *concat
|
|
1274 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
|
|
1275 m_table->put (key_loc, concat);
|
|
1276 }
|
|
1277
|
|
1278 /* Determine if LOC was the location of the the initial token of a
|
|
1279 concatenation of string literal tokens.
|
|
1280 If so, *OUT_NUM is written to with the number of tokens, and
|
|
1281 *OUT_LOCS with the location of an array of locations of the
|
|
1282 tokens, and return true. *OUT_LOCS is a borrowed pointer to
|
|
1283 storage owned by the string_concat_db.
|
|
1284 Otherwise, return false. */
|
|
1285
|
|
1286 bool
|
|
1287 string_concat_db::get_string_concatenation (location_t loc,
|
|
1288 int *out_num,
|
|
1289 location_t **out_locs)
|
|
1290 {
|
|
1291 gcc_assert (out_num);
|
|
1292 gcc_assert (out_locs);
|
|
1293
|
|
1294 location_t key_loc = get_key_loc (loc);
|
|
1295
|
|
1296 string_concat **concat = m_table->get (key_loc);
|
|
1297 if (!concat)
|
|
1298 return false;
|
|
1299
|
|
1300 *out_num = (*concat)->m_num;
|
|
1301 *out_locs =(*concat)->m_locs;
|
|
1302 return true;
|
|
1303 }
|
|
1304
|
|
1305 /* Internal function. Canonicalize LOC into a form suitable for
|
|
1306 use as a key within the database, stripping away macro expansion,
|
|
1307 ad-hoc information, and range information, using the location of
|
|
1308 the start of LOC within an ordinary linemap. */
|
|
1309
|
|
1310 location_t
|
|
1311 string_concat_db::get_key_loc (location_t loc)
|
|
1312 {
|
|
1313 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
|
|
1314 NULL);
|
|
1315
|
|
1316 loc = get_range_from_loc (line_table, loc).m_start;
|
|
1317
|
|
1318 return loc;
|
|
1319 }
|
|
1320
|
|
1321 /* Helper class for use within get_substring_ranges_for_loc.
|
|
1322 An vec of cpp_string with responsibility for releasing all of the
|
|
1323 str->text for each str in the vector. */
|
|
1324
|
|
1325 class auto_cpp_string_vec : public auto_vec <cpp_string>
|
|
1326 {
|
|
1327 public:
|
|
1328 auto_cpp_string_vec (int alloc)
|
|
1329 : auto_vec <cpp_string> (alloc) {}
|
|
1330
|
|
1331 ~auto_cpp_string_vec ()
|
|
1332 {
|
|
1333 /* Clean up the copies within this vec. */
|
|
1334 int i;
|
|
1335 cpp_string *str;
|
|
1336 FOR_EACH_VEC_ELT (*this, i, str)
|
|
1337 free (const_cast <unsigned char *> (str->text));
|
|
1338 }
|
|
1339 };
|
|
1340
|
|
1341 /* Attempt to populate RANGES with source location information on the
|
|
1342 individual characters within the string literal found at STRLOC.
|
|
1343 If CONCATS is non-NULL, then any string literals that the token at
|
|
1344 STRLOC was concatenated with are also added to RANGES.
|
|
1345
|
|
1346 Return NULL if successful, or an error message if any errors occurred (in
|
|
1347 which case RANGES may be only partially populated and should not
|
|
1348 be used).
|
|
1349
|
|
1350 This is implemented by re-parsing the relevant source line(s). */
|
|
1351
|
|
1352 static const char *
|
|
1353 get_substring_ranges_for_loc (cpp_reader *pfile,
|
|
1354 string_concat_db *concats,
|
|
1355 location_t strloc,
|
|
1356 enum cpp_ttype type,
|
|
1357 cpp_substring_ranges &ranges)
|
|
1358 {
|
|
1359 gcc_assert (pfile);
|
|
1360
|
|
1361 if (strloc == UNKNOWN_LOCATION)
|
|
1362 return "unknown location";
|
|
1363
|
|
1364 /* Reparsing the strings requires accurate location information.
|
|
1365 If -ftrack-macro-expansion has been overridden from its default
|
|
1366 of 2, then we might have a location of a macro expansion point,
|
|
1367 rather than the location of the literal itself.
|
|
1368 Avoid this by requiring that we have full macro expansion tracking
|
|
1369 for substring locations to be available. */
|
|
1370 if (cpp_get_options (pfile)->track_macro_expansion != 2)
|
|
1371 return "track_macro_expansion != 2";
|
|
1372
|
|
1373 /* If #line or # 44 "file"-style directives are present, then there's
|
|
1374 no guarantee that the line numbers we have can be used to locate
|
|
1375 the strings. For example, we might have a .i file with # directives
|
|
1376 pointing back to lines within a .c file, but the .c file might
|
|
1377 have been edited since the .i file was created.
|
|
1378 In such a case, the safest course is to disable on-demand substring
|
|
1379 locations. */
|
|
1380 if (line_table->seen_line_directive)
|
|
1381 return "seen line directive";
|
|
1382
|
|
1383 /* If string concatenation has occurred at STRLOC, get the locations
|
|
1384 of all of the literal tokens making up the compound string.
|
|
1385 Otherwise, just use STRLOC. */
|
|
1386 int num_locs = 1;
|
|
1387 location_t *strlocs = &strloc;
|
|
1388 if (concats)
|
|
1389 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
|
|
1390
|
|
1391 auto_cpp_string_vec strs (num_locs);
|
|
1392 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
|
|
1393 for (int i = 0; i < num_locs; i++)
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1394 {
|
111
|
1395 /* Get range of strloc. We will use it to locate the start and finish
|
|
1396 of the literal token within the line. */
|
|
1397 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
|
|
1398
|
|
1399 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
|
131
|
1400 {
|
|
1401 /* If the string token was within a macro expansion, then we can
|
|
1402 cope with it for the simple case where we have a single token.
|
|
1403 Otherwise, bail out. */
|
|
1404 if (src_range.m_start != src_range.m_finish)
|
|
1405 return "macro expansion";
|
|
1406 }
|
|
1407 else
|
|
1408 {
|
|
1409 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
|
|
1410 /* If so, we can't reliably determine where the token started within
|
|
1411 its line. */
|
|
1412 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
|
|
1413
|
|
1414 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
|
|
1415 /* If so, we can't reliably determine where the token finished
|
|
1416 within its line. */
|
|
1417 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
|
|
1418 }
|
111
|
1419
|
|
1420 expanded_location start
|
131
|
1421 = expand_location_to_spelling_point (src_range.m_start,
|
|
1422 LOCATION_ASPECT_START);
|
111
|
1423 expanded_location finish
|
131
|
1424 = expand_location_to_spelling_point (src_range.m_finish,
|
|
1425 LOCATION_ASPECT_FINISH);
|
111
|
1426 if (start.file != finish.file)
|
|
1427 return "range endpoints are in different files";
|
|
1428 if (start.line != finish.line)
|
|
1429 return "range endpoints are on different lines";
|
|
1430 if (start.column > finish.column)
|
|
1431 return "range endpoints are reversed";
|
|
1432
|
131
|
1433 char_span line = location_get_source_line (start.file, start.line);
|
|
1434 if (!line)
|
111
|
1435 return "unable to read source line";
|
|
1436
|
|
1437 /* Determine the location of the literal (including quotes
|
|
1438 and leading prefix chars, such as the 'u' in a u""
|
|
1439 token). */
|
131
|
1440 size_t literal_length = finish.column - start.column + 1;
|
111
|
1441
|
|
1442 /* Ensure that we don't crash if we got the wrong location. */
|
131
|
1443 if (line.length () < (start.column - 1 + literal_length))
|
111
|
1444 return "line is not wide enough";
|
|
1445
|
131
|
1446 char_span literal = line.subspan (start.column - 1, literal_length);
|
|
1447
|
111
|
1448 cpp_string from;
|
|
1449 from.len = literal_length;
|
|
1450 /* Make a copy of the literal, to avoid having to rely on
|
|
1451 the lifetime of the copy of the line within the cache.
|
|
1452 This will be released by the auto_cpp_string_vec dtor. */
|
131
|
1453 from.text = (unsigned char *)literal.xstrdup ();
|
111
|
1454 strs.safe_push (from);
|
|
1455
|
|
1456 /* For very long lines, a new linemap could have started
|
|
1457 halfway through the token.
|
|
1458 Ensure that the loc_reader uses the linemap of the
|
|
1459 *end* of the token for its start location. */
|
131
|
1460 const line_map_ordinary *start_ord_map;
|
|
1461 linemap_resolve_location (line_table, src_range.m_start,
|
|
1462 LRK_SPELLING_LOCATION, &start_ord_map);
|
111
|
1463 const line_map_ordinary *final_ord_map;
|
|
1464 linemap_resolve_location (line_table, src_range.m_finish,
|
131
|
1465 LRK_SPELLING_LOCATION, &final_ord_map);
|
|
1466 /* Bulletproofing. We ought to only have different ordinary maps
|
|
1467 for start vs finish due to line-length jumps. */
|
|
1468 if (start_ord_map != final_ord_map
|
|
1469 && start_ord_map->to_file != final_ord_map->to_file)
|
|
1470 return "start and finish are spelled in different ordinary maps";
|
111
|
1471 location_t start_loc
|
|
1472 = linemap_position_for_line_and_column (line_table, final_ord_map,
|
|
1473 start.line, start.column);
|
|
1474
|
|
1475 cpp_string_location_reader loc_reader (start_loc, line_table);
|
|
1476 loc_readers.safe_push (loc_reader);
|
|
1477 }
|
|
1478
|
|
1479 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
|
|
1480 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
|
|
1481 loc_readers.address (),
|
|
1482 num_locs, &ranges, type);
|
|
1483 if (err)
|
|
1484 return err;
|
|
1485
|
|
1486 /* Success: "ranges" should now contain information on the string. */
|
|
1487 return NULL;
|
|
1488 }
|
|
1489
|
|
1490 /* Attempt to populate *OUT_LOC with source location information on the
|
|
1491 given characters within the string literal found at STRLOC.
|
|
1492 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
|
|
1493 character set.
|
|
1494
|
|
1495 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
|
|
1496 and string literal "012345\n789"
|
|
1497 *OUT_LOC is written to with:
|
|
1498 "012345\n789"
|
|
1499 ~^~~~~
|
|
1500
|
|
1501 If CONCATS is non-NULL, then any string literals that the token at
|
|
1502 STRLOC was concatenated with are also considered.
|
|
1503
|
|
1504 This is implemented by re-parsing the relevant source line(s).
|
|
1505
|
|
1506 Return NULL if successful, or an error message if any errors occurred.
|
|
1507 Error messages are intended for GCC developers (to help debugging) rather
|
|
1508 than for end-users. */
|
|
1509
|
|
1510 const char *
|
|
1511 get_source_location_for_substring (cpp_reader *pfile,
|
|
1512 string_concat_db *concats,
|
|
1513 location_t strloc,
|
|
1514 enum cpp_ttype type,
|
|
1515 int caret_idx, int start_idx, int end_idx,
|
|
1516 source_location *out_loc)
|
|
1517 {
|
|
1518 gcc_checking_assert (caret_idx >= 0);
|
|
1519 gcc_checking_assert (start_idx >= 0);
|
|
1520 gcc_checking_assert (end_idx >= 0);
|
|
1521 gcc_assert (out_loc);
|
|
1522
|
|
1523 cpp_substring_ranges ranges;
|
|
1524 const char *err
|
|
1525 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
|
|
1526 if (err)
|
|
1527 return err;
|
|
1528
|
|
1529 if (caret_idx >= ranges.get_num_ranges ())
|
|
1530 return "caret_idx out of range";
|
|
1531 if (start_idx >= ranges.get_num_ranges ())
|
|
1532 return "start_idx out of range";
|
|
1533 if (end_idx >= ranges.get_num_ranges ())
|
|
1534 return "end_idx out of range";
|
|
1535
|
|
1536 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
|
|
1537 ranges.get_range (start_idx).m_start,
|
|
1538 ranges.get_range (end_idx).m_finish);
|
|
1539 return NULL;
|
|
1540 }
|
|
1541
|
|
1542 #if CHECKING_P
|
|
1543
|
|
1544 namespace selftest {
|
|
1545
|
|
1546 /* Selftests of location handling. */
|
|
1547
|
|
1548 /* Attempt to populate *OUT_RANGE with source location information on the
|
|
1549 given character within the string literal found at STRLOC.
|
|
1550 CHAR_IDX refers to an offset within the execution character set.
|
|
1551 If CONCATS is non-NULL, then any string literals that the token at
|
|
1552 STRLOC was concatenated with are also considered.
|
|
1553
|
|
1554 This is implemented by re-parsing the relevant source line(s).
|
|
1555
|
|
1556 Return NULL if successful, or an error message if any errors occurred.
|
|
1557 Error messages are intended for GCC developers (to help debugging) rather
|
|
1558 than for end-users. */
|
|
1559
|
|
1560 static const char *
|
|
1561 get_source_range_for_char (cpp_reader *pfile,
|
|
1562 string_concat_db *concats,
|
|
1563 location_t strloc,
|
|
1564 enum cpp_ttype type,
|
|
1565 int char_idx,
|
|
1566 source_range *out_range)
|
|
1567 {
|
|
1568 gcc_checking_assert (char_idx >= 0);
|
|
1569 gcc_assert (out_range);
|
|
1570
|
|
1571 cpp_substring_ranges ranges;
|
|
1572 const char *err
|
|
1573 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
|
|
1574 if (err)
|
|
1575 return err;
|
|
1576
|
|
1577 if (char_idx >= ranges.get_num_ranges ())
|
|
1578 return "char_idx out of range";
|
|
1579
|
|
1580 *out_range = ranges.get_range (char_idx);
|
|
1581 return NULL;
|
|
1582 }
|
|
1583
|
|
1584 /* As get_source_range_for_char, but write to *OUT the number
|
|
1585 of ranges that are available. */
|
|
1586
|
|
1587 static const char *
|
|
1588 get_num_source_ranges_for_substring (cpp_reader *pfile,
|
|
1589 string_concat_db *concats,
|
|
1590 location_t strloc,
|
|
1591 enum cpp_ttype type,
|
|
1592 int *out)
|
|
1593 {
|
|
1594 gcc_assert (out);
|
|
1595
|
|
1596 cpp_substring_ranges ranges;
|
|
1597 const char *err
|
|
1598 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
|
|
1599
|
|
1600 if (err)
|
|
1601 return err;
|
|
1602
|
|
1603 *out = ranges.get_num_ranges ();
|
|
1604 return NULL;
|
|
1605 }
|
|
1606
|
|
1607 /* Selftests of location handling. */
|
|
1608
|
131
|
1609 /* Verify that compare() on linenum_type handles comparisons over the full
|
|
1610 range of the type. */
|
|
1611
|
|
1612 static void
|
|
1613 test_linenum_comparisons ()
|
|
1614 {
|
|
1615 linenum_type min_line (0);
|
|
1616 linenum_type max_line (0xffffffff);
|
|
1617 ASSERT_EQ (0, compare (min_line, min_line));
|
|
1618 ASSERT_EQ (0, compare (max_line, max_line));
|
|
1619
|
|
1620 ASSERT_GT (compare (max_line, min_line), 0);
|
|
1621 ASSERT_LT (compare (min_line, max_line), 0);
|
|
1622 }
|
|
1623
|
111
|
1624 /* Helper function for verifying location data: when location_t
|
|
1625 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
|
|
1626 as having column 0. */
|
|
1627
|
|
1628 static bool
|
|
1629 should_have_column_data_p (location_t loc)
|
|
1630 {
|
|
1631 if (IS_ADHOC_LOC (loc))
|
|
1632 loc = get_location_from_adhoc_loc (line_table, loc);
|
|
1633 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
|
|
1634 return false;
|
|
1635 return true;
|
|
1636 }
|
|
1637
|
|
1638 /* Selftest for should_have_column_data_p. */
|
|
1639
|
|
1640 static void
|
|
1641 test_should_have_column_data_p ()
|
|
1642 {
|
|
1643 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
|
|
1644 ASSERT_TRUE
|
|
1645 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
|
|
1646 ASSERT_FALSE
|
|
1647 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
|
|
1648 }
|
|
1649
|
|
1650 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
|
|
1651 on LOC. */
|
|
1652
|
|
1653 static void
|
|
1654 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
|
|
1655 location_t loc)
|
|
1656 {
|
|
1657 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
|
|
1658 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
|
|
1659 /* If location_t values are sufficiently high, then column numbers
|
|
1660 will be unavailable and LOCATION_COLUMN (loc) will be 0.
|
|
1661 When close to the threshold, column numbers *may* be present: if
|
|
1662 the final linemap before the threshold contains a line that straddles
|
|
1663 the threshold, locations in that line have column information. */
|
|
1664 if (should_have_column_data_p (loc))
|
|
1665 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
|
|
1666 }
|
|
1667
|
|
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests across a
   variety of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   A line_table_case describes one particular case within that test
   matrix.  */

struct line_table_case
{
  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the initial highest_location and highest_line of
     the line_table under test.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }
};
|
|
1694
|
|
1695 /* Constructor. Store the old value of line_table, and create a new
|
|
1696 one, using sane defaults. */
|
|
1697
|
|
1698 line_table_test::line_table_test ()
|
|
1699 {
|
|
1700 gcc_assert (saved_line_table == NULL);
|
|
1701 saved_line_table = line_table;
|
|
1702 line_table = ggc_alloc<line_maps> ();
|
|
1703 linemap_init (line_table, BUILTINS_LOCATION);
|
|
1704 gcc_assert (saved_line_table->reallocator);
|
|
1705 line_table->reallocator = saved_line_table->reallocator;
|
|
1706 gcc_assert (saved_line_table->round_alloc_size);
|
|
1707 line_table->round_alloc_size = saved_line_table->round_alloc_size;
|
|
1708 line_table->default_range_bits = 0;
|
|
1709 }
|
|
1710
|
|
1711 /* Constructor. Store the old value of line_table, and create a new
|
|
1712 one, using the sitation described in CASE_. */
|
|
1713
|
|
1714 line_table_test::line_table_test (const line_table_case &case_)
|
|
1715 {
|
|
1716 gcc_assert (saved_line_table == NULL);
|
|
1717 saved_line_table = line_table;
|
|
1718 line_table = ggc_alloc<line_maps> ();
|
|
1719 linemap_init (line_table, BUILTINS_LOCATION);
|
|
1720 gcc_assert (saved_line_table->reallocator);
|
|
1721 line_table->reallocator = saved_line_table->reallocator;
|
|
1722 gcc_assert (saved_line_table->round_alloc_size);
|
|
1723 line_table->round_alloc_size = saved_line_table->round_alloc_size;
|
|
1724 line_table->default_range_bits = case_.m_default_range_bits;
|
|
1725 if (case_.m_base_location)
|
|
1726 {
|
|
1727 line_table->highest_location = case_.m_base_location;
|
|
1728 line_table->highest_line = case_.m_base_location;
|
|
1729 }
|
|
1730 }
|
|
1731
|
|
1732 /* Destructor. Restore the old value of line_table. */
|
|
1733
|
|
1734 line_table_test::~line_table_test ()
|
|
1735 {
|
|
1736 gcc_assert (saved_line_table != NULL);
|
|
1737 line_table = saved_line_table;
|
|
1738 saved_line_table = NULL;
|
|
1739 }
|
|
1740
|
|
1741 /* Verify basic operation of ordinary linemaps. */
|
|
1742
|
|
1743 static void
|
|
1744 test_accessing_ordinary_linemaps (const line_table_case &case_)
|
|
1745 {
|
|
1746 line_table_test ltt (case_);
|
|
1747
|
|
1748 /* Build a simple linemap describing some locations. */
|
|
1749 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
|
|
1750
|
|
1751 linemap_line_start (line_table, 1, 100);
|
|
1752 location_t loc_a = linemap_position_for_column (line_table, 1);
|
|
1753 location_t loc_b = linemap_position_for_column (line_table, 23);
|
|
1754
|
|
1755 linemap_line_start (line_table, 2, 100);
|
|
1756 location_t loc_c = linemap_position_for_column (line_table, 1);
|
|
1757 location_t loc_d = linemap_position_for_column (line_table, 17);
|
|
1758
|
|
1759 /* Example of a very long line. */
|
|
1760 linemap_line_start (line_table, 3, 2000);
|
|
1761 location_t loc_e = linemap_position_for_column (line_table, 700);
|
|
1762
|
|
1763 /* Transitioning back to a short line. */
|
|
1764 linemap_line_start (line_table, 4, 0);
|
|
1765 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
|
|
1766
|
|
1767 if (should_have_column_data_p (loc_back_to_short))
|
|
1768 {
|
|
1769 /* Verify that we switched to short lines in the linemap. */
|
|
1770 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
|
|
1771 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1772 }
|
111
|
1773
|
|
1774 /* Example of a line that will eventually be seen to be longer
|
|
1775 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
|
|
1776 below that. */
|
|
1777 linemap_line_start (line_table, 5, 2000);
|
|
1778
|
|
1779 location_t loc_start_of_very_long_line
|
|
1780 = linemap_position_for_column (line_table, 2000);
|
|
1781 location_t loc_too_wide
|
|
1782 = linemap_position_for_column (line_table, 4097);
|
|
1783 location_t loc_too_wide_2
|
|
1784 = linemap_position_for_column (line_table, 4098);
|
|
1785
|
|
1786 /* ...and back to a sane line length. */
|
|
1787 linemap_line_start (line_table, 6, 100);
|
|
1788 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
|
|
1789
|
|
1790 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
|
|
1791
|
|
1792 /* Multiple files. */
|
|
1793 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
|
|
1794 linemap_line_start (line_table, 1, 200);
|
|
1795 location_t loc_f = linemap_position_for_column (line_table, 150);
|
|
1796 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
|
|
1797
|
|
1798 /* Verify that we can recover the location info. */
|
|
1799 assert_loceq ("foo.c", 1, 1, loc_a);
|
|
1800 assert_loceq ("foo.c", 1, 23, loc_b);
|
|
1801 assert_loceq ("foo.c", 2, 1, loc_c);
|
|
1802 assert_loceq ("foo.c", 2, 17, loc_d);
|
|
1803 assert_loceq ("foo.c", 3, 700, loc_e);
|
|
1804 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
|
|
1805
|
|
1806 /* In the very wide line, the initial location should be fully tracked. */
|
|
1807 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
|
|
1808 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
|
|
1809 be disabled. */
|
|
1810 assert_loceq ("foo.c", 5, 0, loc_too_wide);
|
|
1811 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
|
|
1812 /*...and column-tracking should be re-enabled for subsequent lines. */
|
|
1813 assert_loceq ("foo.c", 6, 10, loc_sane_again);
|
|
1814
|
|
1815 assert_loceq ("bar.c", 1, 150, loc_f);
|
|
1816
|
|
1817 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
|
|
1818 ASSERT_TRUE (pure_location_p (line_table, loc_a));
|
|
1819
|
|
1820 /* Verify using make_location to build a range, and extracting data
|
|
1821 back from it. */
|
|
1822 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
|
|
1823 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
|
|
1824 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
|
|
1825 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
|
|
1826 ASSERT_EQ (loc_b, src_range.m_start);
|
|
1827 ASSERT_EQ (loc_d, src_range.m_finish);
|
|
1828 }
|
|
1829
|
|
1830 /* Verify various properties of UNKNOWN_LOCATION. */
|
|
1831
|
|
1832 static void
|
|
1833 test_unknown_location ()
|
|
1834 {
|
|
1835 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
|
|
1836 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
|
|
1837 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
|
|
1838 }
|
|
1839
|
|
1840 /* Verify various properties of BUILTINS_LOCATION. */
|
|
1841
|
|
1842 static void
|
|
1843 test_builtins ()
|
|
1844 {
|
|
1845 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
|
|
1846 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
|
|
1847 }
|
|
1848
|
|
1849 /* Regression test for make_location.
|
|
1850 Ensure that we use pure locations for the start/finish of the range,
|
|
1851 rather than storing a packed or ad-hoc range as the start/finish. */
|
|
1852
|
|
1853 static void
|
|
1854 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
|
|
1855 {
|
|
1856 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
|
|
1857 with C++ frontend.
|
|
1858 ....................0000000001111111111222.
|
|
1859 ....................1234567890123456789012. */
|
|
1860 const char *content = " r += !aaa == bbb;\n";
|
|
1861 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
|
|
1862 line_table_test ltt (case_);
|
|
1863 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
|
|
1864
|
|
1865 const location_t c11 = linemap_position_for_column (line_table, 11);
|
|
1866 const location_t c12 = linemap_position_for_column (line_table, 12);
|
|
1867 const location_t c13 = linemap_position_for_column (line_table, 13);
|
|
1868 const location_t c14 = linemap_position_for_column (line_table, 14);
|
|
1869 const location_t c21 = linemap_position_for_column (line_table, 21);
|
|
1870
|
|
1871 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
|
|
1872 return;
|
|
1873
|
|
1874 /* Use column 13 for the caret location, arbitrarily, to verify that we
|
|
1875 handle start != caret. */
|
|
1876 const location_t aaa = make_location (c13, c12, c14);
|
|
1877 ASSERT_EQ (c13, get_pure_location (aaa));
|
|
1878 ASSERT_EQ (c12, get_start (aaa));
|
|
1879 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
|
|
1880 ASSERT_EQ (c14, get_finish (aaa));
|
|
1881 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
|
|
1882
|
|
1883 /* Make a location using a location with a range as the start-point. */
|
|
1884 const location_t not_aaa = make_location (c11, aaa, c14);
|
|
1885 ASSERT_EQ (c11, get_pure_location (not_aaa));
|
|
1886 /* It should use the start location of the range, not store the range
|
|
1887 itself. */
|
|
1888 ASSERT_EQ (c12, get_start (not_aaa));
|
|
1889 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
|
|
1890 ASSERT_EQ (c14, get_finish (not_aaa));
|
|
1891 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
|
|
1892
|
|
1893 /* Similarly, make a location with a range as the end-point. */
|
|
1894 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
|
|
1895 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
|
|
1896 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
|
|
1897 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
|
|
1898 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
|
|
1899 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
|
|
1900 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
|
|
1901 /* It should use the finish location of the range, not store the range
|
|
1902 itself. */
|
|
1903 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
|
|
1904 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
|
|
1905 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
|
|
1906 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
|
|
1907 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
|
|
1908 }
|
|
1909
|
|
1910 /* Verify reading of input files (e.g. for caret-based diagnostics). */
|
|
1911
|
|
1912 static void
|
|
1913 test_reading_source_line ()
|
|
1914 {
|
|
1915 /* Create a tempfile and write some text to it. */
|
|
1916 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
|
|
1917 "01234567890123456789\n"
|
|
1918 "This is the test text\n"
|
|
1919 "This is the 3rd line");
|
|
1920
|
|
1921 /* Read back a specific line from the tempfile. */
|
131
|
1922 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
|
|
1923 ASSERT_TRUE (source_line);
|
|
1924 ASSERT_TRUE (source_line.get_buffer () != NULL);
|
|
1925 ASSERT_EQ (20, source_line.length ());
|
111
|
1926 ASSERT_TRUE (!strncmp ("This is the 3rd line",
|
131
|
1927 source_line.get_buffer (), source_line.length ()));
|
|
1928
|
|
1929 source_line = location_get_source_line (tmp.get_filename (), 2);
|
|
1930 ASSERT_TRUE (source_line);
|
|
1931 ASSERT_TRUE (source_line.get_buffer () != NULL);
|
|
1932 ASSERT_EQ (21, source_line.length ());
|
111
|
1933 ASSERT_TRUE (!strncmp ("This is the test text",
|
131
|
1934 source_line.get_buffer (), source_line.length ()));
|
|
1935
|
|
1936 source_line = location_get_source_line (tmp.get_filename (), 4);
|
|
1937 ASSERT_FALSE (source_line);
|
|
1938 ASSERT_TRUE (source_line.get_buffer () == NULL);
|
111
|
1939 }
|
|
1940
|
|
1941 /* Tests of lexing. */
|
|
1942
|
|
/* Assert that the spelling of token TOK, as given by cpp_token_as_text
   for PARSER, is equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)              \
  SELFTEST_BEGIN_STMT                                                    \
    unsigned char *spelling_ = cpp_token_as_text ((PARSER), (TOK));      \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)spelling_);             \
  SELFTEST_END_STMT
|
|
1951
|
|
1952 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
|
|
1953 and ranges from EXP_START_COL to EXP_FINISH_COL.
|
|
1954 Use LOC as the effective location of the selftest. */
|
|
1955
|
|
1956 static void
|
|
1957 assert_token_loc_eq (const location &loc,
|
|
1958 const cpp_token *tok,
|
|
1959 const char *exp_filename, int exp_linenum,
|
|
1960 int exp_start_col, int exp_finish_col)
|
|
1961 {
|
|
1962 location_t tok_loc = tok->src_loc;
|
|
1963 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
|
|
1964 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
|
|
1965
|
|
1966 /* If location_t values are sufficiently high, then column numbers
|
|
1967 will be unavailable. */
|
|
1968 if (!should_have_column_data_p (tok_loc))
|
|
1969 return;
|
|
1970
|
|
1971 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
|
|
1972 source_range tok_range = get_range_from_loc (line_table, tok_loc);
|
|
1973 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
|
|
1974 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
|
|
1975 }
|
|
1976
|
|
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc, with SELFTEST_LOCATION as the effective location of
   the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,   \
                            EXP_START_COL, EXP_FINISH_COL)    \
  assert_token_loc_eq (SELFTEST_LOCATION,                     \
                       (TOK),                                 \
                       (EXP_FILENAME), (EXP_LINENUM),         \
                       (EXP_START_COL), (EXP_FINISH_COL))
|
|
1984
|
|
1985 /* Test of lexing a file using libcpp, verifying tokens and their
|
|
1986 location information. */
|
|
1987
|
|
1988 static void
|
|
1989 test_lexer (const line_table_case &case_)
|
|
1990 {
|
|
1991 /* Create a tempfile and write some text to it. */
|
|
1992 const char *content =
|
|
1993 /*00000000011111111112222222222333333.3333444444444.455555555556
|
|
1994 12345678901234567890123456789012345.6789012345678.901234567890. */
|
|
1995 ("test_name /* c-style comment */\n"
|
|
1996 " \"test literal\"\n"
|
|
1997 " // test c++-style comment\n"
|
|
1998 " 42\n");
|
|
1999 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
|
|
2000
|
|
2001 line_table_test ltt (case_);
|
|
2002
|
|
2003 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
|
|
2004
|
|
2005 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
|
|
2006 ASSERT_NE (fname, NULL);
|
|
2007
|
|
2008 /* Verify that we get the expected tokens back, with the correct
|
|
2009 location information. */
|
|
2010
|
|
2011 location_t loc;
|
|
2012 const cpp_token *tok;
|
|
2013 tok = cpp_get_token_with_location (parser, &loc);
|
|
2014 ASSERT_NE (tok, NULL);
|
|
2015 ASSERT_EQ (tok->type, CPP_NAME);
|
|
2016 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
|
|
2017 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
|
|
2018
|
|
2019 tok = cpp_get_token_with_location (parser, &loc);
|
|
2020 ASSERT_NE (tok, NULL);
|
|
2021 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2022 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
|
|
2023 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
|
|
2024
|
|
2025 tok = cpp_get_token_with_location (parser, &loc);
|
|
2026 ASSERT_NE (tok, NULL);
|
|
2027 ASSERT_EQ (tok->type, CPP_NUMBER);
|
|
2028 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
|
|
2029 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
|
|
2030
|
|
2031 tok = cpp_get_token_with_location (parser, &loc);
|
|
2032 ASSERT_NE (tok, NULL);
|
|
2033 ASSERT_EQ (tok->type, CPP_EOF);
|
|
2034
|
|
2035 cpp_finish (parser, NULL);
|
|
2036 cpp_destroy (parser);
|
|
2037 }
|
|
2038
|
|
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract interface for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor, with
   the lexer_test under construction as its argument.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
|
|
2052
|
|
2053 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
|
|
2054 in its dtor.
|
|
2055
|
|
2056 This is needed by struct lexer_test to ensure that the cleanup of the
|
|
2057 cpp_reader happens *after* the cleanup of the temp_source_file. */
|
|
2058
|
|
2059 class cpp_reader_ptr
|
|
2060 {
|
|
2061 public:
|
|
2062 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
|
|
2063
|
|
2064 ~cpp_reader_ptr ()
|
|
2065 {
|
|
2066 cpp_finish (m_ptr, NULL);
|
|
2067 cpp_destroy (m_ptr);
|
|
2068 }
|
|
2069
|
|
2070 operator cpp_reader * () const { return m_ptr; }
|
|
2071
|
|
2072 private:
|
|
2073 cpp_reader *m_ptr;
|
|
2074 };
|
|
2075
|
|
2076 /* A struct for writing lexer tests. */
|
|
2077
|
|
2078 struct lexer_test
|
|
2079 {
|
|
2080 lexer_test (const line_table_case &case_, const char *content,
|
|
2081 lexer_test_options *options);
|
|
2082 ~lexer_test ();
|
|
2083
|
|
2084 const cpp_token *get_token ();
|
|
2085
|
|
2086 /* The ordering of these fields matters.
|
|
2087 The line_table_test must be first, since the cpp_reader_ptr
|
|
2088 uses it.
|
|
2089 The cpp_reader must be cleaned up *after* the temp_source_file
|
|
2090 since the filenames in input.c's input cache are owned by the
|
|
2091 cpp_reader; in particular, when ~temp_source_file evicts the
|
|
2092 filename the filenames must still be alive. */
|
|
2093 line_table_test m_ltt;
|
|
2094 cpp_reader_ptr m_parser;
|
|
2095 temp_source_file m_tempfile;
|
|
2096 string_concat_db m_concats;
|
|
2097 bool m_implicitly_expect_EOF;
|
|
2098 };
|
|
2099
|
|
2100 /* Use an EBCDIC encoding for the execution charset, specifically
|
|
2101 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
|
|
2102
|
|
2103 This exercises iconv integration within libcpp.
|
|
2104 Not every build of iconv supports the given charset,
|
|
2105 so we need to flag this error and handle it gracefully. */
|
|
2106
|
|
2107 class ebcdic_execution_charset : public lexer_test_options
|
|
2108 {
|
|
2109 public:
|
|
2110 ebcdic_execution_charset () : m_num_iconv_errors (0)
|
|
2111 {
|
|
2112 gcc_assert (s_singleton == NULL);
|
|
2113 s_singleton = this;
|
|
2114 }
|
|
2115 ~ebcdic_execution_charset ()
|
|
2116 {
|
|
2117 gcc_assert (s_singleton == this);
|
|
2118 s_singleton = NULL;
|
|
2119 }
|
|
2120
|
|
2121 void apply (lexer_test &test) FINAL OVERRIDE
|
|
2122 {
|
|
2123 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
|
|
2124 cpp_opts->narrow_charset = "IBM1047";
|
|
2125
|
|
2126 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
|
131
|
2127 callbacks->diagnostic = on_diagnostic;
|
111
|
2128 }
|
|
2129
|
131
|
2130 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
|
|
2131 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
|
|
2132 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
|
|
2133 rich_location *richloc ATTRIBUTE_UNUSED,
|
|
2134 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
|
111
|
2135 ATTRIBUTE_FPTR_PRINTF(5,0)
|
|
2136 {
|
|
2137 gcc_assert (s_singleton);
|
|
2138 /* Avoid exgettext from picking this up, it is translated in libcpp. */
|
|
2139 const char *msg = "conversion from %s to %s not supported by iconv";
|
|
2140 #ifdef ENABLE_NLS
|
|
2141 msg = dgettext ("cpplib", msg);
|
|
2142 #endif
|
|
2143 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
|
|
2144 when the local iconv build doesn't support the conversion. */
|
|
2145 if (strcmp (msgid, msg) == 0)
|
|
2146 {
|
|
2147 s_singleton->m_num_iconv_errors++;
|
|
2148 return true;
|
|
2149 }
|
|
2150
|
|
2151 /* Otherwise, we have an unexpected error. */
|
|
2152 abort ();
|
|
2153 }
|
|
2154
|
|
2155 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
|
|
2156
|
|
2157 private:
|
|
2158 static ebcdic_execution_charset *s_singleton;
|
|
2159 int m_num_iconv_errors;
|
|
2160 };
|
|
2161
|
|
2162 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
|
|
2163
|
131
|
2164 /* A lexer_test_options subclass that records a list of diagnostic
|
111
|
2165 messages emitted by the lexer. */
|
|
2166
|
131
|
2167 class lexer_diagnostic_sink : public lexer_test_options
|
111
|
2168 {
|
|
2169 public:
|
131
|
2170 lexer_diagnostic_sink ()
|
111
|
2171 {
|
|
2172 gcc_assert (s_singleton == NULL);
|
|
2173 s_singleton = this;
|
|
2174 }
|
131
|
2175 ~lexer_diagnostic_sink ()
|
111
|
2176 {
|
|
2177 gcc_assert (s_singleton == this);
|
|
2178 s_singleton = NULL;
|
|
2179
|
|
2180 int i;
|
|
2181 char *str;
|
131
|
2182 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
|
111
|
2183 free (str);
|
|
2184 }
|
|
2185
|
|
2186 void apply (lexer_test &test) FINAL OVERRIDE
|
|
2187 {
|
|
2188 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
|
131
|
2189 callbacks->diagnostic = on_diagnostic;
|
111
|
2190 }
|
|
2191
|
131
|
2192 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
|
|
2193 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
|
|
2194 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
|
|
2195 rich_location *richloc ATTRIBUTE_UNUSED,
|
|
2196 const char *msgid, va_list *ap)
|
111
|
2197 ATTRIBUTE_FPTR_PRINTF(5,0)
|
|
2198 {
|
|
2199 char *msg = xvasprintf (msgid, *ap);
|
131
|
2200 s_singleton->m_diagnostics.safe_push (msg);
|
111
|
2201 return true;
|
|
2202 }
|
|
2203
|
131
|
2204 auto_vec<char *> m_diagnostics;
|
111
|
2205
|
|
2206 private:
|
131
|
2207 static lexer_diagnostic_sink *s_singleton;
|
111
|
2208 };
|
|
2209
|
131
|
2210 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
|
111
|
2211
|
|
2212 /* Constructor. Override line_table with a new instance based on CASE_,
|
|
2213 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
|
|
2214 start parsing the tempfile. */
|
|
2215
|
|
2216 lexer_test::lexer_test (const line_table_case &case_, const char *content,
|
|
2217 lexer_test_options *options)
|
|
2218 : m_ltt (case_),
|
|
2219 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
|
|
2220 /* Create a tempfile and write the text to it. */
|
|
2221 m_tempfile (SELFTEST_LOCATION, ".c", content),
|
|
2222 m_concats (),
|
|
2223 m_implicitly_expect_EOF (true)
|
|
2224 {
|
|
2225 if (options)
|
|
2226 options->apply (*this);
|
|
2227
|
|
2228 cpp_init_iconv (m_parser);
|
|
2229
|
|
2230 /* Parse the file. */
|
|
2231 const char *fname = cpp_read_main_file (m_parser,
|
|
2232 m_tempfile.get_filename ());
|
|
2233 ASSERT_NE (fname, NULL);
|
|
2234 }
|
|
2235
|
|
2236 /* Destructor. By default, verify that the next token in m_parser is EOF. */
|
|
2237
|
|
2238 lexer_test::~lexer_test ()
|
|
2239 {
|
|
2240 location_t loc;
|
|
2241 const cpp_token *tok;
|
|
2242
|
|
2243 if (m_implicitly_expect_EOF)
|
|
2244 {
|
|
2245 tok = cpp_get_token_with_location (m_parser, &loc);
|
|
2246 ASSERT_NE (tok, NULL);
|
|
2247 ASSERT_EQ (tok->type, CPP_EOF);
|
|
2248 }
|
|
2249 }
|
|
2250
|
|
2251 /* Get the next token from m_parser. */
|
|
2252
|
|
2253 const cpp_token *
|
|
2254 lexer_test::get_token ()
|
|
2255 {
|
|
2256 location_t loc;
|
|
2257 const cpp_token *tok;
|
|
2258
|
|
2259 tok = cpp_get_token_with_location (m_parser, &loc);
|
|
2260 ASSERT_NE (tok, NULL);
|
|
2261 return tok;
|
|
2262 }
|
|
2263
|
|
2264 /* Verify that locations within string literals are correctly handled. */
|
|
2265
|
|
2266 /* Verify get_source_range_for_substring for token(s) at STRLOC,
|
|
2267 using the string concatenation database for TEST.
|
|
2268
|
|
2269 Assert that the character at index IDX is on EXPECTED_LINE,
|
|
2270 and that it begins at column EXPECTED_START_COL and ends at
|
|
2271 EXPECTED_FINISH_COL (unless the locations are beyond
|
|
2272 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
|
|
2273 columns). */
|
|
2274
|
|
2275 static void
|
|
2276 assert_char_at_range (const location &loc,
|
|
2277 lexer_test& test,
|
|
2278 location_t strloc, enum cpp_ttype type, int idx,
|
|
2279 int expected_line, int expected_start_col,
|
|
2280 int expected_finish_col)
|
|
2281 {
|
|
2282 cpp_reader *pfile = test.m_parser;
|
|
2283 string_concat_db *concats = &test.m_concats;
|
|
2284
|
|
2285 source_range actual_range = source_range();
|
|
2286 const char *err
|
|
2287 = get_source_range_for_char (pfile, concats, strloc, type, idx,
|
|
2288 &actual_range);
|
|
2289 if (should_have_column_data_p (strloc))
|
|
2290 ASSERT_EQ_AT (loc, NULL, err);
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2291 else
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2292 {
|
111
|
2293 ASSERT_STREQ_AT (loc,
|
|
2294 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
|
|
2295 err);
|
|
2296 return;
|
|
2297 }
|
|
2298
|
|
2299 int actual_start_line = LOCATION_LINE (actual_range.m_start);
|
|
2300 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
|
|
2301 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
|
|
2302 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
|
|
2303
|
|
2304 if (should_have_column_data_p (actual_range.m_start))
|
|
2305 {
|
|
2306 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
|
|
2307 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
|
|
2308 }
|
|
2309 if (should_have_column_data_p (actual_range.m_finish))
|
|
2310 {
|
|
2311 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
|
|
2312 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
|
|
2313 }
|
|
2314 }
|
|
2315
|
|
/* Convenience wrapper around assert_char_at_range, supplying
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,          \
                             EXPECTED_LINE, EXPECTED_START_COL,      \
                             EXPECTED_FINISH_COL)                    \
  assert_char_at_range (SELFTEST_LOCATION,                           \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),       \
                        (EXPECTED_LINE), (EXPECTED_START_COL),       \
                        (EXPECTED_FINISH_COL))
|
|
2324
|
|
2325 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
|
|
2326 using the string concatenation database for TEST.
|
|
2327
|
|
2328 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
|
|
2329
|
|
2330 static void
|
|
2331 assert_num_substring_ranges (const location &loc,
|
|
2332 lexer_test& test,
|
|
2333 location_t strloc,
|
|
2334 enum cpp_ttype type,
|
|
2335 int expected_num_ranges)
|
|
2336 {
|
|
2337 cpp_reader *pfile = test.m_parser;
|
|
2338 string_concat_db *concats = &test.m_concats;
|
|
2339
|
|
2340 int actual_num_ranges = -1;
|
|
2341 const char *err
|
|
2342 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
|
|
2343 &actual_num_ranges);
|
|
2344 if (should_have_column_data_p (strloc))
|
|
2345 ASSERT_EQ_AT (loc, NULL, err);
|
|
2346 else
|
|
2347 {
|
|
2348 ASSERT_STREQ_AT (loc,
|
|
2349 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
|
|
2350 err);
|
|
2351 return;
|
|
2352 }
|
|
2353 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
|
|
2354 }
|
|
2355
|
|
/* Convenience wrapper around assert_num_substring_ranges, supplying
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,        \
                                    EXPECTED_NUM_RANGES)             \
  assert_num_substring_ranges (SELFTEST_LOCATION,                    \
                               (LEXER_TEST), (STRLOC), (TYPE),       \
                               (EXPECTED_NUM_RANGES))
|
|
2363
|
|
2364
|
|
2365 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
|
|
2366 returns an error (using the string concatenation database for TEST). */
|
|
2367
|
|
2368 static void
|
|
2369 assert_has_no_substring_ranges (const location &loc,
|
|
2370 lexer_test& test,
|
|
2371 location_t strloc,
|
|
2372 enum cpp_ttype type,
|
|
2373 const char *expected_err)
|
|
2374 {
|
|
2375 cpp_reader *pfile = test.m_parser;
|
|
2376 string_concat_db *concats = &test.m_concats;
|
|
2377 cpp_substring_ranges ranges;
|
|
2378 const char *actual_err
|
|
2379 = get_substring_ranges_for_loc (pfile, concats, strloc,
|
|
2380 type, ranges);
|
|
2381 if (should_have_column_data_p (strloc))
|
|
2382 ASSERT_STREQ_AT (loc, expected_err, actual_err);
|
|
2383 else
|
|
2384 ASSERT_STREQ_AT (loc,
|
|
2385 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
|
|
2386 actual_err);
|
|
2387 }
|
|
2388
|
|
/* Convenience wrapper around assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION as the effective location of any errors.  */
#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)  \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                   \
                                  (LEXER_TEST), (STRLOC), (TYPE),      \
                                  (ERR))
|
|
2392
|
|
2393 /* Lex a simple string literal. Verify the substring location data, before
|
|
2394 and after running cpp_interpret_string on it. */
|
|
2395
|
|
2396 static void
|
|
2397 test_lexer_string_locations_simple (const line_table_case &case_)
|
|
2398 {
|
|
2399 /* Digits 0-9 (with 0 at column 10), the simple way.
|
|
2400 ....................000000000.11111111112.2222222223333333333
|
|
2401 ....................123456789.01234567890.1234567890123456789
|
|
2402 We add a trailing comment to ensure that we correctly locate
|
|
2403 the end of the string literal token. */
|
|
2404 const char *content = " \"0123456789\" /* not a string */\n";
|
|
2405 lexer_test test (case_, content, NULL);
|
|
2406
|
|
2407 /* Verify that we get the expected token back, with the correct
|
|
2408 location information. */
|
|
2409 const cpp_token *tok = test.get_token ();
|
|
2410 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2411 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
|
|
2412 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
|
|
2413
|
|
2414 /* At this point in lexing, the quote characters are treated as part of
|
|
2415 the string (they are stripped off by cpp_interpret_string). */
|
|
2416
|
|
2417 ASSERT_EQ (tok->val.str.len, 12);
|
|
2418
|
|
2419 /* Verify that cpp_interpret_string works. */
|
|
2420 cpp_string dst_string;
|
|
2421 const enum cpp_ttype type = CPP_STRING;
|
|
2422 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2423 &dst_string, type);
|
|
2424 ASSERT_TRUE (result);
|
|
2425 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
|
|
2426 free (const_cast <unsigned char *> (dst_string.text));
|
|
2427
|
|
2428 /* Verify ranges of individual characters. This no longer includes the
|
|
2429 opening quote, but does include the closing quote. */
|
|
2430 for (int i = 0; i <= 10; i++)
|
|
2431 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
|
|
2432 10 + i, 10 + i);
|
|
2433
|
|
2434 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
|
|
2435 }
|
|
2436
|
|
2437 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
|
|
2438 encoding. */
|
|
2439
|
|
2440 static void
|
|
2441 test_lexer_string_locations_ebcdic (const line_table_case &case_)
|
|
2442 {
|
|
2443 /* EBCDIC support requires iconv. */
|
|
2444 if (!HAVE_ICONV)
|
|
2445 return;
|
|
2446
|
|
2447 /* Digits 0-9 (with 0 at column 10), the simple way.
|
|
2448 ....................000000000.11111111112.2222222223333333333
|
|
2449 ....................123456789.01234567890.1234567890123456789
|
|
2450 We add a trailing comment to ensure that we correctly locate
|
|
2451 the end of the string literal token. */
|
|
2452 const char *content = " \"0123456789\" /* not a string */\n";
|
|
2453 ebcdic_execution_charset use_ebcdic;
|
|
2454 lexer_test test (case_, content, &use_ebcdic);
|
|
2455
|
|
2456 /* Verify that we get the expected token back, with the correct
|
|
2457 location information. */
|
|
2458 const cpp_token *tok = test.get_token ();
|
|
2459 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2460 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
|
|
2461 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
|
|
2462
|
|
2463 /* At this point in lexing, the quote characters are treated as part of
|
|
2464 the string (they are stripped off by cpp_interpret_string). */
|
|
2465
|
|
2466 ASSERT_EQ (tok->val.str.len, 12);
|
|
2467
|
|
2468 /* The remainder of the test requires an iconv implementation that
|
|
2469 can convert from UTF-8 to the EBCDIC encoding requested above. */
|
|
2470 if (use_ebcdic.iconv_errors_occurred_p ())
|
|
2471 return;
|
|
2472
|
|
2473 /* Verify that cpp_interpret_string works. */
|
|
2474 cpp_string dst_string;
|
|
2475 const enum cpp_ttype type = CPP_STRING;
|
|
2476 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2477 &dst_string, type);
|
|
2478 ASSERT_TRUE (result);
|
|
2479 /* We should now have EBCDIC-encoded text, specifically
|
|
2480 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
|
|
2481 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
|
|
2482 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
|
2483 (const char *)dst_string.text);
|
|
2484 free (const_cast <unsigned char *> (dst_string.text));
|
|
2485
|
|
2486 /* Verify that we don't attempt to record substring location information
|
|
2487 for such cases. */
|
|
2488 ASSERT_HAS_NO_SUBSTRING_RANGES
|
|
2489 (test, tok->src_loc, type,
|
|
2490 "execution character set != source character set");
|
|
2491 }
|
|
2492
|
|
2493 /* Lex a string literal containing a hex-escaped character.
|
|
2494 Verify the substring location data, before and after running
|
|
2495 cpp_interpret_string on it. */
|
|
2496
|
|
2497 static void
|
|
2498 test_lexer_string_locations_hex (const line_table_case &case_)
|
|
2499 {
|
|
2500 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
|
|
2501 and with a space in place of digit 6, to terminate the escaped
|
|
2502 hex code.
|
|
2503 ....................000000000.111111.11112222.
|
|
2504 ....................123456789.012345.67890123. */
|
|
2505 const char *content = " \"01234\\x35 789\"\n";
|
|
2506 lexer_test test (case_, content, NULL);
|
|
2507
|
|
2508 /* Verify that we get the expected token back, with the correct
|
|
2509 location information. */
|
|
2510 const cpp_token *tok = test.get_token ();
|
|
2511 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2512 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
|
|
2513 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
|
|
2514
|
|
2515 /* At this point in lexing, the quote characters are treated as part of
|
|
2516 the string (they are stripped off by cpp_interpret_string). */
|
|
2517 ASSERT_EQ (tok->val.str.len, 15);
|
|
2518
|
|
2519 /* Verify that cpp_interpret_string works. */
|
|
2520 cpp_string dst_string;
|
|
2521 const enum cpp_ttype type = CPP_STRING;
|
|
2522 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2523 &dst_string, type);
|
|
2524 ASSERT_TRUE (result);
|
|
2525 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
|
|
2526 free (const_cast <unsigned char *> (dst_string.text));
|
|
2527
|
|
2528 /* Verify ranges of individual characters. This no longer includes the
|
|
2529 opening quote, but does include the closing quote. */
|
|
2530 for (int i = 0; i <= 4; i++)
|
|
2531 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
|
|
2532 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
|
|
2533 for (int i = 6; i <= 10; i++)
|
|
2534 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
|
|
2535
|
|
2536 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
|
|
2537 }
|
|
2538
|
|
2539 /* Lex a string literal containing an octal-escaped character.
|
|
2540 Verify the substring location data after running cpp_interpret_string
|
|
2541 on it. */
|
|
2542
|
|
2543 static void
|
|
2544 test_lexer_string_locations_oct (const line_table_case &case_)
|
|
2545 {
|
|
2546 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
|
|
2547 and with a space in place of digit 6, to terminate the escaped
|
|
2548 octal code.
|
|
2549 ....................000000000.111111.11112222.2222223333333333444
|
|
2550 ....................123456789.012345.67890123.4567890123456789012 */
|
|
2551 const char *content = " \"01234\\065 789\" /* not a string */\n";
|
|
2552 lexer_test test (case_, content, NULL);
|
|
2553
|
|
2554 /* Verify that we get the expected token back, with the correct
|
|
2555 location information. */
|
|
2556 const cpp_token *tok = test.get_token ();
|
|
2557 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2558 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
|
|
2559
|
|
2560 /* Verify that cpp_interpret_string works. */
|
|
2561 cpp_string dst_string;
|
|
2562 const enum cpp_ttype type = CPP_STRING;
|
|
2563 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2564 &dst_string, type);
|
|
2565 ASSERT_TRUE (result);
|
|
2566 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
|
|
2567 free (const_cast <unsigned char *> (dst_string.text));
|
|
2568
|
|
2569 /* Verify ranges of individual characters. This no longer includes the
|
|
2570 opening quote, but does include the closing quote. */
|
|
2571 for (int i = 0; i < 5; i++)
|
|
2572 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
|
|
2573 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
|
|
2574 for (int i = 6; i <= 10; i++)
|
|
2575 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
|
|
2576
|
|
2577 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
|
|
2578 }
|
|
2579
|
|
2580 /* Test of string literal containing letter escapes. */
|
|
2581
|
|
2582 static void
|
|
2583 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
|
|
2584 {
|
|
2585 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
|
|
2586 .....................000000000.1.11111.1.1.11222.22222223333333
|
|
2587 .....................123456789.0.12345.6.7.89012.34567890123456. */
|
|
2588 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
|
|
2589 lexer_test test (case_, content, NULL);
|
|
2590
|
|
2591 /* Verify that we get the expected tokens back. */
|
|
2592 const cpp_token *tok = test.get_token ();
|
|
2593 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2594 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
|
|
2595
|
|
2596 /* Verify ranges of individual characters. */
|
|
2597 /* "\t". */
|
|
2598 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2599 0, 1, 10, 11);
|
|
2600 /* "foo". */
|
|
2601 for (int i = 1; i <= 3; i++)
|
|
2602 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2603 i, 1, 11 + i, 11 + i);
|
|
2604 /* "\\" and "\n". */
|
|
2605 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2606 4, 1, 15, 16);
|
|
2607 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2608 5, 1, 17, 18);
|
|
2609
|
|
2610 /* "bar" and closing quote for nul-terminator. */
|
|
2611 for (int i = 6; i <= 9; i++)
|
|
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2613 i, 1, 13 + i, 13 + i);
|
|
2614
|
|
2615 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
|
|
2616 }
|
|
2617
|
|
2618 /* Another test of a string literal containing a letter escape.
|
|
2619 Based on string seen in
|
|
2620 printf ("%-%\n");
|
|
2621 in gcc.dg/format/c90-printf-1.c. */
|
|
2622
|
|
2623 static void
|
|
2624 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
|
|
2625 {
|
|
2626 /* .....................000000000.1111.11.1111.22222222223.
|
|
2627 .....................123456789.0123.45.6789.01234567890. */
|
|
2628 const char *content = (" \"%-%\\n\" /* non-str */\n");
|
|
2629 lexer_test test (case_, content, NULL);
|
|
2630
|
|
2631 /* Verify that we get the expected tokens back. */
|
|
2632 const cpp_token *tok = test.get_token ();
|
|
2633 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2634 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
|
|
2635
|
|
2636 /* Verify ranges of individual characters. */
|
|
2637 /* "%-%". */
|
|
2638 for (int i = 0; i < 3; i++)
|
|
2639 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2640 i, 1, 10 + i, 10 + i);
|
|
2641 /* "\n". */
|
|
2642 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2643 3, 1, 13, 14);
|
|
2644
|
|
2645 /* Closing quote for nul-terminator. */
|
|
2646 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
2647 4, 1, 15, 15);
|
|
2648
|
|
2649 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
|
|
2650 }
|
|
2651
|
|
2652 /* Lex a string literal containing UCN 4 characters.
|
|
2653 Verify the substring location data after running cpp_interpret_string
|
|
2654 on it. */
|
|
2655
|
|
2656 static void
|
|
2657 test_lexer_string_locations_ucn4 (const line_table_case &case_)
|
|
2658 {
|
|
2659 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
|
|
2660 as UCN 4.
|
|
2661 ....................000000000.111111.111122.222222223.33333333344444
|
|
2662 ....................123456789.012345.678901.234567890.12345678901234 */
|
|
2663 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
|
|
2664 lexer_test test (case_, content, NULL);
|
|
2665
|
|
2666 /* Verify that we get the expected token back, with the correct
|
|
2667 location information. */
|
|
2668 const cpp_token *tok = test.get_token ();
|
|
2669 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2670 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
|
|
2671
|
|
2672 /* Verify that cpp_interpret_string works.
|
|
2673 The string should be encoded in the execution character
|
|
2674 set. Assuming that that is UTF-8, we should have the following:
|
|
2675 ----------- ---- ----- ------- ----------------
|
|
2676 Byte offset Byte Octal Unicode Source Column(s)
|
|
2677 ----------- ---- ----- ------- ----------------
|
|
2678 0 0x30 '0' 10
|
|
2679 1 0x31 '1' 11
|
|
2680 2 0x32 '2' 12
|
|
2681 3 0x33 '3' 13
|
|
2682 4 0x34 '4' 14
|
|
2683 5 0xE2 \342 U+2174 15-20
|
|
2684 6 0x85 \205 (cont) 15-20
|
|
2685 7 0xB4 \264 (cont) 15-20
|
|
2686 8 0xE2 \342 U+2175 21-26
|
|
2687 9 0x85 \205 (cont) 21-26
|
|
2688 10 0xB5 \265 (cont) 21-26
|
|
2689 11 0x37 '7' 27
|
|
2690 12 0x38 '8' 28
|
|
2691 13 0x39 '9' 29
|
|
2692 14 0x00 30 (closing quote)
|
|
2693 ----------- ---- ----- ------- ---------------. */
|
|
2694
|
|
2695 cpp_string dst_string;
|
|
2696 const enum cpp_ttype type = CPP_STRING;
|
|
2697 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2698 &dst_string, type);
|
|
2699 ASSERT_TRUE (result);
|
|
2700 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
|
|
2701 (const char *)dst_string.text);
|
|
2702 free (const_cast <unsigned char *> (dst_string.text));
|
|
2703
|
|
2704 /* Verify ranges of individual characters. This no longer includes the
|
|
2705 opening quote, but does include the closing quote.
|
|
2706 '01234'. */
|
|
2707 for (int i = 0; i <= 4; i++)
|
|
2708 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
|
|
2709 /* U+2174. */
|
|
2710 for (int i = 5; i <= 7; i++)
|
|
2711 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
|
|
2712 /* U+2175. */
|
|
2713 for (int i = 8; i <= 10; i++)
|
|
2714 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
|
|
2715 /* '789' and nul terminator */
|
|
2716 for (int i = 11; i <= 14; i++)
|
|
2717 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
|
|
2718
|
|
2719 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
|
|
2720 }
|
|
2721
|
|
2722 /* Lex a string literal containing UCN 8 characters.
|
|
2723 Verify the substring location data after running cpp_interpret_string
|
|
2724 on it. */
|
|
2725
|
|
2726 static void
|
|
2727 test_lexer_string_locations_ucn8 (const line_table_case &case_)
|
|
2728 {
|
|
2729 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
|
|
2730 ....................000000000.111111.1111222222.2222333333333.344444
|
|
2731 ....................123456789.012345.6789012345.6789012345678.901234 */
|
|
2732 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
|
|
2733 lexer_test test (case_, content, NULL);
|
|
2734
|
|
2735 /* Verify that we get the expected token back, with the correct
|
|
2736 location information. */
|
|
2737 const cpp_token *tok = test.get_token ();
|
|
2738 ASSERT_EQ (tok->type, CPP_STRING);
|
|
2739 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
|
|
2740 "\"01234\\U00002174\\U00002175789\"");
|
|
2741
|
|
2742 /* Verify that cpp_interpret_string works.
|
|
2743 The UTF-8 encoding of the string is identical to that from
|
|
2744 the ucn4 testcase above; the only difference is the column
|
|
2745 locations. */
|
|
2746 cpp_string dst_string;
|
|
2747 const enum cpp_ttype type = CPP_STRING;
|
|
2748 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2749 &dst_string, type);
|
|
2750 ASSERT_TRUE (result);
|
|
2751 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
|
|
2752 (const char *)dst_string.text);
|
|
2753 free (const_cast <unsigned char *> (dst_string.text));
|
|
2754
|
|
2755 /* Verify ranges of individual characters. This no longer includes the
|
|
2756 opening quote, but does include the closing quote.
|
|
2757 '01234'. */
|
|
2758 for (int i = 0; i <= 4; i++)
|
|
2759 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
|
|
2760 /* U+2174. */
|
|
2761 for (int i = 5; i <= 7; i++)
|
|
2762 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
|
|
2763 /* U+2175. */
|
|
2764 for (int i = 8; i <= 10; i++)
|
|
2765 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
|
|
2766 /* '789' at columns 35-37 */
|
|
2767 for (int i = 11; i <= 13; i++)
|
|
2768 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
|
|
2769 /* Closing quote/nul-terminator at column 38. */
|
|
2770 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
|
|
2771
|
|
2772 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
|
|
2773 }
|
|
2774
|
|
/* Read the four bytes stored at PTR_BE_VALUE as a big-endian 32-bit
   quantity and return the result as a host-endian value.  The bytes are
   accessed one at a time, so the result does not depend on the
   endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* The most significant byte comes first; shift each one in from the
     right.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | bytes[idx];

  return value;
}
|
|
2786
|
|
2787 /* Lex a wide string literal and verify that attempts to read substring
|
|
2788 location data from it fail gracefully. */
|
|
2789
|
|
2790 static void
|
|
2791 test_lexer_string_locations_wide_string (const line_table_case &case_)
|
|
2792 {
|
|
2793 /* Digits 0-9.
|
|
2794 ....................000000000.11111111112.22222222233333
|
|
2795 ....................123456789.01234567890.12345678901234 */
|
|
2796 const char *content = " L\"0123456789\" /* non-str */\n";
|
|
2797 lexer_test test (case_, content, NULL);
|
|
2798
|
|
2799 /* Verify that we get the expected token back, with the correct
|
|
2800 location information. */
|
|
2801 const cpp_token *tok = test.get_token ();
|
|
2802 ASSERT_EQ (tok->type, CPP_WSTRING);
|
|
2803 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
|
|
2804
|
|
2805 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
|
|
2806 cpp_string dst_string;
|
|
2807 const enum cpp_ttype type = CPP_WSTRING;
|
|
2808 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2809 &dst_string, type);
|
|
2810 ASSERT_TRUE (result);
|
|
2811 /* The cpp_reader defaults to big-endian with
|
|
2812 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
|
|
2813 now be encoded as UTF-32BE. */
|
|
2814 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
|
|
2815 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
|
|
2816 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
|
|
2817 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
|
|
2818 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
|
|
2819 free (const_cast <unsigned char *> (dst_string.text));
|
|
2820
|
|
2821 /* We don't yet support generating substring location information
|
|
2822 for L"" strings. */
|
|
2823 ASSERT_HAS_NO_SUBSTRING_RANGES
|
|
2824 (test, tok->src_loc, type,
|
|
2825 "execution character set != source character set");
|
|
2826 }
|
|
2827
|
|
/* Read the two bytes stored at PTR_BE_VALUE as a big-endian 16-bit
   quantity and return the result as a host-endian value.  The bytes are
   accessed one at a time, so the result does not depend on the
   endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
|
|
2836
|
|
2837 /* Lex a u"" string literal and verify that attempts to read substring
|
|
2838 location data from it fail gracefully. */
|
|
2839
|
|
2840 static void
|
|
2841 test_lexer_string_locations_string16 (const line_table_case &case_)
|
|
2842 {
|
|
2843 /* Digits 0-9.
|
|
2844 ....................000000000.11111111112.22222222233333
|
|
2845 ....................123456789.01234567890.12345678901234 */
|
|
2846 const char *content = " u\"0123456789\" /* non-str */\n";
|
|
2847 lexer_test test (case_, content, NULL);
|
|
2848
|
|
2849 /* Verify that we get the expected token back, with the correct
|
|
2850 location information. */
|
|
2851 const cpp_token *tok = test.get_token ();
|
|
2852 ASSERT_EQ (tok->type, CPP_STRING16);
|
|
2853 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
|
|
2854
|
|
2855 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
|
|
2856 cpp_string dst_string;
|
|
2857 const enum cpp_ttype type = CPP_STRING16;
|
|
2858 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2859 &dst_string, type);
|
|
2860 ASSERT_TRUE (result);
|
|
2861
|
|
2862 /* The cpp_reader defaults to big-endian, so dst_string should
|
|
2863 now be encoded as UTF-16BE. */
|
|
2864 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
|
|
2865 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
|
|
2866 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
|
|
2867 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
|
|
2868 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
|
|
2869 free (const_cast <unsigned char *> (dst_string.text));
|
|
2870
|
|
2871 /* We don't yet support generating substring location information
|
|
2872 for L"" strings. */
|
|
2873 ASSERT_HAS_NO_SUBSTRING_RANGES
|
|
2874 (test, tok->src_loc, type,
|
|
2875 "execution character set != source character set");
|
|
2876 }
|
|
2877
|
|
2878 /* Lex a U"" string literal and verify that attempts to read substring
|
|
2879 location data from it fail gracefully. */
|
|
2880
|
|
2881 static void
|
|
2882 test_lexer_string_locations_string32 (const line_table_case &case_)
|
|
2883 {
|
|
2884 /* Digits 0-9.
|
|
2885 ....................000000000.11111111112.22222222233333
|
|
2886 ....................123456789.01234567890.12345678901234 */
|
|
2887 const char *content = " U\"0123456789\" /* non-str */\n";
|
|
2888 lexer_test test (case_, content, NULL);
|
|
2889
|
|
2890 /* Verify that we get the expected token back, with the correct
|
|
2891 location information. */
|
|
2892 const cpp_token *tok = test.get_token ();
|
|
2893 ASSERT_EQ (tok->type, CPP_STRING32);
|
|
2894 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
|
|
2895
|
|
2896 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
|
|
2897 cpp_string dst_string;
|
|
2898 const enum cpp_ttype type = CPP_STRING32;
|
|
2899 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2900 &dst_string, type);
|
|
2901 ASSERT_TRUE (result);
|
|
2902
|
|
2903 /* The cpp_reader defaults to big-endian, so dst_string should
|
|
2904 now be encoded as UTF-32BE. */
|
|
2905 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
|
|
2906 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
|
|
2907 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
|
|
2908 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
|
|
2909 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
|
|
2910 free (const_cast <unsigned char *> (dst_string.text));
|
|
2911
|
|
2912 /* We don't yet support generating substring location information
|
|
2913 for L"" strings. */
|
|
2914 ASSERT_HAS_NO_SUBSTRING_RANGES
|
|
2915 (test, tok->src_loc, type,
|
|
2916 "execution character set != source character set");
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2917 }
|
111
|
2918
|
|
2919 /* Lex a u8-string literal.
|
|
2920 Verify the substring location data after running cpp_interpret_string
|
|
2921 on it. */
|
|
2922
|
|
2923 static void
|
|
2924 test_lexer_string_locations_u8 (const line_table_case &case_)
|
|
2925 {
|
|
2926 /* Digits 0-9.
|
|
2927 ....................000000000.11111111112.22222222233333
|
|
2928 ....................123456789.01234567890.12345678901234 */
|
|
2929 const char *content = " u8\"0123456789\" /* non-str */\n";
|
|
2930 lexer_test test (case_, content, NULL);
|
|
2931
|
|
2932 /* Verify that we get the expected token back, with the correct
|
|
2933 location information. */
|
|
2934 const cpp_token *tok = test.get_token ();
|
|
2935 ASSERT_EQ (tok->type, CPP_UTF8STRING);
|
|
2936 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
|
|
2937
|
|
2938 /* Verify that cpp_interpret_string works. */
|
|
2939 cpp_string dst_string;
|
|
2940 const enum cpp_ttype type = CPP_STRING;
|
|
2941 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
2942 &dst_string, type);
|
|
2943 ASSERT_TRUE (result);
|
|
2944 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
|
|
2945 free (const_cast <unsigned char *> (dst_string.text));
|
|
2946
|
|
2947 /* Verify ranges of individual characters. This no longer includes the
|
|
2948 opening quote, but does include the closing quote. */
|
|
2949 for (int i = 0; i <= 10; i++)
|
|
2950 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
|
|
2951 }
|
|
2952
|
|
2953 /* Lex a string literal containing UTF-8 source characters.
|
|
2954 Verify the substring location data after running cpp_interpret_string
|
|
2955 on it. */
|
|
2956
|
|
2957 static void
|
|
2958 test_lexer_string_locations_utf8_source (const line_table_case &case_)
|
|
2959 {
|
|
2960 /* This string literal is written out to the source file as UTF-8,
|
|
2961 and is of the form "before mojibake after", where "mojibake"
|
|
2962 is written as the following four unicode code points:
|
|
2963 U+6587 CJK UNIFIED IDEOGRAPH-6587
|
|
2964 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
|
|
2965 U+5316 CJK UNIFIED IDEOGRAPH-5316
|
|
2966 U+3051 HIRAGANA LETTER KE.
|
|
2967 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
|
|
2968 "before" and "after" are 1 byte per unicode character.
|
|
2969
|
|
2970 The numbering shown are "columns", which are *byte* numbers within
|
|
2971 the line, rather than unicode character numbers.
|
|
2972
|
|
2973 .................... 000000000.1111111.
|
|
2974 .................... 123456789.0123456. */
|
|
2975 const char *content = (" \"before "
|
|
2976 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
|
|
2977 UTF-8: 0xE6 0x96 0x87
|
|
2978 C octal escaped UTF-8: \346\226\207
|
|
2979 "column" numbers: 17-19. */
|
|
2980 "\346\226\207"
|
|
2981
|
|
2982 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
|
|
2983 UTF-8: 0xE5 0xAD 0x97
|
|
2984 C octal escaped UTF-8: \345\255\227
|
|
2985 "column" numbers: 20-22. */
|
|
2986 "\345\255\227"
|
|
2987
|
|
2988 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
|
|
2989 UTF-8: 0xE5 0x8C 0x96
|
|
2990 C octal escaped UTF-8: \345\214\226
|
|
2991 "column" numbers: 23-25. */
|
|
2992 "\345\214\226"
|
|
2993
|
|
2994 /* U+3051 HIRAGANA LETTER KE
|
|
2995 UTF-8: 0xE3 0x81 0x91
|
|
2996 C octal escaped UTF-8: \343\201\221
|
|
2997 "column" numbers: 26-28. */
|
|
2998 "\343\201\221"
|
|
2999
|
|
3000 /* column numbers 29 onwards
|
|
3001 2333333.33334444444444
|
|
3002 9012345.67890123456789. */
|
|
3003 " after\" /* non-str */\n");
|
|
3004 lexer_test test (case_, content, NULL);
|
|
3005
|
|
3006 /* Verify that we get the expected token back, with the correct
|
|
3007 location information. */
|
|
3008 const cpp_token *tok = test.get_token ();
|
|
3009 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3010 ASSERT_TOKEN_AS_TEXT_EQ
|
|
3011 (test.m_parser, tok,
|
|
3012 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
|
|
3013
|
|
3014 /* Verify that cpp_interpret_string works. */
|
|
3015 cpp_string dst_string;
|
|
3016 const enum cpp_ttype type = CPP_STRING;
|
|
3017 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
3018 &dst_string, type);
|
|
3019 ASSERT_TRUE (result);
|
|
3020 ASSERT_STREQ
|
|
3021 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
|
|
3022 (const char *)dst_string.text);
|
|
3023 free (const_cast <unsigned char *> (dst_string.text));
|
|
3024
|
|
3025 /* Verify ranges of individual characters. This no longer includes the
|
|
3026 opening quote, but does include the closing quote.
|
|
3027 Assuming that both source and execution encodings are UTF-8, we have
|
|
3028 a run of 25 octets in each, plus the NUL terminator. */
|
|
3029 for (int i = 0; i < 25; i++)
|
|
3030 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
|
|
3031 /* NUL-terminator should use the closing quote at column 35. */
|
|
3032 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
|
|
3033
|
|
3034 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
|
|
3035 }
|
|
3036
|
|
3037 /* Test of string literal concatenation. */
|
|
3038
|
|
3039 static void
|
|
3040 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
|
|
3041 {
|
|
3042 /* Digits 0-9.
|
|
3043 .....................000000000.111111.11112222222222
|
|
3044 .....................123456789.012345.67890123456789. */
|
|
3045 const char *content = (" \"01234\" /* non-str */\n"
|
|
3046 " \"56789\" /* non-str */\n");
|
|
3047 lexer_test test (case_, content, NULL);
|
|
3048
|
|
3049 location_t input_locs[2];
|
|
3050
|
|
3051 /* Verify that we get the expected tokens back. */
|
|
3052 auto_vec <cpp_string> input_strings;
|
|
3053 const cpp_token *tok_a = test.get_token ();
|
|
3054 ASSERT_EQ (tok_a->type, CPP_STRING);
|
|
3055 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
|
|
3056 input_strings.safe_push (tok_a->val.str);
|
|
3057 input_locs[0] = tok_a->src_loc;
|
|
3058
|
|
3059 const cpp_token *tok_b = test.get_token ();
|
|
3060 ASSERT_EQ (tok_b->type, CPP_STRING);
|
|
3061 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
|
|
3062 input_strings.safe_push (tok_b->val.str);
|
|
3063 input_locs[1] = tok_b->src_loc;
|
|
3064
|
|
3065 /* Verify that cpp_interpret_string works. */
|
|
3066 cpp_string dst_string;
|
|
3067 const enum cpp_ttype type = CPP_STRING;
|
|
3068 bool result = cpp_interpret_string (test.m_parser,
|
|
3069 input_strings.address (), 2,
|
|
3070 &dst_string, type);
|
|
3071 ASSERT_TRUE (result);
|
|
3072 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
|
|
3073 free (const_cast <unsigned char *> (dst_string.text));
|
|
3074
|
|
3075 /* Simulate c-lex.c's lex_string in order to record concatenation. */
|
|
3076 test.m_concats.record_string_concatenation (2, input_locs);
|
|
3077
|
|
3078 location_t initial_loc = input_locs[0];
|
|
3079
|
|
3080 /* "01234" on line 1. */
|
|
3081 for (int i = 0; i <= 4; i++)
|
|
3082 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
|
|
3083 /* "56789" in line 2, plus its closing quote for the nul terminator. */
|
|
3084 for (int i = 5; i <= 10; i++)
|
|
3085 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
|
|
3086
|
|
3087 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
|
|
3088 }
|
|
3089
|
|
3090 /* Another test of string literal concatenation. */
|
|
3091
|
|
3092 static void
|
|
3093 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
|
|
3094 {
|
|
3095 /* Digits 0-9.
|
|
3096 .....................000000000.111.11111112222222
|
|
3097 .....................123456789.012.34567890123456. */
|
|
3098 const char *content = (" \"01\" /* non-str */\n"
|
|
3099 " \"23\" /* non-str */\n"
|
|
3100 " \"45\" /* non-str */\n"
|
|
3101 " \"67\" /* non-str */\n"
|
|
3102 " \"89\" /* non-str */\n");
|
|
3103 lexer_test test (case_, content, NULL);
|
|
3104
|
|
3105 auto_vec <cpp_string> input_strings;
|
|
3106 location_t input_locs[5];
|
|
3107
|
|
3108 /* Verify that we get the expected tokens back. */
|
|
3109 for (int i = 0; i < 5; i++)
|
|
3110 {
|
|
3111 const cpp_token *tok = test.get_token ();
|
|
3112 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3113 input_strings.safe_push (tok->val.str);
|
|
3114 input_locs[i] = tok->src_loc;
|
|
3115 }
|
|
3116
|
|
3117 /* Verify that cpp_interpret_string works. */
|
|
3118 cpp_string dst_string;
|
|
3119 const enum cpp_ttype type = CPP_STRING;
|
|
3120 bool result = cpp_interpret_string (test.m_parser,
|
|
3121 input_strings.address (), 5,
|
|
3122 &dst_string, type);
|
|
3123 ASSERT_TRUE (result);
|
|
3124 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
|
|
3125 free (const_cast <unsigned char *> (dst_string.text));
|
|
3126
|
|
3127 /* Simulate c-lex.c's lex_string in order to record concatenation. */
|
|
3128 test.m_concats.record_string_concatenation (5, input_locs);
|
|
3129
|
|
3130 location_t initial_loc = input_locs[0];
|
|
3131
|
|
3132 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
|
|
3133 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
|
|
3134 and expect get_source_range_for_substring to fail.
|
|
3135 However, for a string concatenation test, we can have a case
|
|
3136 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
|
|
3137 but subsequent strings can be after it.
|
|
3138 Attempting to detect this within assert_char_at_range
|
|
3139 would overcomplicate the logic for the common test cases, so
|
|
3140 we detect it here. */
|
|
3141 if (should_have_column_data_p (input_locs[0])
|
|
3142 && !should_have_column_data_p (input_locs[4]))
|
|
3143 {
|
|
3144 /* Verify that get_source_range_for_substring gracefully rejects
|
|
3145 this case. */
|
|
3146 source_range actual_range;
|
|
3147 const char *err
|
|
3148 = get_source_range_for_char (test.m_parser, &test.m_concats,
|
|
3149 initial_loc, type, 0, &actual_range);
|
|
3150 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
|
|
3151 return;
|
|
3152 }
|
|
3153
|
|
3154 for (int i = 0; i < 5; i++)
|
|
3155 for (int j = 0; j < 2; j++)
|
|
3156 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
|
|
3157 i + 1, 10 + j, 10 + j);
|
|
3158
|
|
3159 /* NUL-terminator should use the final closing quote at line 5 column 12. */
|
|
3160 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
|
|
3161
|
|
3162 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
|
|
3163 }
|
|
3164
|
|
3165 /* Another test of string literal concatenation, this time combined with
|
|
3166 various kinds of escaped characters. */
|
|
3167
|
|
3168 static void
|
|
3169 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
|
|
3170 {
|
|
3171 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
|
|
3172 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
|
|
3173 const char *content
|
|
3174 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
|
|
3175 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
|
|
3176 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
|
|
3177 lexer_test test (case_, content, NULL);
|
|
3178
|
|
3179 auto_vec <cpp_string> input_strings;
|
|
3180 location_t input_locs[4];
|
|
3181
|
|
3182 /* Verify that we get the expected tokens back. */
|
|
3183 for (int i = 0; i < 4; i++)
|
|
3184 {
|
|
3185 const cpp_token *tok = test.get_token ();
|
|
3186 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3187 input_strings.safe_push (tok->val.str);
|
|
3188 input_locs[i] = tok->src_loc;
|
|
3189 }
|
|
3190
|
|
3191 /* Verify that cpp_interpret_string works. */
|
|
3192 cpp_string dst_string;
|
|
3193 const enum cpp_ttype type = CPP_STRING;
|
|
3194 bool result = cpp_interpret_string (test.m_parser,
|
|
3195 input_strings.address (), 4,
|
|
3196 &dst_string, type);
|
|
3197 ASSERT_TRUE (result);
|
|
3198 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
|
|
3199 free (const_cast <unsigned char *> (dst_string.text));
|
|
3200
|
|
3201 /* Simulate c-lex.c's lex_string in order to record concatenation. */
|
|
3202 test.m_concats.record_string_concatenation (4, input_locs);
|
|
3203
|
|
3204 location_t initial_loc = input_locs[0];
|
|
3205
|
|
3206 for (int i = 0; i <= 4; i++)
|
|
3207 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
|
|
3208 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
|
|
3209 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
|
|
3210 for (int i = 7; i <= 9; i++)
|
|
3211 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
|
|
3212
|
|
3213 /* NUL-terminator should use the location of the final closing quote. */
|
|
3214 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
|
|
3215
|
|
3216 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
|
|
3217 }
|
|
3218
|
|
3219 /* Test of string literal in a macro. */
|
|
3220
|
|
3221 static void
|
|
3222 test_lexer_string_locations_macro (const line_table_case &case_)
|
|
3223 {
|
|
3224 /* Digits 0-9.
|
|
3225 .....................0000000001111111111.22222222223.
|
|
3226 .....................1234567890123456789.01234567890. */
|
|
3227 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
|
|
3228 " MACRO");
|
|
3229 lexer_test test (case_, content, NULL);
|
|
3230
|
|
3231 /* Verify that we get the expected tokens back. */
|
|
3232 const cpp_token *tok = test.get_token ();
|
|
3233 ASSERT_EQ (tok->type, CPP_PADDING);
|
|
3234
|
|
3235 tok = test.get_token ();
|
|
3236 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3237 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
|
|
3238
|
|
3239 /* Verify ranges of individual characters. We ought to
|
|
3240 see columns within the macro definition. */
|
|
3241 for (int i = 0; i <= 10; i++)
|
|
3242 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
3243 i, 1, 20 + i, 20 + i);
|
|
3244
|
|
3245 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
|
|
3246
|
|
3247 tok = test.get_token ();
|
|
3248 ASSERT_EQ (tok->type, CPP_PADDING);
|
|
3249 }
|
|
3250
|
|
3251 /* Test of stringification of a macro argument. */
|
|
3252
|
|
3253 static void
|
|
3254 test_lexer_string_locations_stringified_macro_argument
|
|
3255 (const line_table_case &case_)
|
|
3256 {
|
|
3257 /* .....................000000000111111111122222222223.
|
|
3258 .....................123456789012345678901234567890. */
|
|
3259 const char *content = ("#define MACRO(X) #X /* non-str */\n"
|
|
3260 "MACRO(foo)\n");
|
|
3261 lexer_test test (case_, content, NULL);
|
|
3262
|
|
3263 /* Verify that we get the expected token back. */
|
|
3264 const cpp_token *tok = test.get_token ();
|
|
3265 ASSERT_EQ (tok->type, CPP_PADDING);
|
|
3266
|
|
3267 tok = test.get_token ();
|
|
3268 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3269 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
|
|
3270
|
|
3271 /* We don't support getting the location of a stringified macro
|
|
3272 argument. Verify that it fails gracefully. */
|
|
3273 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
|
|
3274 "cpp_interpret_string_1 failed");
|
|
3275
|
|
3276 tok = test.get_token ();
|
|
3277 ASSERT_EQ (tok->type, CPP_PADDING);
|
|
3278
|
|
3279 tok = test.get_token ();
|
|
3280 ASSERT_EQ (tok->type, CPP_PADDING);
|
|
3281 }
|
|
3282
|
|
3283 /* Ensure that we are fail gracefully if something attempts to pass
|
|
3284 in a location that isn't a string literal token. Seen on this code:
|
|
3285
|
|
3286 const char a[] = " %d ";
|
|
3287 __builtin_printf (a, 0.5);
|
|
3288 ^
|
|
3289
|
|
3290 when c-format.c erroneously used the indicated one-character
|
|
3291 location as the format string location, leading to a read past the
|
|
3292 end of a string buffer in cpp_interpret_string_1. */
|
|
3293
|
|
3294 static void
|
|
3295 test_lexer_string_locations_non_string (const line_table_case &case_)
|
|
3296 {
|
|
3297 /* .....................000000000111111111122222222223.
|
|
3298 .....................123456789012345678901234567890. */
|
|
3299 const char *content = (" a\n");
|
|
3300 lexer_test test (case_, content, NULL);
|
|
3301
|
|
3302 /* Verify that we get the expected token back. */
|
|
3303 const cpp_token *tok = test.get_token ();
|
|
3304 ASSERT_EQ (tok->type, CPP_NAME);
|
|
3305 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
|
|
3306
|
|
3307 /* At this point, libcpp is attempting to interpret the name as a
|
|
3308 string literal, despite it not starting with a quote. We don't detect
|
|
3309 that, but we should at least fail gracefully. */
|
|
3310 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
|
|
3311 "cpp_interpret_string_1 failed");
|
|
3312 }
|
|
3313
|
|
3314 /* Ensure that we can read substring information for a token which
|
|
3315 starts in one linemap and ends in another . Adapted from
|
|
3316 gcc.dg/cpp/pr69985.c. */
|
|
3317
|
|
3318 static void
|
|
3319 test_lexer_string_locations_long_line (const line_table_case &case_)
|
|
3320 {
|
|
3321 /* .....................000000.000111111111
|
|
3322 .....................123456.789012346789. */
|
|
3323 const char *content = ("/* A very long line, so that we start a new line map. */\n"
|
|
3324 " \"0123456789012345678901234567890123456789"
|
|
3325 "0123456789012345678901234567890123456789"
|
|
3326 "0123456789012345678901234567890123456789"
|
|
3327 "0123456789\"\n");
|
|
3328
|
|
3329 lexer_test test (case_, content, NULL);
|
|
3330
|
|
3331 /* Verify that we get the expected token back. */
|
|
3332 const cpp_token *tok = test.get_token ();
|
|
3333 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3334
|
|
3335 if (!should_have_column_data_p (line_table->highest_location))
|
|
3336 return;
|
|
3337
|
|
3338 /* Verify ranges of individual characters. */
|
|
3339 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
|
|
3340 for (int i = 0; i < 131; i++)
|
|
3341 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
3342 i, 2, 7 + i, 7 + i);
|
|
3343 }
|
|
3344
|
|
3345 /* Test of locations within a raw string that doesn't contain a newline. */
|
|
3346
|
|
3347 static void
|
|
3348 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
|
|
3349 {
|
|
3350 /* .....................00.0000000111111111122.
|
|
3351 .....................12.3456789012345678901. */
|
|
3352 const char *content = ("R\"foo(0123456789)foo\"\n");
|
|
3353 lexer_test test (case_, content, NULL);
|
|
3354
|
|
3355 /* Verify that we get the expected token back. */
|
|
3356 const cpp_token *tok = test.get_token ();
|
|
3357 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3358
|
|
3359 /* Verify that cpp_interpret_string works. */
|
|
3360 cpp_string dst_string;
|
|
3361 const enum cpp_ttype type = CPP_STRING;
|
|
3362 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
3363 &dst_string, type);
|
|
3364 ASSERT_TRUE (result);
|
|
3365 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
|
|
3366 free (const_cast <unsigned char *> (dst_string.text));
|
|
3367
|
|
3368 if (!should_have_column_data_p (line_table->highest_location))
|
|
3369 return;
|
|
3370
|
|
3371 /* 0-9, plus the nil terminator. */
|
|
3372 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
|
|
3373 for (int i = 0; i < 11; i++)
|
|
3374 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
|
|
3375 i, 1, 7 + i, 7 + i);
|
|
3376 }
|
|
3377
|
|
3378 /* Test of locations within a raw string that contains a newline. */
|
|
3379
|
|
3380 static void
|
|
3381 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
|
|
3382 {
|
|
3383 /* .....................00.0000.
|
|
3384 .....................12.3456. */
|
|
3385 const char *content = ("R\"foo(\n"
|
|
3386 /* .....................00000.
|
|
3387 .....................12345. */
|
|
3388 "hello\n"
|
|
3389 "world\n"
|
|
3390 /* .....................00000.
|
|
3391 .....................12345. */
|
|
3392 ")foo\"\n");
|
|
3393 lexer_test test (case_, content, NULL);
|
|
3394
|
|
3395 /* Verify that we get the expected token back. */
|
|
3396 const cpp_token *tok = test.get_token ();
|
|
3397 ASSERT_EQ (tok->type, CPP_STRING);
|
|
3398
|
|
3399 /* Verify that cpp_interpret_string works. */
|
|
3400 cpp_string dst_string;
|
|
3401 const enum cpp_ttype type = CPP_STRING;
|
|
3402 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
|
|
3403 &dst_string, type);
|
|
3404 ASSERT_TRUE (result);
|
|
3405 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
|
|
3406 free (const_cast <unsigned char *> (dst_string.text));
|
|
3407
|
|
3408 if (!should_have_column_data_p (line_table->highest_location))
|
|
3409 return;
|
|
3410
|
|
3411 /* Currently we don't support locations within raw strings that
|
|
3412 contain newlines. */
|
|
3413 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
|
|
3414 "range endpoints are on different lines");
|
|
3415 }
|
|
3416
|
|
3417 /* Test of parsing an unterminated raw string. */
|
|
3418
|
|
3419 static void
|
|
3420 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
|
|
3421 {
|
|
3422 const char *content = "R\"ouch()ouCh\" /* etc */";
|
|
3423
|
131
|
3424 lexer_diagnostic_sink diagnostics;
|
|
3425 lexer_test test (case_, content, &diagnostics);
|
111
|
3426 test.m_implicitly_expect_EOF = false;
|
|
3427
|
|
3428 /* Attempt to parse the raw string. */
|
|
3429 const cpp_token *tok = test.get_token ();
|
|
3430 ASSERT_EQ (tok->type, CPP_EOF);
|
|
3431
|
131
|
3432 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
|
111
|
3433 /* We expect the message "unterminated raw string"
|
|
3434 in the "cpplib" translation domain.
|
|
3435 It's not clear that dgettext is available on all supported hosts,
|
|
3436 so this assertion is commented-out for now.
|
|
3437 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
|
131
|
3438 diagnostics.m_diagnostics[0]);
|
111
|
3439 */
|
|
3440 }
|
|
3441
|
|
3442 /* Test of lexing char constants. */
|
|
3443
|
|
3444 static void
|
|
3445 test_lexer_char_constants (const line_table_case &case_)
|
|
3446 {
|
|
3447 /* Various char constants.
|
|
3448 .....................0000000001111111111.22222222223.
|
|
3449 .....................1234567890123456789.01234567890. */
|
|
3450 const char *content = (" 'a'\n"
|
|
3451 " u'a'\n"
|
|
3452 " U'a'\n"
|
|
3453 " L'a'\n"
|
|
3454 " 'abc'\n");
|
|
3455 lexer_test test (case_, content, NULL);
|
|
3456
|
|
3457 /* Verify that we get the expected tokens back. */
|
|
3458 /* 'a'. */
|
|
3459 const cpp_token *tok = test.get_token ();
|
|
3460 ASSERT_EQ (tok->type, CPP_CHAR);
|
|
3461 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
|
|
3462
|
|
3463 unsigned int chars_seen;
|
|
3464 int unsignedp;
|
|
3465 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
|
|
3466 &chars_seen, &unsignedp);
|
|
3467 ASSERT_EQ (cc, 'a');
|
|
3468 ASSERT_EQ (chars_seen, 1);
|
|
3469
|
|
3470 /* u'a'. */
|
|
3471 tok = test.get_token ();
|
|
3472 ASSERT_EQ (tok->type, CPP_CHAR16);
|
|
3473 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
|
|
3474
|
|
3475 /* U'a'. */
|
|
3476 tok = test.get_token ();
|
|
3477 ASSERT_EQ (tok->type, CPP_CHAR32);
|
|
3478 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
|
|
3479
|
|
3480 /* L'a'. */
|
|
3481 tok = test.get_token ();
|
|
3482 ASSERT_EQ (tok->type, CPP_WCHAR);
|
|
3483 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
|
|
3484
|
|
3485 /* 'abc' (c-char-sequence). */
|
|
3486 tok = test.get_token ();
|
|
3487 ASSERT_EQ (tok->type, CPP_CHAR);
|
|
3488 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
|
|
3489 }
|
|
3490 /* A table of interesting location_t values, giving one axis of our test
|
|
3491 matrix. */
|
|
3492
|
|
3493 static const location_t boundary_locations[] = {
|
|
3494 /* Zero means "don't override the default values for a new line_table". */
|
|
3495 0,
|
|
3496
|
|
3497 /* An arbitrary non-zero value that isn't close to one of
|
|
3498 the boundary values below. */
|
|
3499 0x10000,
|
|
3500
|
|
3501 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
|
|
3502 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
|
|
3503 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
|
|
3504 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
|
|
3505 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
|
|
3506 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
|
|
3507
|
|
3508 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
|
|
3509 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
|
|
3510 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
|
|
3511 LINE_MAP_MAX_LOCATION_WITH_COLS,
|
|
3512 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
|
|
3513 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
|
|
3514 };
|
|
3515
|
|
3516 /* Run TESTCASE multiple times, once for each case in our test matrix. */
|
|
3517
|
|
3518 void
|
|
3519 for_each_line_table_case (void (*testcase) (const line_table_case &))
|
|
3520 {
|
|
3521 /* As noted above in the description of struct line_table_case,
|
|
3522 we want to explore a test matrix of interesting line_table
|
|
3523 situations, running various selftests for each case within the
|
|
3524 matrix. */
|
|
3525
|
|
3526 /* Run all tests with:
|
|
3527 (a) line_table->default_range_bits == 0, and
|
|
3528 (b) line_table->default_range_bits == 5. */
|
|
3529 int num_cases_tested = 0;
|
|
3530 for (int default_range_bits = 0; default_range_bits <= 5;
|
|
3531 default_range_bits += 5)
|
|
3532 {
|
|
3533 /* ...and use each of the "interesting" location values as
|
|
3534 the starting location within line_table. */
|
|
3535 const int num_boundary_locations
|
|
3536 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
|
|
3537 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
|
|
3538 {
|
|
3539 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
|
|
3540
|
|
3541 testcase (c);
|
|
3542
|
|
3543 num_cases_tested++;
|
|
3544 }
|
|
3545 }
|
|
3546
|
|
3547 /* Verify that we fully covered the test matrix. */
|
|
3548 ASSERT_EQ (num_cases_tested, 2 * 12);
|
|
3549 }
|
|
3550
|
|
3551 /* Run all of the selftests within this file. */
|
|
3552
|
|
3553 void
|
|
3554 input_c_tests ()
|
|
3555 {
|
131
|
3556 test_linenum_comparisons ();
|
111
|
3557 test_should_have_column_data_p ();
|
|
3558 test_unknown_location ();
|
|
3559 test_builtins ();
|
|
3560 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
|
|
3561
|
|
3562 for_each_line_table_case (test_accessing_ordinary_linemaps);
|
|
3563 for_each_line_table_case (test_lexer);
|
|
3564 for_each_line_table_case (test_lexer_string_locations_simple);
|
|
3565 for_each_line_table_case (test_lexer_string_locations_ebcdic);
|
|
3566 for_each_line_table_case (test_lexer_string_locations_hex);
|
|
3567 for_each_line_table_case (test_lexer_string_locations_oct);
|
|
3568 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
|
|
3569 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
|
|
3570 for_each_line_table_case (test_lexer_string_locations_ucn4);
|
|
3571 for_each_line_table_case (test_lexer_string_locations_ucn8);
|
|
3572 for_each_line_table_case (test_lexer_string_locations_wide_string);
|
|
3573 for_each_line_table_case (test_lexer_string_locations_string16);
|
|
3574 for_each_line_table_case (test_lexer_string_locations_string32);
|
|
3575 for_each_line_table_case (test_lexer_string_locations_u8);
|
|
3576 for_each_line_table_case (test_lexer_string_locations_utf8_source);
|
|
3577 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
|
|
3578 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
|
|
3579 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
|
|
3580 for_each_line_table_case (test_lexer_string_locations_macro);
|
|
3581 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
|
|
3582 for_each_line_table_case (test_lexer_string_locations_non_string);
|
|
3583 for_each_line_table_case (test_lexer_string_locations_long_line);
|
|
3584 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
|
|
3585 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
|
|
3586 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
|
|
3587 for_each_line_table_case (test_lexer_char_constants);
|
|
3588
|
|
3589 test_reading_source_line ();
|
|
3590 }
|
|
3591
|
|
3592 } // namespace selftest
|
|
3593
|
|
3594 #endif /* CHECKING_P */
|