Mercurial > hg > CbC > CbC_gcc
comparison gcc/ada/scng.adb @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 ------------------------------------------------------------------------------ | |
2 -- -- | |
3 -- GNAT COMPILER COMPONENTS -- | |
4 -- -- | |
5 -- S C N G -- | |
6 -- -- | |
7 -- B o d y -- | |
8 -- -- | |
9 -- Copyright (C) 1992-2016, Free Software Foundation, Inc. -- | |
10 -- -- | |
11 -- GNAT is free software; you can redistribute it and/or modify it under -- | |
12 -- terms of the GNU General Public License as published by the Free Soft- -- | |
13 -- ware Foundation; either version 3, or (at your option) any later ver- -- | |
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- | |
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- | |
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- | |
17 -- for more details. You should have received a copy of the GNU General -- | |
18 -- Public License distributed with GNAT; see file COPYING3. If not, go to -- | |
19 -- http://www.gnu.org/licenses for a complete copy of the license. -- | |
20 -- -- | |
21 -- GNAT was originally developed by the GNAT team at New York University. -- | |
22 -- Extensive contributions were provided by Ada Core Technologies Inc. -- | |
23 -- -- | |
24 ------------------------------------------------------------------------------ | |
25 | |
26 with Atree; use Atree; | |
27 with Csets; use Csets; | |
28 with Hostparm; use Hostparm; | |
29 with Namet; use Namet; | |
30 with Opt; use Opt; | |
31 with Restrict; use Restrict; | |
32 with Rident; use Rident; | |
33 with Scans; use Scans; | |
34 with Sinput; use Sinput; | |
35 with Snames; use Snames; | |
36 with Stringt; use Stringt; | |
37 with Stylesw; use Stylesw; | |
38 with Uintp; use Uintp; | |
39 with Urealp; use Urealp; | |
40 with Widechar; use Widechar; | |
41 | |
42 pragma Warnings (Off); | |
43 -- This package is used also by gnatcoll | |
44 with System.CRC32; | |
45 with System.UTF_32; use System.UTF_32; | |
46 with System.WCh_Con; use System.WCh_Con; | |
47 pragma Warnings (On); | |
48 | |
49 package body Scng is | |
50 | |
51 use ASCII; | |
52 -- Make control characters visible | |
53 | |
54 Special_Characters : array (Character) of Boolean := (others => False); | |
55 -- For characters that are Special token, the value is True | |
56 | |
57 Comment_Is_Token : Boolean := False; | |
58 -- True if comments are tokens | |
59 | |
60 End_Of_Line_Is_Token : Boolean := False; | |
61 -- True if End_Of_Line is a token | |
62 | |
63 ----------------------- | |
64 -- Local Subprograms -- | |
65 ----------------------- | |
66 | |
67 procedure Accumulate_Token_Checksum; | |
68 pragma Inline (Accumulate_Token_Checksum); | |
69 -- Called after each numeric literal and identifier/keyword. For keywords, | |
70 -- the token used is Tok_Identifier. This allows detection of additional | |
71 -- spaces added in sources when using the builder switch -m. | |
72 | |
73 procedure Accumulate_Token_Checksum_GNAT_6_3; | |
74 -- Used in place of Accumulate_Token_Checksum for GNAT versions 5.04 to | |
75 -- 6.3, when Tok_Some was not included in Token_Type and the actual | |
76 -- Token_Type was used for keywords. This procedure is never used in the | |
77 -- compiler or gnatmake, only in gprbuild. | |
78 | |
79 procedure Accumulate_Token_Checksum_GNAT_5_03; | |
80 -- Used in place of Accumulate_Token_Checksum for GNAT version 5.03, when | |
81 -- Tok_Interface, Tok_Some, Tok_Synchronized and Tok_Overriding were not | |
82 -- included in Token_Type and the actual Token_Type was used for keywords. | |
83 -- This procedure is never used in the compiler or gnatmake, only in | |
84 -- gprbuild. | |
85 | |
86 procedure Accumulate_Checksum (C : Character); | |
87 pragma Inline (Accumulate_Checksum); | |
88 -- This routine accumulates the checksum given character C. During the | |
89 -- scanning of a source file, this routine is called with every character | |
90 -- in the source, excluding blanks, and all control characters (except | |
91 -- that ESC is included in the checksum). Upper case letters not in string | |
92 -- literals are folded by the caller. See Sinput spec for the documentation | |
93 -- of the checksum algorithm. Note: checksum values are only used if we | |
94 -- generate code, so it is not necessary to worry about making the right | |
95 -- sequence of calls in any error situation. | |
96 | |
97 procedure Accumulate_Checksum (C : Char_Code); | |
98 pragma Inline (Accumulate_Checksum); | |
99 -- This version is identical, except that the argument, C, is a character | |
100 -- code value instead of a character. This is used when wide characters | |
101 -- are scanned. We use the character code rather than the ASCII characters | |
102 -- so that the checksum is independent of wide character encoding method. | |
103 | |
104 procedure Initialize_Checksum; | |
105 pragma Inline (Initialize_Checksum); | |
106 -- Initialize checksum value | |
107 | |
108 ------------------------- | |
109 -- Accumulate_Checksum -- | |
110 ------------------------- | |
111 | |
112 procedure Accumulate_Checksum (C : Character) is | |
113 begin | |
114 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C); | |
115 end Accumulate_Checksum; | |
116 | |
117 procedure Accumulate_Checksum (C : Char_Code) is | |
118 begin | |
119 if C > 16#FFFF# then | |
120 Accumulate_Checksum (Character'Val (C / 2 ** 24)); | |
121 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256)); | |
122 Accumulate_Checksum (Character'Val ((C / 256) mod 256)); | |
123 else | |
124 Accumulate_Checksum (Character'Val (C / 256)); | |
125 end if; | |
126 | |
127 Accumulate_Checksum (Character'Val (C mod 256)); | |
128 end Accumulate_Checksum; | |
129 | |
130 ------------------------------- | |
131 -- Accumulate_Token_Checksum -- | |
132 ------------------------------- | |
133 | |
134 procedure Accumulate_Token_Checksum is | |
135 begin | |
136 System.CRC32.Update | |
137 (System.CRC32.CRC32 (Checksum), | |
138 Character'Val (Token_Type'Pos (Token))); | |
139 end Accumulate_Token_Checksum; | |
140 | |
141 ---------------------------------------- | |
142 -- Accumulate_Token_Checksum_GNAT_6_3 -- | |
143 ---------------------------------------- | |
144 | |
145 procedure Accumulate_Token_Checksum_GNAT_6_3 is | |
146 begin | |
147 -- Individual values of Token_Type are used, instead of subranges, so | |
148 -- that additions or suppressions of enumerated values in type | |
149 -- Token_Type are detected by the compiler. | |
150 | |
151 case Token is | |
152 when Tok_Abs | |
153 | Tok_Abstract | |
154 | Tok_Access | |
155 | Tok_Aliased | |
156 | Tok_All | |
157 | Tok_Ampersand | |
158 | Tok_And | |
159 | Tok_Apostrophe | |
160 | Tok_Array | |
161 | Tok_Asterisk | |
162 | Tok_At | |
163 | Tok_At_Sign | |
164 | Tok_Body | |
165 | Tok_Box | |
166 | Tok_Char_Literal | |
167 | Tok_Colon | |
168 | Tok_Colon_Equal | |
169 | Tok_Comma | |
170 | Tok_Constant | |
171 | Tok_Delta | |
172 | Tok_Digits | |
173 | Tok_Do | |
174 | Tok_Dot | |
175 | Tok_Double_Asterisk | |
176 | Tok_Equal | |
177 | Tok_Greater | |
178 | Tok_Greater_Equal | |
179 | Tok_Greater_Greater | |
180 | Tok_Identifier | |
181 | Tok_In | |
182 | Tok_Integer_Literal | |
183 | Tok_Interface | |
184 | Tok_Is | |
185 | Tok_Left_Paren | |
186 | Tok_Less | |
187 | Tok_Less_Equal | |
188 | Tok_Limited | |
189 | Tok_Minus | |
190 | Tok_Mod | |
191 | Tok_New | |
192 | Tok_Not | |
193 | Tok_Not_Equal | |
194 | Tok_Null | |
195 | Tok_Of | |
196 | Tok_Operator_Symbol | |
197 | Tok_Or | |
198 | Tok_Others | |
199 | Tok_Out | |
200 | Tok_Plus | |
201 | Tok_Range | |
202 | Tok_Real_Literal | |
203 | Tok_Record | |
204 | Tok_Rem | |
205 | Tok_Renames | |
206 | Tok_Reverse | |
207 | Tok_Right_Paren | |
208 | Tok_Slash | |
209 | Tok_String_Literal | |
210 | Tok_Xor | |
211 => | |
212 System.CRC32.Update | |
213 (System.CRC32.CRC32 (Checksum), | |
214 Character'Val (Token_Type'Pos (Token))); | |
215 | |
216 when Tok_Some => | |
217 System.CRC32.Update | |
218 (System.CRC32.CRC32 (Checksum), | |
219 Character'Val (Token_Type'Pos (Tok_Identifier))); | |
220 | |
221 when No_Token | |
222 | Tok_Abort | |
223 | Tok_Accept | |
224 | Tok_Arrow | |
225 | Tok_Begin | |
226 | Tok_Case | |
227 | Tok_Comment | |
228 | Tok_Declare | |
229 | Tok_Delay | |
230 | Tok_Dot_Dot | |
231 | Tok_Else | |
232 | Tok_Elsif | |
233 | Tok_End | |
234 | Tok_End_Of_Line | |
235 | Tok_Entry | |
236 | Tok_EOF | |
237 | Tok_Exception | |
238 | Tok_Exit | |
239 | Tok_Extends | |
240 | Tok_External | |
241 | Tok_External_As_List | |
242 | Tok_For | |
243 | Tok_Function | |
244 | Tok_Generic | |
245 | Tok_Goto | |
246 | Tok_If | |
247 | Tok_Less_Less | |
248 | Tok_Loop | |
249 | Tok_Overriding | |
250 | Tok_Package | |
251 | Tok_Pragma | |
252 | Tok_Private | |
253 | Tok_Procedure | |
254 | Tok_Project | |
255 | Tok_Protected | |
256 | Tok_Raise | |
257 | Tok_Requeue | |
258 | Tok_Return | |
259 | Tok_Select | |
260 | Tok_Semicolon | |
261 | Tok_Separate | |
262 | Tok_SPARK_Hide | |
263 | Tok_Special | |
264 | Tok_Subtype | |
265 | Tok_Synchronized | |
266 | Tok_Tagged | |
267 | Tok_Task | |
268 | Tok_Terminate | |
269 | Tok_Then | |
270 | Tok_Type | |
271 | Tok_Until | |
272 | Tok_Use | |
273 | Tok_Vertical_Bar | |
274 | Tok_When | |
275 | Tok_While | |
276 | Tok_With | |
277 => | |
278 System.CRC32.Update | |
279 (System.CRC32.CRC32 (Checksum), | |
280 Character'Val (Token_Type'Pos (Token_Type'Pred (Token)))); | |
281 end case; | |
282 end Accumulate_Token_Checksum_GNAT_6_3; | |
283 | |
284 ----------------------------------------- | |
285 -- Accumulate_Token_Checksum_GNAT_5_03 -- | |
286 ----------------------------------------- | |
287 | |
288 procedure Accumulate_Token_Checksum_GNAT_5_03 is | |
289 begin | |
290 -- Individual values of Token_Type are used, instead of subranges, so | |
291 -- that additions or suppressions of enumerated values in type | |
292 -- Token_Type are detected by the compiler. | |
293 | |
294 case Token is | |
295 when Tok_Abs | |
296 | Tok_Abstract | |
297 | Tok_Access | |
298 | Tok_Aliased | |
299 | Tok_All | |
300 | Tok_Ampersand | |
301 | Tok_And | |
302 | Tok_Apostrophe | |
303 | Tok_Array | |
304 | Tok_Asterisk | |
305 | Tok_At | |
306 | Tok_At_Sign | |
307 | Tok_Body | |
308 | Tok_Box | |
309 | Tok_Char_Literal | |
310 | Tok_Colon | |
311 | Tok_Colon_Equal | |
312 | Tok_Comma | |
313 | Tok_Constant | |
314 | Tok_Delta | |
315 | Tok_Digits | |
316 | Tok_Do | |
317 | Tok_Dot | |
318 | Tok_Double_Asterisk | |
319 | Tok_Equal | |
320 | Tok_Greater | |
321 | Tok_Greater_Equal | |
322 | Tok_Greater_Greater | |
323 | Tok_Identifier | |
324 | Tok_In | |
325 | Tok_Integer_Literal | |
326 | Tok_Is | |
327 | Tok_Left_Paren | |
328 | Tok_Less | |
329 | Tok_Less_Equal | |
330 | Tok_Minus | |
331 | Tok_Mod | |
332 | Tok_New | |
333 | Tok_Not | |
334 | Tok_Not_Equal | |
335 | Tok_Null | |
336 | Tok_Operator_Symbol | |
337 | Tok_Or | |
338 | Tok_Others | |
339 | Tok_Plus | |
340 | Tok_Range | |
341 | Tok_Real_Literal | |
342 | Tok_Rem | |
343 | Tok_Right_Paren | |
344 | Tok_Slash | |
345 | Tok_String_Literal | |
346 | Tok_Xor | |
347 => | |
348 System.CRC32.Update | |
349 (System.CRC32.CRC32 (Checksum), | |
350 Character'Val (Token_Type'Pos (Token))); | |
351 | |
352 when Tok_Interface | |
353 | Tok_Overriding | |
354 | Tok_Some | |
355 | Tok_Synchronized | |
356 => | |
357 System.CRC32.Update | |
358 (System.CRC32.CRC32 (Checksum), | |
359 Character'Val (Token_Type'Pos (Tok_Identifier))); | |
360 | |
361 when Tok_Limited | |
362 | Tok_Of | |
363 | Tok_Out | |
364 | Tok_Record | |
365 | Tok_Renames | |
366 | Tok_Reverse | |
367 => | |
368 System.CRC32.Update | |
369 (System.CRC32.CRC32 (Checksum), | |
370 Character'Val (Token_Type'Pos (Token) - 1)); | |
371 | |
372 when Tok_Abort | |
373 | Tok_Accept | |
374 | Tok_Begin | |
375 | Tok_Case | |
376 | Tok_Declare | |
377 | Tok_Delay | |
378 | Tok_Else | |
379 | Tok_Elsif | |
380 | Tok_End | |
381 | Tok_Entry | |
382 | Tok_Exception | |
383 | Tok_Exit | |
384 | Tok_For | |
385 | Tok_Goto | |
386 | Tok_If | |
387 | Tok_Less_Less | |
388 | Tok_Loop | |
389 | Tok_Pragma | |
390 | Tok_Protected | |
391 | Tok_Raise | |
392 | Tok_Requeue | |
393 | Tok_Return | |
394 | Tok_Select | |
395 | Tok_Subtype | |
396 | Tok_Tagged | |
397 | Tok_Task | |
398 | Tok_Terminate | |
399 | Tok_Then | |
400 | Tok_Type | |
401 | Tok_Until | |
402 | Tok_When | |
403 | Tok_While | |
404 => | |
405 System.CRC32.Update | |
406 (System.CRC32.CRC32 (Checksum), | |
407 Character'Val (Token_Type'Pos (Token) - 2)); | |
408 | |
409 when No_Token | |
410 | Tok_Arrow | |
411 | Tok_Comment | |
412 | Tok_Dot_Dot | |
413 | Tok_End_Of_Line | |
414 | Tok_EOF | |
415 | Tok_Extends | |
416 | Tok_External | |
417 | Tok_External_As_List | |
418 | Tok_Function | |
419 | Tok_Generic | |
420 | Tok_Package | |
421 | Tok_Private | |
422 | Tok_Procedure | |
423 | Tok_Project | |
424 | Tok_Semicolon | |
425 | Tok_Separate | |
426 | Tok_SPARK_Hide | |
427 | Tok_Special | |
428 | Tok_Use | |
429 | Tok_Vertical_Bar | |
430 | Tok_With | |
431 => | |
432 System.CRC32.Update | |
433 (System.CRC32.CRC32 (Checksum), | |
434 Character'Val (Token_Type'Pos (Token) - 4)); | |
435 end case; | |
436 end Accumulate_Token_Checksum_GNAT_5_03; | |
437 | |
438 ----------------------- | |
439 -- Check_End_Of_Line -- | |
440 ----------------------- | |
441 | |
442 procedure Check_End_Of_Line is | |
443 Len : constant Int := | |
444 Int (Scan_Ptr) - | |
445 Int (Current_Line_Start) - | |
446 Wide_Char_Byte_Count; | |
447 | |
448 -- Start of processing for Check_End_Of_Line | |
449 | |
450 begin | |
451 if Style_Check then | |
452 Style.Check_Line_Terminator (Len); | |
453 end if; | |
454 | |
455 -- Deal with checking maximum line length | |
456 | |
457 if Style_Check and Style_Check_Max_Line_Length then | |
458 Style.Check_Line_Max_Length (Len); | |
459 | |
460 -- If style checking is inactive, check maximum line length against | |
461 -- standard value. | |
462 | |
463 elsif Len > Max_Line_Length then | |
464 Error_Msg | |
465 ("this line is too long", | |
466 Current_Line_Start + Source_Ptr (Max_Line_Length)); | |
467 end if; | |
468 | |
469 -- Now one more checking circuit. Normally we are only enforcing a limit | |
470 -- of physical characters, with tabs counting as one character. But if | |
471 -- after tab expansion we would have a total line length that exceeded | |
472 -- 32766, that would really cause trouble, because column positions | |
473 -- would exceed the maximum we allow for a column count. Note: the limit | |
474 -- is 32766 rather than 32767, since we use a value of 32767 for special | |
475 -- purposes (see Sinput). Now we really do not want to go messing with | |
476 -- tabs in the normal case, so what we do is to check for a line that | |
477 -- has more than 4096 physical characters. Any shorter line could not | |
478 -- be a problem, even if it was all tabs. | |
479 | |
480 if Len >= 4096 then | |
481 declare | |
482 Col : Natural; | |
483 Ptr : Source_Ptr; | |
484 | |
485 begin | |
486 Col := 1; | |
487 Ptr := Current_Line_Start; | |
488 loop | |
489 exit when Ptr = Scan_Ptr; | |
490 | |
491 if Source (Ptr) = ASCII.HT then | |
492 Col := (Col - 1 + 8) / 8 * 8 + 1; | |
493 else | |
494 Col := Col + 1; | |
495 end if; | |
496 | |
497 if Col > 32766 then | |
498 Error_Msg | |
499 ("this line is longer than 32766 characters", | |
500 Current_Line_Start); | |
501 raise Unrecoverable_Error; | |
502 end if; | |
503 | |
504 Ptr := Ptr + 1; | |
505 end loop; | |
506 end; | |
507 end if; | |
508 | |
509 -- Reset wide character byte count for next line | |
510 | |
511 Wide_Char_Byte_Count := 0; | |
512 end Check_End_Of_Line; | |
513 | |
514 ---------------------------- | |
515 -- Determine_Token_Casing -- | |
516 ---------------------------- | |
517 | |
518 function Determine_Token_Casing return Casing_Type is | |
519 begin | |
520 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1)); | |
521 end Determine_Token_Casing; | |
522 | |
523 ------------------------- | |
524 -- Initialize_Checksum -- | |
525 ------------------------- | |
526 | |
527 procedure Initialize_Checksum is | |
528 begin | |
529 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum)); | |
530 end Initialize_Checksum; | |
531 | |
532 ------------------------ | |
533 -- Initialize_Scanner -- | |
534 ------------------------ | |
535 | |
536 procedure Initialize_Scanner (Index : Source_File_Index) is | |
537 begin | |
538 -- Establish reserved words | |
539 | |
540 Scans.Initialize_Ada_Keywords; | |
541 | |
542 -- Initialize scan control variables | |
543 | |
544 Current_Source_File := Index; | |
545 Source := Source_Text (Current_Source_File); | |
546 Scan_Ptr := Source_First (Current_Source_File); | |
547 Token := No_Token; | |
548 Token_Ptr := Scan_Ptr; | |
549 Current_Line_Start := Scan_Ptr; | |
550 Token_Node := Empty; | |
551 Token_Name := No_Name; | |
552 Start_Column := Set_Start_Column; | |
553 First_Non_Blank_Location := Scan_Ptr; | |
554 | |
555 Initialize_Checksum; | |
556 Wide_Char_Byte_Count := 0; | |
557 | |
558 -- Do not call Scan, otherwise the License stuff does not work in Scn | |
559 | |
560 end Initialize_Scanner; | |
561 | |
562 ------------------------------ | |
563 -- Reset_Special_Characters -- | |
564 ------------------------------ | |
565 | |
566 procedure Reset_Special_Characters is | |
567 begin | |
568 Special_Characters := (others => False); | |
569 end Reset_Special_Characters; | |
570 | |
571 ---------- | |
572 -- Scan -- | |
573 ---------- | |
574 | |
575 procedure Scan is | |
576 | |
577 Start_Of_Comment : Source_Ptr; | |
578 -- Record start of comment position | |
579 | |
580 Underline_Found : Boolean; | |
581 -- During scanning of an identifier, set to True if last character | |
582 -- scanned was an underline or other punctuation character. This | |
583 -- is used to flag the error of two underlines/punctuations in a | |
584 -- row or ending an identifier with a underline/punctuation. Here | |
585 -- punctuation means any UTF_32 character in the Unicode category | |
586 -- Punctuation,Connector. | |
587 | |
588 Wptr : Source_Ptr; | |
589 -- Used to remember start of last wide character scanned | |
590 | |
591 function Double_Char_Token (C : Character) return Boolean; | |
592 -- This function is used for double character tokens like := or <>. It | |
593 -- checks if the character following Source (Scan_Ptr) is C, and if so | |
594 -- bumps Scan_Ptr past the pair of characters and returns True. A space | |
595 -- between the two characters is also recognized with an appropriate | |
596 -- error message being issued. If C is not present, False is returned. | |
597 -- Note that Double_Char_Token can only be used for tokens defined in | |
598 -- the Ada syntax (it's use for error cases like && is not appropriate | |
599 -- since we do not want a junk message for a case like &-space-&). | |
600 | |
601 procedure Error_Illegal_Character; | |
602 -- Give illegal character error, Scan_Ptr points to character. On | |
603 -- return, Scan_Ptr is bumped past the illegal character. | |
604 | |
605 procedure Error_Illegal_Wide_Character; | |
606 -- Give illegal wide character message. On return, Scan_Ptr is bumped | |
607 -- past the illegal character, which may still leave us pointing to | |
608 -- junk, not much we can do if the escape sequence is messed up. | |
609 | |
610 procedure Error_No_Double_Underline; | |
611 -- Signal error of two underline or punctuation characters in a row. | |
612 -- Called with Scan_Ptr pointing to second underline/punctuation char. | |
613 | |
614 procedure Nlit; | |
615 -- This is the procedure for scanning out numeric literals. On entry, | |
616 -- Scan_Ptr points to the digit that starts the numeric literal (the | |
617 -- checksum for this character has not been accumulated yet). On return | |
618 -- Scan_Ptr points past the last character of the numeric literal, Token | |
619 -- and Token_Node are set appropriately, and the checksum is updated. | |
620 | |
621 procedure Slit; | |
622 -- This is the procedure for scanning out string literals. On entry, | |
623 -- Scan_Ptr points to the opening string quote (the checksum for this | |
624 -- character has not been accumulated yet). On return Scan_Ptr points | |
625 -- past the closing quote of the string literal, Token and Token_Node | |
626 -- are set appropriately, and the checksum is updated. | |
627 | |
628 procedure Skip_Other_Format_Characters; | |
629 -- Skips past any "other format" category characters at the current | |
630 -- cursor location (does not skip past spaces or any other characters). | |
631 | |
632 function Start_Of_Wide_Character return Boolean; | |
633 -- Returns True if the scan pointer is pointing to the start of a wide | |
634 -- character sequence, does not modify the scan pointer in any case. | |
635 | |
636 ----------------------- | |
637 -- Double_Char_Token -- | |
638 ----------------------- | |
639 | |
640 function Double_Char_Token (C : Character) return Boolean is | |
641 begin | |
642 if Source (Scan_Ptr + 1) = C then | |
643 Accumulate_Checksum (C); | |
644 Scan_Ptr := Scan_Ptr + 2; | |
645 return True; | |
646 | |
647 elsif Source (Scan_Ptr + 1) = ' ' | |
648 and then Source (Scan_Ptr + 2) = C | |
649 then | |
650 Scan_Ptr := Scan_Ptr + 1; | |
651 Error_Msg_S -- CODEFIX | |
652 ("no space allowed here"); | |
653 Scan_Ptr := Scan_Ptr + 2; | |
654 return True; | |
655 | |
656 else | |
657 return False; | |
658 end if; | |
659 end Double_Char_Token; | |
660 | |
661 ----------------------------- | |
662 -- Error_Illegal_Character -- | |
663 ----------------------------- | |
664 | |
665 procedure Error_Illegal_Character is | |
666 begin | |
667 Error_Msg_S ("illegal character"); | |
668 Scan_Ptr := Scan_Ptr + 1; | |
669 end Error_Illegal_Character; | |
670 | |
671 ---------------------------------- | |
672 -- Error_Illegal_Wide_Character -- | |
673 ---------------------------------- | |
674 | |
675 procedure Error_Illegal_Wide_Character is | |
676 begin | |
677 Scan_Ptr := Scan_Ptr + 1; | |
678 Error_Msg ("illegal wide character", Wptr); | |
679 end Error_Illegal_Wide_Character; | |
680 | |
681 ------------------------------- | |
682 -- Error_No_Double_Underline -- | |
683 ------------------------------- | |
684 | |
685 procedure Error_No_Double_Underline is | |
686 begin | |
687 Underline_Found := False; | |
688 | |
689 -- There are four cases, and we special case the messages | |
690 | |
691 if Source (Scan_Ptr) = '_' then | |
692 if Source (Scan_Ptr - 1) = '_' then | |
693 Error_Msg_S -- CODEFIX | |
694 ("two consecutive underlines not permitted"); | |
695 else | |
696 Error_Msg_S ("underline cannot follow punctuation character"); | |
697 end if; | |
698 | |
699 else | |
700 if Source (Scan_Ptr - 1) = '_' then | |
701 Error_Msg_S ("punctuation character cannot follow underline"); | |
702 else | |
703 Error_Msg_S | |
704 ("two consecutive punctuation characters not permitted"); | |
705 end if; | |
706 end if; | |
707 end Error_No_Double_Underline; | |
708 | |
709 ---------- | |
710 -- Nlit -- | |
711 ---------- | |
712 | |
713 procedure Nlit is | |
714 | |
715 C : Character; | |
716 -- Current source program character | |
717 | |
718 Base_Char : Character; | |
719 -- Either # or : (character at start of based number) | |
720 | |
721 Base : Int; | |
722 -- Value of base | |
723 | |
724 UI_Base : Uint; | |
725 -- Value of base in Uint format | |
726 | |
727 UI_Int_Value : Uint; | |
728 -- Value of integer scanned by Scan_Integer in Uint format | |
729 | |
730 UI_Num_Value : Uint; | |
731 -- Value of integer in numeric value being scanned | |
732 | |
733 Scale : Int; | |
734 -- Scale value for real literal | |
735 | |
736 UI_Scale : Uint; | |
737 -- Scale in Uint format | |
738 | |
739 Exponent_Is_Negative : Boolean; | |
740 -- Set true for negative exponent | |
741 | |
742 Extended_Digit_Value : Int; | |
743 -- Extended digit value | |
744 | |
745 Point_Scanned : Boolean; | |
746 -- Flag for decimal point scanned in numeric literal | |
747 | |
748 ----------------------- | |
749 -- Local Subprograms -- | |
750 ----------------------- | |
751 | |
752 procedure Error_Digit_Expected; | |
753 -- Signal error of bad digit, Scan_Ptr points to the location at | |
754 -- which the digit was expected on input, and is unchanged on return. | |
755 | |
756 procedure Scan_Integer; | |
757 -- Scan integer literal. On entry, Scan_Ptr points to a digit, on | |
758 -- exit Scan_Ptr points past the last character of the integer. | |
759 -- | |
760 -- For each digit encountered, UI_Int_Value is multiplied by 10, and | |
761 -- the value of the digit added to the result. In addition, the value | |
762 -- in Scale is decremented by one for each actual digit scanned. | |
763 | |
764 -------------------------- | |
765 -- Error_Digit_Expected -- | |
766 -------------------------- | |
767 | |
768 procedure Error_Digit_Expected is | |
769 begin | |
770 Error_Msg_S ("digit expected"); | |
771 end Error_Digit_Expected; | |
772 | |
773 ------------------ | |
774 -- Scan_Integer -- | |
775 ------------------ | |
776 | |
777 procedure Scan_Integer is | |
778 C : Character; | |
779 -- Next character scanned | |
780 | |
781 begin | |
782 C := Source (Scan_Ptr); | |
783 | |
784 -- Loop through digits (allowing underlines) | |
785 | |
786 loop | |
787 Accumulate_Checksum (C); | |
788 UI_Int_Value := | |
789 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0')); | |
790 Scan_Ptr := Scan_Ptr + 1; | |
791 Scale := Scale - 1; | |
792 C := Source (Scan_Ptr); | |
793 | |
794 -- Case of underline encountered | |
795 | |
796 if C = '_' then | |
797 | |
798 -- We do not accumulate the '_' in the checksum, so that | |
799 -- 1_234 is equivalent to 1234, and does not trigger | |
800 -- compilation for "minimal recompilation" (gnatmake -m). | |
801 | |
802 loop | |
803 Scan_Ptr := Scan_Ptr + 1; | |
804 C := Source (Scan_Ptr); | |
805 exit when C /= '_'; | |
806 Error_No_Double_Underline; | |
807 end loop; | |
808 | |
809 if C not in '0' .. '9' then | |
810 Error_Digit_Expected; | |
811 exit; | |
812 end if; | |
813 | |
814 else | |
815 exit when C not in '0' .. '9'; | |
816 end if; | |
817 end loop; | |
818 end Scan_Integer; | |
819 | |
820 -- Start of processing for Nlit | |
821 | |
822 begin | |
823 Base := 10; | |
824 UI_Base := Uint_10; | |
825 UI_Int_Value := Uint_0; | |
826 Based_Literal_Uses_Colon := False; | |
827 Scale := 0; | |
828 Scan_Integer; | |
829 Point_Scanned := False; | |
830 UI_Num_Value := UI_Int_Value; | |
831 | |
832 -- Various possibilities now for continuing the literal are period, | |
833 -- E/e (for exponent), or :/# (for based literal). | |
834 | |
835 Scale := 0; | |
836 C := Source (Scan_Ptr); | |
837 | |
838 if C = '.' then | |
839 | |
840 -- Scan out point, but do not scan past .. which is a range | |
841 -- sequence, and must not be eaten up scanning a numeric literal. | |
842 | |
843 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop | |
844 Accumulate_Checksum ('.'); | |
845 | |
846 if Point_Scanned then | |
847 Error_Msg_S ("duplicate point ignored"); | |
848 end if; | |
849 | |
850 Point_Scanned := True; | |
851 Scan_Ptr := Scan_Ptr + 1; | |
852 C := Source (Scan_Ptr); | |
853 | |
854 if C not in '0' .. '9' then | |
855 Error_Msg | |
856 ("real literal cannot end with point", Scan_Ptr - 1); | |
857 else | |
858 Scan_Integer; | |
859 UI_Num_Value := UI_Int_Value; | |
860 end if; | |
861 end loop; | |
862 | |
863 -- Based literal case. The base is the value we already scanned. | |
864 -- In the case of colon, we insist that the following character | |
865 -- is indeed an extended digit or a period. This catches a number | |
866 -- of common errors, as well as catching the well known tricky | |
867 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;" | |
868 | |
869 elsif C = '#' | |
870 or else (C = ':' and then | |
871 (Source (Scan_Ptr + 1) = '.' | |
872 or else | |
873 Source (Scan_Ptr + 1) in '0' .. '9' | |
874 or else | |
875 Source (Scan_Ptr + 1) in 'A' .. 'Z' | |
876 or else | |
877 Source (Scan_Ptr + 1) in 'a' .. 'z')) | |
878 then | |
879 Accumulate_Checksum (C); | |
880 Base_Char := C; | |
881 UI_Base := UI_Int_Value; | |
882 | |
883 if Base_Char = ':' then | |
884 Based_Literal_Uses_Colon := True; | |
885 end if; | |
886 | |
887 if UI_Base < 2 or else UI_Base > 16 then | |
888 Error_Msg_SC ("base not 2-16"); | |
889 UI_Base := Uint_16; | |
890 end if; | |
891 | |
892 Base := UI_To_Int (UI_Base); | |
893 Scan_Ptr := Scan_Ptr + 1; | |
894 | |
895 -- Scan out extended integer [. integer] | |
896 | |
897 C := Source (Scan_Ptr); | |
898 UI_Int_Value := Uint_0; | |
899 Scale := 0; | |
900 | |
901 loop | |
902 if C in '0' .. '9' then | |
903 Accumulate_Checksum (C); | |
904 Extended_Digit_Value := | |
905 Int'(Character'Pos (C)) - Int'(Character'Pos ('0')); | |
906 | |
907 elsif C in 'A' .. 'F' then | |
908 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32)); | |
909 Extended_Digit_Value := | |
910 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10; | |
911 | |
912 elsif C in 'a' .. 'f' then | |
913 Accumulate_Checksum (C); | |
914 Extended_Digit_Value := | |
915 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10; | |
916 | |
917 else | |
918 Error_Msg_S ("extended digit expected"); | |
919 exit; | |
920 end if; | |
921 | |
922 if Extended_Digit_Value >= Base then | |
923 Error_Msg_S ("digit '>= base"); | |
924 end if; | |
925 | |
926 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value; | |
927 Scale := Scale - 1; | |
928 Scan_Ptr := Scan_Ptr + 1; | |
929 C := Source (Scan_Ptr); | |
930 | |
931 if C = '_' then | |
932 loop | |
933 Accumulate_Checksum ('_'); | |
934 Scan_Ptr := Scan_Ptr + 1; | |
935 C := Source (Scan_Ptr); | |
936 exit when C /= '_'; | |
937 Error_No_Double_Underline; | |
938 end loop; | |
939 | |
940 elsif C = '.' then | |
941 Accumulate_Checksum ('.'); | |
942 | |
943 if Point_Scanned then | |
944 Error_Msg_S ("duplicate point ignored"); | |
945 end if; | |
946 | |
947 Scan_Ptr := Scan_Ptr + 1; | |
948 C := Source (Scan_Ptr); | |
949 Point_Scanned := True; | |
950 Scale := 0; | |
951 | |
952 elsif C = Base_Char then | |
953 Accumulate_Checksum (C); | |
954 Scan_Ptr := Scan_Ptr + 1; | |
955 exit; | |
956 | |
957 elsif C = '#' or else C = ':' then | |
958 Error_Msg_S ("based number delimiters must match"); | |
959 Scan_Ptr := Scan_Ptr + 1; | |
960 exit; | |
961 | |
962 elsif not Identifier_Char (C) then | |
963 if Base_Char = '#' then | |
964 Error_Msg_S -- CODEFIX | |
965 ("missing '#"); | |
966 else | |
967 Error_Msg_S -- CODEFIX | |
968 ("missing ':"); | |
969 end if; | |
970 | |
971 exit; | |
972 end if; | |
973 | |
974 end loop; | |
975 | |
976 UI_Num_Value := UI_Int_Value; | |
977 end if; | |
978 | |
979 -- Scan out exponent | |
980 | |
981 if not Point_Scanned then | |
982 Scale := 0; | |
983 UI_Scale := Uint_0; | |
984 else | |
985 UI_Scale := UI_From_Int (Scale); | |
986 end if; | |
987 | |
988 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then | |
989 Accumulate_Checksum ('e'); | |
990 Scan_Ptr := Scan_Ptr + 1; | |
991 Exponent_Is_Negative := False; | |
992 | |
993 if Source (Scan_Ptr) = '+' then | |
994 Accumulate_Checksum ('+'); | |
995 Scan_Ptr := Scan_Ptr + 1; | |
996 | |
997 elsif Source (Scan_Ptr) = '-' then | |
998 Accumulate_Checksum ('-'); | |
999 | |
1000 if not Point_Scanned then | |
1001 Error_Msg_S | |
1002 ("negative exponent not allowed for integer literal"); | |
1003 else | |
1004 Exponent_Is_Negative := True; | |
1005 end if; | |
1006 | |
1007 Scan_Ptr := Scan_Ptr + 1; | |
1008 end if; | |
1009 | |
1010 UI_Int_Value := Uint_0; | |
1011 | |
1012 if Source (Scan_Ptr) in '0' .. '9' then | |
1013 Scan_Integer; | |
1014 else | |
1015 Error_Digit_Expected; | |
1016 end if; | |
1017 | |
1018 if Exponent_Is_Negative then | |
1019 UI_Scale := UI_Scale - UI_Int_Value; | |
1020 else | |
1021 UI_Scale := UI_Scale + UI_Int_Value; | |
1022 end if; | |
1023 end if; | |
1024 | |
1025 -- Case of real literal to be returned | |
1026 | |
1027 if Point_Scanned then | |
1028 Token := Tok_Real_Literal; | |
1029 Real_Literal_Value := | |
1030 UR_From_Components ( | |
1031 Num => UI_Num_Value, | |
1032 Den => -UI_Scale, | |
1033 Rbase => Base); | |
1034 | |
1035 -- Case of integer literal to be returned | |
1036 | |
1037 else | |
1038 Token := Tok_Integer_Literal; | |
1039 | |
1040 if UI_Scale = 0 then | |
1041 Int_Literal_Value := UI_Num_Value; | |
1042 | |
1043 -- Avoid doing possibly expensive calculations in cases like | |
1044 -- parsing 163E800_000# when semantics will not be done anyway. | |
1045 -- This is especially useful when parsing garbled input. | |
1046 | |
1047 elsif Operating_Mode /= Check_Syntax | |
1048 and then (Serious_Errors_Detected = 0 or else Try_Semantics) | |
1049 then | |
1050 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale; | |
1051 | |
1052 else | |
1053 Int_Literal_Value := No_Uint; | |
1054 end if; | |
1055 end if; | |
1056 | |
1057 if Checksum_Accumulate_Token_Checksum then | |
1058 Accumulate_Token_Checksum; | |
1059 end if; | |
1060 | |
1061 return; | |
1062 end Nlit; | |
1063 | |
1064 ---------- | |
1065 -- Slit -- | |
1066 ---------- | |
1067 | |
1068 procedure Slit is | |
1069 | |
1070 Delimiter : Character; | |
1071 -- Delimiter (first character of string) | |
1072 | |
1073 C : Character; | |
1074 -- Current source program character | |
1075 | |
1076 Code : Char_Code; | |
1077 -- Current character code value | |
1078 | |
1079 Err : Boolean; | |
1080 -- Error flag for Scan_Wide call | |
1081 | |
1082 String_Start : Source_Ptr; | |
1083 -- Point to first character of string | |
1084 | |
1085 procedure Error_Bad_String_Char; | |
1086 -- Signal bad character in string/character literal. On entry | |
1087 -- Scan_Ptr points to the improper character encountered during the | |
1088 -- scan. Scan_Ptr is not modified, so it still points to the bad | |
1089 -- character on return. | |
1090 | |
1091 procedure Error_Unterminated_String; | |
1092 -- Procedure called if a line terminator character is encountered | |
1093 -- during scanning a string, meaning that the string is not properly | |
1094 -- terminated. | |
1095 | |
1096 procedure Set_String; | |
1097 -- Procedure used to distinguish between string and operator symbol. | |
1098 -- On entry the string has been scanned out, and its characters start | |
1099 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token | |
1100 -- is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate, | |
1101 -- and Token_Node is appropriately initialized. In addition, in the | |
1102 -- operator symbol case, Token_Name is appropriately set, and the | |
1103 -- flags [Wide_]Wide_Character_Found are set appropriately. | |
1104 | |
1105 --------------------------- | |
1106 -- Error_Bad_String_Char -- | |
1107 --------------------------- | |
1108 | |
1109 procedure Error_Bad_String_Char is | |
1110 C : constant Character := Source (Scan_Ptr); | |
1111 | |
1112 begin | |
1113 if C = HT then | |
1114 Error_Msg_S ("horizontal tab not allowed in string"); | |
1115 | |
1116 elsif C = VT or else C = FF then | |
1117 Error_Msg_S ("format effector not allowed in string"); | |
1118 | |
1119 elsif C in Upper_Half_Character then | |
1120 Error_Msg_S ("(Ada 83) upper half character not allowed"); | |
1121 | |
1122 else | |
1123 Error_Msg_S ("control character not allowed in string"); | |
1124 end if; | |
1125 end Error_Bad_String_Char; | |
1126 | |
1127 ------------------------------- | |
1128 -- Error_Unterminated_String -- | |
1129 ------------------------------- | |
1130 | |
1131 procedure Error_Unterminated_String is | |
1132 S : Source_Ptr; | |
1133 | |
1134 begin | |
1135 -- An interesting little refinement. Consider the following | |
1136 -- examples: | |
1137 | |
1138 -- A := "this is an unterminated string; | |
1139 -- A := "this is an unterminated string & | |
1140 -- P(A, "this is a parameter that didn't get terminated); | |
1141 -- P("this is a parameter that didn't get terminated, A); | |
1142 | |
1143 -- We fiddle a little to do slightly better placement in these | |
1144 -- cases also if there is white space at the end of the line we | |
1145 -- place the flag at the start of this white space, not at the | |
1146 -- end. Note that we only have to test for blanks, since tabs | |
1147 -- aren't allowed in strings in the first place and would have | |
1148 -- caused an error message. | |
1149 | |
1150 -- Two more cases that we treat specially are: | |
1151 | |
1152 -- A := "this string uses the wrong terminator' | |
1153 -- A := "this string uses the wrong terminator' & | |
1154 | |
1155 -- In these cases we give a different error message as well | |
1156 | |
1157 -- We actually reposition the scan pointer to the point where we | |
1158 -- place the flag in these cases, since it seems a better bet on | |
1159 -- the original intention. | |
1160 | |
1161 while Source (Scan_Ptr - 1) = ' ' | |
1162 or else Source (Scan_Ptr - 1) = '&' | |
1163 loop | |
1164 Scan_Ptr := Scan_Ptr - 1; | |
1165 Unstore_String_Char; | |
1166 end loop; | |
1167 | |
1168 -- Check for case of incorrect string terminator, but single quote | |
1169 -- is not considered incorrect if the opening terminator misused | |
1170 -- a single quote (error message already given). | |
1171 | |
1172 if Delimiter /= ''' | |
1173 and then Source (Scan_Ptr - 1) = ''' | |
1174 then | |
1175 Unstore_String_Char; | |
1176 Error_Msg | |
1177 ("incorrect string terminator character", Scan_Ptr - 1); | |
1178 return; | |
1179 end if; | |
1180 | |
1181 -- Backup over semicolon or right-paren/semicolon sequence | |
1182 | |
1183 if Source (Scan_Ptr - 1) = ';' then | |
1184 Scan_Ptr := Scan_Ptr - 1; | |
1185 Unstore_String_Char; | |
1186 | |
1187 if Source (Scan_Ptr - 1) = ')' then | |
1188 Scan_Ptr := Scan_Ptr - 1; | |
1189 Unstore_String_Char; | |
1190 end if; | |
1191 end if; | |
1192 | |
1193 -- See if there is a comma in the string, if so, guess that | |
1194 -- the first comma terminates the string. | |
1195 | |
1196 S := String_Start; | |
1197 while S < Scan_Ptr loop | |
1198 if Source (S) = ',' then | |
1199 while Scan_Ptr > S loop | |
1200 Scan_Ptr := Scan_Ptr - 1; | |
1201 Unstore_String_Char; | |
1202 end loop; | |
1203 | |
1204 exit; | |
1205 end if; | |
1206 | |
1207 S := S + 1; | |
1208 end loop; | |
1209 | |
1210 -- Now we have adjusted the scan pointer, give message | |
1211 | |
1212 Error_Msg_S -- CODEFIX | |
1213 ("missing string quote"); | |
1214 end Error_Unterminated_String; | |
1215 | |
1216 ---------------- | |
1217 -- Set_String -- | |
1218 ---------------- | |
1219 | |
1220 procedure Set_String is | |
1221 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2); | |
1222 C1 : Character; | |
1223 C2 : Character; | |
1224 C3 : Character; | |
1225 | |
1226 begin | |
1227 -- Token_Name is currently set to Error_Name. The following | |
1228 -- section of code resets Token_Name to the proper Name_Op_xx | |
1229 -- value if the string is a valid operator symbol, otherwise it is | |
1230 -- left set to Error_Name. | |
1231 | |
1232 if Slen = 1 then | |
1233 C1 := Source (Token_Ptr + 1); | |
1234 | |
1235 case C1 is | |
1236 when '=' => | |
1237 Token_Name := Name_Op_Eq; | |
1238 | |
1239 when '>' => | |
1240 Token_Name := Name_Op_Gt; | |
1241 | |
1242 when '<' => | |
1243 Token_Name := Name_Op_Lt; | |
1244 | |
1245 when '+' => | |
1246 Token_Name := Name_Op_Add; | |
1247 | |
1248 when '-' => | |
1249 Token_Name := Name_Op_Subtract; | |
1250 | |
1251 when '&' => | |
1252 Token_Name := Name_Op_Concat; | |
1253 | |
1254 when '*' => | |
1255 Token_Name := Name_Op_Multiply; | |
1256 | |
1257 when '/' => | |
1258 Token_Name := Name_Op_Divide; | |
1259 | |
1260 when others => | |
1261 null; | |
1262 end case; | |
1263 | |
1264 elsif Slen = 2 then | |
1265 C1 := Source (Token_Ptr + 1); | |
1266 C2 := Source (Token_Ptr + 2); | |
1267 | |
1268 if C1 = '*' and then C2 = '*' then | |
1269 Token_Name := Name_Op_Expon; | |
1270 | |
1271 elsif C2 = '=' then | |
1272 | |
1273 if C1 = '/' then | |
1274 Token_Name := Name_Op_Ne; | |
1275 elsif C1 = '<' then | |
1276 Token_Name := Name_Op_Le; | |
1277 elsif C1 = '>' then | |
1278 Token_Name := Name_Op_Ge; | |
1279 end if; | |
1280 | |
1281 elsif (C1 = 'O' or else C1 = 'o') and then -- OR | |
1282 (C2 = 'R' or else C2 = 'r') | |
1283 then | |
1284 Token_Name := Name_Op_Or; | |
1285 end if; | |
1286 | |
1287 elsif Slen = 3 then | |
1288 C1 := Source (Token_Ptr + 1); | |
1289 C2 := Source (Token_Ptr + 2); | |
1290 C3 := Source (Token_Ptr + 3); | |
1291 | |
1292 if (C1 = 'A' or else C1 = 'a') and then -- AND | |
1293 (C2 = 'N' or else C2 = 'n') and then | |
1294 (C3 = 'D' or else C3 = 'd') | |
1295 then | |
1296 Token_Name := Name_Op_And; | |
1297 | |
1298 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS | |
1299 (C2 = 'B' or else C2 = 'b') and then | |
1300 (C3 = 'S' or else C3 = 's') | |
1301 then | |
1302 Token_Name := Name_Op_Abs; | |
1303 | |
1304 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD | |
1305 (C2 = 'O' or else C2 = 'o') and then | |
1306 (C3 = 'D' or else C3 = 'd') | |
1307 then | |
1308 Token_Name := Name_Op_Mod; | |
1309 | |
1310 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT | |
1311 (C2 = 'O' or else C2 = 'o') and then | |
1312 (C3 = 'T' or else C3 = 't') | |
1313 then | |
1314 Token_Name := Name_Op_Not; | |
1315 | |
1316 elsif (C1 = 'R' or else C1 = 'r') and then -- REM | |
1317 (C2 = 'E' or else C2 = 'e') and then | |
1318 (C3 = 'M' or else C3 = 'm') | |
1319 then | |
1320 Token_Name := Name_Op_Rem; | |
1321 | |
1322 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR | |
1323 (C2 = 'O' or else C2 = 'o') and then | |
1324 (C3 = 'R' or else C3 = 'r') | |
1325 then | |
1326 Token_Name := Name_Op_Xor; | |
1327 end if; | |
1328 | |
1329 end if; | |
1330 | |
1331 -- If it is an operator symbol, then Token_Name is set. If it is | |
1332 -- some other string value, then Token_Name still contains | |
1333 -- Error_Name. | |
1334 | |
1335 if Token_Name = Error_Name then | |
1336 Token := Tok_String_Literal; | |
1337 | |
1338 else | |
1339 Token := Tok_Operator_Symbol; | |
1340 end if; | |
1341 end Set_String; | |
1342 | |
1343 -- Start of processing for Slit | |
1344 | |
1345 begin | |
1346 -- On entry, Scan_Ptr points to the opening character of the string | |
1347 -- which is either a percent, double quote, or apostrophe (single | |
1348 -- quote). The latter case is an error detected by the character | |
1349 -- literal circuit. | |
1350 | |
1351 String_Start := Scan_Ptr; | |
1352 | |
1353 Delimiter := Source (Scan_Ptr); | |
1354 Accumulate_Checksum (Delimiter); | |
1355 | |
1356 Start_String; | |
1357 Wide_Character_Found := False; | |
1358 Wide_Wide_Character_Found := False; | |
1359 Scan_Ptr := Scan_Ptr + 1; | |
1360 | |
1361 -- Loop to scan out characters of string literal | |
1362 | |
1363 loop | |
1364 C := Source (Scan_Ptr); | |
1365 | |
1366 if C = Delimiter then | |
1367 Accumulate_Checksum (C); | |
1368 Scan_Ptr := Scan_Ptr + 1; | |
1369 exit when Source (Scan_Ptr) /= Delimiter; | |
1370 Code := Get_Char_Code (C); | |
1371 Accumulate_Checksum (C); | |
1372 Scan_Ptr := Scan_Ptr + 1; | |
1373 | |
1374 else | |
1375 if C = '"' and then Delimiter = '%' then | |
1376 Error_Msg_S | |
1377 ("quote not allowed in percent delimited string"); | |
1378 Code := Get_Char_Code (C); | |
1379 Scan_Ptr := Scan_Ptr + 1; | |
1380 | |
1381 elsif Start_Of_Wide_Character then | |
1382 Wptr := Scan_Ptr; | |
1383 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
1384 | |
1385 if Err then | |
1386 Error_Illegal_Wide_Character; | |
1387 Code := Get_Char_Code (' '); | |
1388 end if; | |
1389 | |
1390 Accumulate_Checksum (Code); | |
1391 | |
1392 -- In Ada 95 mode we allow any wide characters in a string | |
1393 -- but in Ada 2005, the set of characters allowed has been | |
1394 -- restricted to graphic characters. | |
1395 | |
1396 if Ada_Version >= Ada_2005 | |
1397 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) | |
1398 then | |
1399 Error_Msg | |
1400 ("(Ada 2005) non-graphic character not permitted " & | |
1401 "in string literal", Wptr); | |
1402 end if; | |
1403 | |
1404 else | |
1405 Accumulate_Checksum (C); | |
1406 | |
1407 if C not in Graphic_Character then | |
1408 if C in Line_Terminator then | |
1409 Error_Unterminated_String; | |
1410 exit; | |
1411 | |
1412 elsif C in Upper_Half_Character then | |
1413 if Ada_Version = Ada_83 then | |
1414 Error_Bad_String_Char; | |
1415 end if; | |
1416 | |
1417 else | |
1418 Error_Bad_String_Char; | |
1419 end if; | |
1420 end if; | |
1421 | |
1422 Code := Get_Char_Code (C); | |
1423 Scan_Ptr := Scan_Ptr + 1; | |
1424 end if; | |
1425 end if; | |
1426 | |
1427 Store_String_Char (Code); | |
1428 | |
1429 if not In_Character_Range (Code) then | |
1430 if In_Wide_Character_Range (Code) then | |
1431 Wide_Character_Found := True; | |
1432 else | |
1433 Wide_Wide_Character_Found := True; | |
1434 end if; | |
1435 end if; | |
1436 end loop; | |
1437 | |
1438 String_Literal_Id := End_String; | |
1439 Set_String; | |
1440 return; | |
1441 end Slit; | |
1442 | |
1443 ---------------------------------- | |
1444 -- Skip_Other_Format_Characters -- | |
1445 ---------------------------------- | |
1446 | |
1447 procedure Skip_Other_Format_Characters is | |
1448 P : Source_Ptr; | |
1449 Code : Char_Code; | |
1450 Err : Boolean; | |
1451 | |
1452 begin | |
1453 while Start_Of_Wide_Character loop | |
1454 P := Scan_Ptr; | |
1455 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
1456 | |
1457 if not Is_UTF_32_Other (UTF_32 (Code)) then | |
1458 Scan_Ptr := P; | |
1459 return; | |
1460 end if; | |
1461 end loop; | |
1462 end Skip_Other_Format_Characters; | |
1463 | |
1464 ----------------------------- | |
1465 -- Start_Of_Wide_Character -- | |
1466 ----------------------------- | |
1467 | |
1468 function Start_Of_Wide_Character return Boolean is | |
1469 C : constant Character := Source (Scan_Ptr); | |
1470 | |
1471 begin | |
1472 -- ESC encoding method with ESC present | |
1473 | |
1474 if C = ESC | |
1475 and then Wide_Character_Encoding_Method in WC_ESC_Encoding_Method | |
1476 then | |
1477 return True; | |
1478 | |
1479 -- Upper half character with upper half encoding | |
1480 | |
1481 elsif C in Upper_Half_Character and then Upper_Half_Encoding then | |
1482 return True; | |
1483 | |
1484 -- Brackets encoding | |
1485 | |
1486 elsif C = '[' | |
1487 and then Source (Scan_Ptr + 1) = '"' | |
1488 and then Identifier_Char (Source (Scan_Ptr + 2)) | |
1489 then | |
1490 return True; | |
1491 | |
1492 -- Not the start of a wide character | |
1493 | |
1494 else | |
1495 return False; | |
1496 end if; | |
1497 end Start_Of_Wide_Character; | |
1498 | |
1499 -- Start of processing for Scan | |
1500 | |
1501 begin | |
1502 Prev_Token := Token; | |
1503 Prev_Token_Ptr := Token_Ptr; | |
1504 Token_Name := Error_Name; | |
1505 | |
1506 -- The following loop runs more than once only if a format effector | |
1507 -- (tab, vertical tab, form feed, line feed, carriage return) is | |
1508 -- encountered and skipped, or some error situation, such as an | |
1509 -- illegal character, is encountered. | |
1510 | |
1511 <<Scan_Next_Character>> | |
1512 | |
1513 loop | |
1514 -- Skip past blanks, loop is opened up for speed | |
1515 | |
1516 while Source (Scan_Ptr) = ' ' loop | |
1517 if Source (Scan_Ptr + 1) /= ' ' then | |
1518 Scan_Ptr := Scan_Ptr + 1; | |
1519 exit; | |
1520 end if; | |
1521 | |
1522 if Source (Scan_Ptr + 2) /= ' ' then | |
1523 Scan_Ptr := Scan_Ptr + 2; | |
1524 exit; | |
1525 end if; | |
1526 | |
1527 if Source (Scan_Ptr + 3) /= ' ' then | |
1528 Scan_Ptr := Scan_Ptr + 3; | |
1529 exit; | |
1530 end if; | |
1531 | |
1532 if Source (Scan_Ptr + 4) /= ' ' then | |
1533 Scan_Ptr := Scan_Ptr + 4; | |
1534 exit; | |
1535 end if; | |
1536 | |
1537 if Source (Scan_Ptr + 5) /= ' ' then | |
1538 Scan_Ptr := Scan_Ptr + 5; | |
1539 exit; | |
1540 end if; | |
1541 | |
1542 if Source (Scan_Ptr + 6) /= ' ' then | |
1543 Scan_Ptr := Scan_Ptr + 6; | |
1544 exit; | |
1545 end if; | |
1546 | |
1547 if Source (Scan_Ptr + 7) /= ' ' then | |
1548 Scan_Ptr := Scan_Ptr + 7; | |
1549 exit; | |
1550 end if; | |
1551 | |
1552 Scan_Ptr := Scan_Ptr + 8; | |
1553 end loop; | |
1554 | |
1555 -- We are now at a non-blank character, which is the first character | |
1556 -- of the token we will scan, and hence the value of Token_Ptr. | |
1557 | |
1558 Token_Ptr := Scan_Ptr; | |
1559 | |
1560 -- Here begins the main case statement which transfers control on the | |
1561 -- basis of the non-blank character we have encountered. | |
1562 | |
1563 case Source (Scan_Ptr) is | |
1564 | |
1565 -- Line terminator characters | |
1566 | |
1567 when CR | LF | FF | VT => | |
1568 goto Scan_Line_Terminator; | |
1569 | |
1570 -- Horizontal tab, just skip past it | |
1571 | |
1572 when HT => | |
1573 if Style_Check then | |
1574 Style.Check_HT; | |
1575 end if; | |
1576 | |
1577 Scan_Ptr := Scan_Ptr + 1; | |
1578 | |
1579 -- End of file character, treated as an end of file only if it is | |
1580 -- the last character in the buffer, otherwise it is ignored. | |
1581 | |
1582 when EOF => | |
1583 if Scan_Ptr = Source_Last (Current_Source_File) then | |
1584 Check_End_Of_Line; | |
1585 | |
1586 if Style_Check then | |
1587 Style.Check_EOF; | |
1588 end if; | |
1589 | |
1590 Token := Tok_EOF; | |
1591 return; | |
1592 else | |
1593 Scan_Ptr := Scan_Ptr + 1; | |
1594 end if; | |
1595 | |
1596 -- Ampersand | |
1597 | |
1598 when '&' => | |
1599 Accumulate_Checksum ('&'); | |
1600 | |
1601 if Source (Scan_Ptr + 1) = '&' then | |
1602 Error_Msg_S -- CODEFIX | |
1603 ("'&'& should be `AND THEN`"); | |
1604 Scan_Ptr := Scan_Ptr + 2; | |
1605 Token := Tok_And; | |
1606 return; | |
1607 | |
1608 else | |
1609 Scan_Ptr := Scan_Ptr + 1; | |
1610 Token := Tok_Ampersand; | |
1611 return; | |
1612 end if; | |
1613 | |
1614 when '@' => | |
1615 if Ada_Version < Ada_2020 then | |
1616 Error_Msg ("target_name is an Ada 2020 feature", Scan_Ptr); | |
1617 Scan_Ptr := Scan_Ptr + 1; | |
1618 | |
1619 else | |
1620 -- AI12-0125-03 : @ is target_name | |
1621 | |
1622 Accumulate_Checksum ('@'); | |
1623 Scan_Ptr := Scan_Ptr + 1; | |
1624 Token := Tok_At_Sign; | |
1625 return; | |
1626 end if; | |
1627 | |
1628 -- Asterisk (can be multiplication operator or double asterisk which | |
1629 -- is the exponentiation compound delimiter). | |
1630 | |
1631 when '*' => | |
1632 Accumulate_Checksum ('*'); | |
1633 | |
1634 if Source (Scan_Ptr + 1) = '*' then | |
1635 Accumulate_Checksum ('*'); | |
1636 Scan_Ptr := Scan_Ptr + 2; | |
1637 Token := Tok_Double_Asterisk; | |
1638 return; | |
1639 | |
1640 else | |
1641 Scan_Ptr := Scan_Ptr + 1; | |
1642 Token := Tok_Asterisk; | |
1643 return; | |
1644 end if; | |
1645 | |
1646 -- Colon, which can either be an isolated colon, or part of an | |
1647 -- assignment compound delimiter. | |
1648 | |
1649 when ':' => | |
1650 Accumulate_Checksum (':'); | |
1651 | |
1652 if Double_Char_Token ('=') then | |
1653 Token := Tok_Colon_Equal; | |
1654 | |
1655 if Style_Check then | |
1656 Style.Check_Colon_Equal; | |
1657 end if; | |
1658 | |
1659 return; | |
1660 | |
1661 elsif Source (Scan_Ptr + 1) = '-' | |
1662 and then Source (Scan_Ptr + 2) /= '-' | |
1663 then | |
1664 Token := Tok_Colon_Equal; | |
1665 Error_Msg -- CODEFIX | |
1666 (":- should be :=", Scan_Ptr); | |
1667 Scan_Ptr := Scan_Ptr + 2; | |
1668 return; | |
1669 | |
1670 else | |
1671 Scan_Ptr := Scan_Ptr + 1; | |
1672 Token := Tok_Colon; | |
1673 | |
1674 if Style_Check then | |
1675 Style.Check_Colon; | |
1676 end if; | |
1677 | |
1678 return; | |
1679 end if; | |
1680 | |
1681 -- Left parenthesis | |
1682 | |
1683 when '(' => | |
1684 Accumulate_Checksum ('('); | |
1685 Scan_Ptr := Scan_Ptr + 1; | |
1686 Token := Tok_Left_Paren; | |
1687 | |
1688 if Style_Check then | |
1689 Style.Check_Left_Paren; | |
1690 end if; | |
1691 | |
1692 return; | |
1693 | |
1694 -- Left bracket | |
1695 | |
1696 when '[' => | |
1697 if Source (Scan_Ptr + 1) = '"' then | |
1698 goto Scan_Wide_Character; | |
1699 | |
1700 else | |
1701 Error_Msg_S ("illegal character, replaced by ""("""); | |
1702 Scan_Ptr := Scan_Ptr + 1; | |
1703 Token := Tok_Left_Paren; | |
1704 return; | |
1705 end if; | |
1706 | |
1707 -- Left brace | |
1708 | |
1709 when '{' => | |
1710 Error_Msg_S ("illegal character, replaced by ""("""); | |
1711 Scan_Ptr := Scan_Ptr + 1; | |
1712 Token := Tok_Left_Paren; | |
1713 return; | |
1714 | |
1715 -- Comma | |
1716 | |
1717 when ',' => | |
1718 Accumulate_Checksum (','); | |
1719 Scan_Ptr := Scan_Ptr + 1; | |
1720 Token := Tok_Comma; | |
1721 | |
1722 if Style_Check then | |
1723 Style.Check_Comma; | |
1724 end if; | |
1725 | |
1726 return; | |
1727 | |
1728 -- Dot, which is either an isolated period, or part of a double dot | |
1729 -- compound delimiter sequence. We also check for the case of a | |
1730 -- digit following the period, to give a better error message. | |
1731 | |
1732 when '.' => | |
1733 Accumulate_Checksum ('.'); | |
1734 | |
1735 if Double_Char_Token ('.') then | |
1736 Token := Tok_Dot_Dot; | |
1737 | |
1738 if Style_Check then | |
1739 Style.Check_Dot_Dot; | |
1740 end if; | |
1741 | |
1742 return; | |
1743 | |
1744 elsif Source (Scan_Ptr + 1) in '0' .. '9' then | |
1745 Error_Msg_S ("numeric literal cannot start with point"); | |
1746 Scan_Ptr := Scan_Ptr + 1; | |
1747 | |
1748 else | |
1749 Scan_Ptr := Scan_Ptr + 1; | |
1750 Token := Tok_Dot; | |
1751 return; | |
1752 end if; | |
1753 | |
1754 -- Equal, which can either be an equality operator, or part of the | |
1755 -- arrow (=>) compound delimiter. | |
1756 | |
1757 when '=' => | |
1758 Accumulate_Checksum ('='); | |
1759 | |
1760 if Double_Char_Token ('>') then | |
1761 Token := Tok_Arrow; | |
1762 | |
1763 if Style_Check then | |
1764 Style.Check_Arrow (Inside_Depends); | |
1765 end if; | |
1766 | |
1767 return; | |
1768 | |
1769 elsif Source (Scan_Ptr + 1) = '=' then | |
1770 Error_Msg_S -- CODEFIX | |
1771 ("== should be ="); | |
1772 Scan_Ptr := Scan_Ptr + 1; | |
1773 end if; | |
1774 | |
1775 Scan_Ptr := Scan_Ptr + 1; | |
1776 Token := Tok_Equal; | |
1777 return; | |
1778 | |
1779 -- Greater than, which can be a greater than operator, greater than | |
1780 -- or equal operator, or first character of a right label bracket. | |
1781 | |
1782 when '>' => | |
1783 Accumulate_Checksum ('>'); | |
1784 | |
1785 if Double_Char_Token ('=') then | |
1786 Token := Tok_Greater_Equal; | |
1787 return; | |
1788 | |
1789 elsif Double_Char_Token ('>') then | |
1790 Token := Tok_Greater_Greater; | |
1791 return; | |
1792 | |
1793 else | |
1794 Scan_Ptr := Scan_Ptr + 1; | |
1795 Token := Tok_Greater; | |
1796 return; | |
1797 end if; | |
1798 | |
1799 -- Less than, which can be a less than operator, less than or equal | |
1800 -- operator, or the first character of a left label bracket, or the | |
1801 -- first character of a box (<>) compound delimiter. | |
1802 | |
1803 when '<' => | |
1804 Accumulate_Checksum ('<'); | |
1805 | |
1806 if Double_Char_Token ('=') then | |
1807 Token := Tok_Less_Equal; | |
1808 return; | |
1809 | |
1810 elsif Double_Char_Token ('>') then | |
1811 Token := Tok_Box; | |
1812 | |
1813 if Style_Check then | |
1814 Style.Check_Box; | |
1815 end if; | |
1816 | |
1817 return; | |
1818 | |
1819 elsif Double_Char_Token ('<') then | |
1820 Token := Tok_Less_Less; | |
1821 return; | |
1822 | |
1823 else | |
1824 Scan_Ptr := Scan_Ptr + 1; | |
1825 Token := Tok_Less; | |
1826 return; | |
1827 end if; | |
1828 | |
1829 -- Minus, which is either a subtraction operator, or the first | |
1830 -- character of double minus starting a comment | |
1831 | |
1832 when '-' => Minus_Case : begin | |
1833 if Source (Scan_Ptr + 1) = '>' then | |
1834 Error_Msg_S ("invalid token"); | |
1835 Scan_Ptr := Scan_Ptr + 2; | |
1836 Token := Tok_Arrow; | |
1837 return; | |
1838 | |
1839 elsif Source (Scan_Ptr + 1) /= '-' then | |
1840 Accumulate_Checksum ('-'); | |
1841 Scan_Ptr := Scan_Ptr + 1; | |
1842 Token := Tok_Minus; | |
1843 return; | |
1844 | |
1845 -- Comment | |
1846 | |
1847 else -- Source (Scan_Ptr + 1) = '-' then | |
1848 if Style_Check then | |
1849 Style.Check_Comment; | |
1850 end if; | |
1851 | |
1852 Scan_Ptr := Scan_Ptr + 2; | |
1853 | |
1854 -- If we are in preprocessor mode with Replace_In_Comments set, | |
1855 -- then we return the "--" as a token on its own. | |
1856 | |
1857 if Replace_In_Comments then | |
1858 Token := Tok_Comment; | |
1859 return; | |
1860 end if; | |
1861 | |
1862 -- Otherwise scan out the comment | |
1863 | |
1864 Start_Of_Comment := Scan_Ptr; | |
1865 | |
1866 -- Loop to scan comment (this loop runs more than once only if | |
1867 -- a horizontal tab or other non-graphic character is scanned) | |
1868 | |
1869 loop | |
1870 -- Scan to non graphic character (opened up for speed) | |
1871 | |
1872 -- Note that we just eat left brackets, which means that | |
1873 -- bracket notation cannot be used for end of line | |
1874 -- characters in comments. This seems a reasonable choice, | |
1875 -- since no one would ever use brackets notation in a real | |
1876 -- program in this situation, and if we allow brackets | |
1877 -- notation, we forbid some valid comments which contain a | |
1878 -- brackets sequence that happens to match an end of line | |
1879 -- character. | |
1880 | |
1881 loop | |
1882 exit when Source (Scan_Ptr) not in Graphic_Character; | |
1883 Scan_Ptr := Scan_Ptr + 1; | |
1884 exit when Source (Scan_Ptr) not in Graphic_Character; | |
1885 Scan_Ptr := Scan_Ptr + 1; | |
1886 exit when Source (Scan_Ptr) not in Graphic_Character; | |
1887 Scan_Ptr := Scan_Ptr + 1; | |
1888 exit when Source (Scan_Ptr) not in Graphic_Character; | |
1889 Scan_Ptr := Scan_Ptr + 1; | |
1890 exit when Source (Scan_Ptr) not in Graphic_Character; | |
1891 Scan_Ptr := Scan_Ptr + 1; | |
1892 end loop; | |
1893 | |
1894 -- Keep going if horizontal tab | |
1895 | |
1896 if Source (Scan_Ptr) = HT then | |
1897 if Style_Check then | |
1898 Style.Check_HT; | |
1899 end if; | |
1900 | |
1901 Scan_Ptr := Scan_Ptr + 1; | |
1902 | |
1903 -- Terminate scan of comment if line terminator | |
1904 | |
1905 elsif Source (Scan_Ptr) in Line_Terminator then | |
1906 exit; | |
1907 | |
1908 -- Terminate scan of comment if end of file encountered | |
1909 -- (embedded EOF character or real last character in file) | |
1910 | |
1911 elsif Source (Scan_Ptr) = EOF then | |
1912 exit; | |
1913 | |
1914 -- If we have a wide character, we have to scan it out, | |
1915 -- because it might be a legitimate line terminator | |
1916 | |
1917 elsif Start_Of_Wide_Character then | |
1918 declare | |
1919 Wptr : constant Source_Ptr := Scan_Ptr; | |
1920 Code : Char_Code; | |
1921 Err : Boolean; | |
1922 | |
1923 begin | |
1924 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
1925 | |
1926 -- If not well formed wide character, then just skip | |
1927 -- past it and ignore it. | |
1928 | |
1929 if Err then | |
1930 Scan_Ptr := Wptr + 1; | |
1931 | |
1932 -- If UTF_32 terminator, terminate comment scan | |
1933 | |
1934 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then | |
1935 Scan_Ptr := Wptr; | |
1936 exit; | |
1937 end if; | |
1938 end; | |
1939 | |
1940 -- Keep going if character in 80-FF range, or is ESC. These | |
1941 -- characters are allowed in comments by RM-2.1(1), 2.7(2). | |
1942 -- They are allowed even in Ada 83 mode according to the | |
1943 -- approved AI. ESC was added to the AI in June 93. | |
1944 | |
1945 elsif Source (Scan_Ptr) in Upper_Half_Character | |
1946 or else Source (Scan_Ptr) = ESC | |
1947 then | |
1948 Scan_Ptr := Scan_Ptr + 1; | |
1949 | |
1950 -- Otherwise we have an illegal comment character, ignore | |
1951 -- this error in relaxed semantics mode. | |
1952 | |
1953 else | |
1954 if Relaxed_RM_Semantics then | |
1955 Scan_Ptr := Scan_Ptr + 1; | |
1956 else | |
1957 Error_Illegal_Character; | |
1958 end if; | |
1959 end if; | |
1960 end loop; | |
1961 | |
1962 -- Note that, except when comments are tokens, we do NOT | |
1963 -- execute a return here, instead we fall through to reexecute | |
1964 -- the scan loop to look for a token. | |
1965 | |
1966 if Comment_Is_Token then | |
1967 Name_Len := Integer (Scan_Ptr - Start_Of_Comment); | |
1968 Name_Buffer (1 .. Name_Len) := | |
1969 String (Source (Start_Of_Comment .. Scan_Ptr - 1)); | |
1970 Comment_Id := Name_Find; | |
1971 Token := Tok_Comment; | |
1972 return; | |
1973 end if; | |
1974 | |
1975 -- If the SPARK restriction is set for this unit, then generate | |
1976 -- a token Tok_SPARK_Hide for a SPARK HIDE directive. | |
1977 | |
1978 if Restriction_Check_Required (SPARK_05) | |
1979 and then Source (Start_Of_Comment) = '#' | |
1980 then | |
1981 declare | |
1982 Scan_SPARK_Ptr : Source_Ptr; | |
1983 | |
1984 begin | |
1985 Scan_SPARK_Ptr := Start_Of_Comment + 1; | |
1986 | |
1987 -- Scan out blanks | |
1988 | |
1989 while Source (Scan_SPARK_Ptr) = ' ' | |
1990 or else Source (Scan_SPARK_Ptr) = HT | |
1991 loop | |
1992 Scan_SPARK_Ptr := Scan_SPARK_Ptr + 1; | |
1993 end loop; | |
1994 | |
1995 -- Recognize HIDE directive. SPARK input cannot be | |
1996 -- encoded as wide characters, so only deal with | |
1997 -- lower/upper case. | |
1998 | |
1999 if (Source (Scan_SPARK_Ptr) = 'h' | |
2000 or else Source (Scan_SPARK_Ptr) = 'H') | |
2001 and then (Source (Scan_SPARK_Ptr + 1) = 'i' | |
2002 or else Source (Scan_SPARK_Ptr + 1) = 'I') | |
2003 and then (Source (Scan_SPARK_Ptr + 2) = 'd' | |
2004 or else Source (Scan_SPARK_Ptr + 2) = 'D') | |
2005 and then (Source (Scan_SPARK_Ptr + 3) = 'e' | |
2006 or else Source (Scan_SPARK_Ptr + 3) = 'E') | |
2007 and then (Source (Scan_SPARK_Ptr + 4) = ' ' | |
2008 or else Source (Scan_SPARK_Ptr + 4) = HT) | |
2009 then | |
2010 Token := Tok_SPARK_Hide; | |
2011 return; | |
2012 end if; | |
2013 end; | |
2014 end if; | |
2015 end if; | |
2016 end Minus_Case; | |
2017 | |
2018 -- Double quote or percent starting a string literal | |
2019 | |
2020 when '"' | '%' => | |
2021 Slit; | |
2022 Post_Scan; | |
2023 return; | |
2024 | |
2025 -- Apostrophe. This can either be the start of a character literal, | |
2026 -- or an isolated apostrophe used in a qualified expression or an | |
2027 -- attribute. In the following: | |
2028 | |
2029 -- A := CHARACTER'('A'); | |
2030 | |
2031 -- the first apostrophe is treated as an isolated apostrophe, and the | |
2032 -- second one is treated as the start of the character literal 'A'. | |
2033 -- Note that RM-2.2(7) does not require a separator between "'" and | |
2034 -- "(" in the above, so we cannot use lookahead to distinguish the | |
2035 -- cases; we use look-back instead. Analysis of the grammar shows | |
2036 -- that some tokens can be followed by an apostrophe, and some by a | |
2037 -- character literal, but none by both. Some cannot be followed by | |
2038 -- either, so it doesn't matter what we do in those cases, except to | |
2039 -- get good error behavior. | |
2040 | |
2041 when ''' => Char_Literal_Case : declare | |
2042 Code : Char_Code; | |
2043 Err : Boolean; | |
2044 | |
2045 begin | |
2046 Accumulate_Checksum ('''); | |
2047 Scan_Ptr := Scan_Ptr + 1; | |
2048 | |
2049 -- Distinguish between apostrophe and character literal. It's an | |
2050 -- apostrophe if the previous token is one of the following. | |
2051 -- Reserved words are included for things like A.all'Address and | |
2052 -- T'Digits'Img. Strings literals are included for things like | |
2053 -- "abs"'Address. Other literals are included to give better error | |
2054 -- behavior for illegal cases like 123'Img. | |
2055 -- | |
2056 -- In Ada 2020, a target name (i.e. @) is a valid prefix of an | |
2057 -- attribute, and functions like a name. | |
2058 | |
2059 if Prev_Token = Tok_All | |
2060 or else Prev_Token = Tok_At_Sign | |
2061 or else Prev_Token = Tok_Delta | |
2062 or else Prev_Token = Tok_Digits | |
2063 or else Prev_Token = Tok_Identifier | |
2064 or else Prev_Token = Tok_Project | |
2065 or else Prev_Token = Tok_Right_Paren | |
2066 or else Prev_Token in Token_Class_Literal | |
2067 then | |
2068 Token := Tok_Apostrophe; | |
2069 | |
2070 if Style_Check then | |
2071 Style.Check_Apostrophe; | |
2072 end if; | |
2073 | |
2074 return; | |
2075 | |
2076 -- Otherwise the apostrophe starts a character literal | |
2077 | |
2078 else | |
2079 -- Case of wide character literal | |
2080 | |
2081 if Start_Of_Wide_Character then | |
2082 Wptr := Scan_Ptr; | |
2083 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
2084 Accumulate_Checksum (Code); | |
2085 | |
2086 if Err then | |
2087 Error_Illegal_Wide_Character; | |
2088 Code := Character'Pos (' '); | |
2089 | |
2090 -- In Ada 95 mode we allow any wide character in a character | |
2091 -- literal, but in Ada 2005, the set of characters allowed | |
2092 -- is restricted to graphic characters. | |
2093 | |
2094 elsif Ada_Version >= Ada_2005 | |
2095 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) | |
2096 then | |
2097 Error_Msg -- CODEFIX???? | |
2098 ("(Ada 2005) non-graphic character not permitted " & | |
2099 "in character literal", Wptr); | |
2100 end if; | |
2101 | |
2102 if Source (Scan_Ptr) /= ''' then | |
2103 Error_Msg_S ("missing apostrophe"); | |
2104 else | |
2105 Scan_Ptr := Scan_Ptr + 1; | |
2106 end if; | |
2107 | |
2108 -- If we do not find a closing quote in the expected place then | |
2109 -- assume that we have a misguided attempt at a string literal. | |
2110 | |
2111 -- However, if previous token is RANGE, then we return an | |
2112 -- apostrophe instead since this gives better error recovery | |
2113 | |
2114 elsif Source (Scan_Ptr + 1) /= ''' then | |
2115 if Prev_Token = Tok_Range then | |
2116 Token := Tok_Apostrophe; | |
2117 return; | |
2118 | |
2119 else | |
2120 Scan_Ptr := Scan_Ptr - 1; | |
2121 Error_Msg_S | |
2122 ("strings are delimited by double quote character"); | |
2123 Slit; | |
2124 Post_Scan; | |
2125 return; | |
2126 end if; | |
2127 | |
2128 -- Otherwise we have a (non-wide) character literal | |
2129 | |
2130 else | |
2131 Accumulate_Checksum (Source (Scan_Ptr)); | |
2132 | |
2133 if Source (Scan_Ptr) not in Graphic_Character then | |
2134 if Source (Scan_Ptr) in Upper_Half_Character then | |
2135 if Ada_Version = Ada_83 then | |
2136 Error_Illegal_Character; | |
2137 end if; | |
2138 | |
2139 else | |
2140 Error_Illegal_Character; | |
2141 end if; | |
2142 end if; | |
2143 | |
2144 Code := Get_Char_Code (Source (Scan_Ptr)); | |
2145 Scan_Ptr := Scan_Ptr + 2; | |
2146 end if; | |
2147 | |
2148 -- Fall through here with Scan_Ptr updated past the closing | |
2149 -- quote, and Code set to the Char_Code value for the literal | |
2150 | |
2151 Accumulate_Checksum ('''); | |
2152 Token := Tok_Char_Literal; | |
2153 Set_Character_Literal_Name (Code); | |
2154 Token_Name := Name_Find; | |
2155 Character_Code := Code; | |
2156 Post_Scan; | |
2157 return; | |
2158 end if; | |
2159 end Char_Literal_Case; | |
2160 | |
2161 -- Right parenthesis | |
2162 | |
2163 when ')' => | |
2164 Accumulate_Checksum (')'); | |
2165 Scan_Ptr := Scan_Ptr + 1; | |
2166 Token := Tok_Right_Paren; | |
2167 | |
2168 if Style_Check then | |
2169 Style.Check_Right_Paren; | |
2170 end if; | |
2171 | |
2172 return; | |
2173 | |
2174 -- Right bracket or right brace, treated as right paren | |
2175 | |
2176 when ']' | '}' => | |
2177 Error_Msg_S ("illegal character, replaced by "")"""); | |
2178 Scan_Ptr := Scan_Ptr + 1; | |
2179 Token := Tok_Right_Paren; | |
2180 return; | |
2181 | |
2182 -- Slash (can be division operator or first character of not equal) | |
2183 | |
2184 when '/' => | |
2185 Accumulate_Checksum ('/'); | |
2186 | |
2187 if Double_Char_Token ('=') then | |
2188 Token := Tok_Not_Equal; | |
2189 return; | |
2190 else | |
2191 Scan_Ptr := Scan_Ptr + 1; | |
2192 Token := Tok_Slash; | |
2193 return; | |
2194 end if; | |
2195 | |
2196 -- Semicolon | |
2197 | |
2198 when ';' => | |
2199 Accumulate_Checksum (';'); | |
2200 Scan_Ptr := Scan_Ptr + 1; | |
2201 Token := Tok_Semicolon; | |
2202 | |
2203 if Style_Check then | |
2204 Style.Check_Semicolon; | |
2205 end if; | |
2206 | |
2207 return; | |
2208 | |
2209 -- Vertical bar | |
2210 | |
2211 when '|' => Vertical_Bar_Case : begin | |
2212 Accumulate_Checksum ('|'); | |
2213 | |
2214 -- Special check for || to give nice message | |
2215 | |
2216 if Source (Scan_Ptr + 1) = '|' then | |
2217 Error_Msg_S -- CODEFIX | |
2218 ("""'|'|"" should be `OR ELSE`"); | |
2219 Scan_Ptr := Scan_Ptr + 2; | |
2220 Token := Tok_Or; | |
2221 return; | |
2222 | |
2223 else | |
2224 Scan_Ptr := Scan_Ptr + 1; | |
2225 Token := Tok_Vertical_Bar; | |
2226 | |
2227 if Style_Check then | |
2228 Style.Check_Vertical_Bar; | |
2229 end if; | |
2230 | |
2231 Post_Scan; | |
2232 return; | |
2233 end if; | |
2234 end Vertical_Bar_Case; | |
2235 | |
2236 -- Exclamation, replacement character for vertical bar | |
2237 | |
2238 when '!' => Exclamation_Case : begin | |
2239 Accumulate_Checksum ('!'); | |
2240 | |
2241 if Source (Scan_Ptr + 1) = '=' then | |
2242 Error_Msg_S -- CODEFIX | |
2243 ("'!= should be /="); | |
2244 Scan_Ptr := Scan_Ptr + 2; | |
2245 Token := Tok_Not_Equal; | |
2246 return; | |
2247 | |
2248 else | |
2249 Scan_Ptr := Scan_Ptr + 1; | |
2250 Token := Tok_Vertical_Bar; | |
2251 Post_Scan; | |
2252 return; | |
2253 end if; | |
2254 end Exclamation_Case; | |
2255 | |
2256 -- Plus | |
2257 | |
2258 when '+' => Plus_Case : begin | |
2259 Accumulate_Checksum ('+'); | |
2260 Scan_Ptr := Scan_Ptr + 1; | |
2261 Token := Tok_Plus; | |
2262 return; | |
2263 end Plus_Case; | |
2264 | |
2265 -- Digits starting a numeric literal | |
2266 | |
2267 when '0' .. '9' => | |
2268 | |
2269 -- First a bit of a scan ahead to see if we have a case of an | |
2270 -- identifier starting with a digit (remembering exponent case). | |
2271 | |
2272 declare | |
2273 C : constant Character := Source (Scan_Ptr + 1); | |
2274 | |
2275 begin | |
2276 -- OK literal if digit followed by digit or underscore | |
2277 | |
2278 if C in '0' .. '9' or else C = '_' then | |
2279 null; | |
2280 | |
2281 -- OK literal if digit not followed by identifier char | |
2282 | |
2283 elsif not Identifier_Char (C) then | |
2284 null; | |
2285 | |
2286 -- OK literal if digit followed by e/E followed by digit/sign. | |
2287 -- We also allow underscore after the E, which is an error, but | |
2288 -- better handled by Nlit than deciding this is an identifier. | |
2289 | |
2290 elsif (C = 'e' or else C = 'E') | |
2291 and then (Source (Scan_Ptr + 2) in '0' .. '9' | |
2292 or else Source (Scan_Ptr + 2) = '+' | |
2293 or else Source (Scan_Ptr + 2) = '-' | |
2294 or else Source (Scan_Ptr + 2) = '_') | |
2295 then | |
2296 null; | |
2297 | |
2298 -- Here we have what really looks like an identifier that | |
2299 -- starts with a digit, so give error msg. | |
2300 | |
2301 else | |
2302 Error_Msg_S ("identifier may not start with digit"); | |
2303 Name_Len := 1; | |
2304 Underline_Found := False; | |
2305 Name_Buffer (1) := Source (Scan_Ptr); | |
2306 Accumulate_Checksum (Name_Buffer (1)); | |
2307 Scan_Ptr := Scan_Ptr + 1; | |
2308 goto Scan_Identifier; | |
2309 end if; | |
2310 end; | |
2311 | |
2312 -- Here we have an OK integer literal | |
2313 | |
2314 Nlit; | |
2315 | |
2316 -- Check for proper delimiter, ignoring other format characters | |
2317 | |
2318 Skip_Other_Format_Characters; | |
2319 | |
2320 if Identifier_Char (Source (Scan_Ptr)) then | |
2321 Error_Msg_S | |
2322 ("delimiter required between literal and identifier"); | |
2323 end if; | |
2324 | |
2325 Post_Scan; | |
2326 return; | |
2327 | |
2328 -- Lower case letters | |
2329 | |
2330 when 'a' .. 'z' => | |
2331 Name_Len := 1; | |
2332 Underline_Found := False; | |
2333 Name_Buffer (1) := Source (Scan_Ptr); | |
2334 Accumulate_Checksum (Name_Buffer (1)); | |
2335 Scan_Ptr := Scan_Ptr + 1; | |
2336 goto Scan_Identifier; | |
2337 | |
2338 -- Upper case letters | |
2339 | |
2340 when 'A' .. 'Z' => | |
2341 Name_Len := 1; | |
2342 Underline_Found := False; | |
2343 Name_Buffer (1) := | |
2344 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); | |
2345 Accumulate_Checksum (Name_Buffer (1)); | |
2346 Scan_Ptr := Scan_Ptr + 1; | |
2347 goto Scan_Identifier; | |
2348 | |
2349 -- Underline character | |
2350 | |
2351 when '_' => | |
2352 if Special_Characters ('_') then | |
2353 Token_Ptr := Scan_Ptr; | |
2354 Scan_Ptr := Scan_Ptr + 1; | |
2355 Token := Tok_Special; | |
2356 Special_Character := '_'; | |
2357 return; | |
2358 end if; | |
2359 | |
2360 Error_Msg_S ("identifier cannot start with underline"); | |
2361 Name_Len := 1; | |
2362 Name_Buffer (1) := '_'; | |
2363 Scan_Ptr := Scan_Ptr + 1; | |
2364 Underline_Found := False; | |
2365 goto Scan_Identifier; | |
2366 | |
2367 -- Space (not possible, because we scanned past blanks) | |
2368 | |
2369 when ' ' => | |
2370 raise Program_Error; | |
2371 | |
2372 -- Characters in top half of ASCII 8-bit chart | |
2373 | |
2374 when Upper_Half_Character => | |
2375 | |
2376 -- Wide character case | |
2377 | |
2378 if Upper_Half_Encoding then | |
2379 goto Scan_Wide_Character; | |
2380 | |
2381 -- Otherwise we have OK Latin-1 character | |
2382 | |
2383 else | |
2384 -- Upper half characters may possibly be identifier letters | |
2385 -- but can never be digits, so Identifier_Char can be used to | |
2386 -- test for a valid start of identifier character. | |
2387 | |
2388 if Identifier_Char (Source (Scan_Ptr)) then | |
2389 Name_Len := 0; | |
2390 Underline_Found := False; | |
2391 goto Scan_Identifier; | |
2392 else | |
2393 Error_Illegal_Character; | |
2394 end if; | |
2395 end if; | |
2396 | |
2397 when ESC => | |
2398 | |
2399 -- ESC character, possible start of identifier if wide characters | |
2400 -- using ESC encoding are allowed in identifiers, which we can | |
2401 -- tell by looking at the Identifier_Char flag for ESC, which is | |
2402 -- only true if these conditions are met. In Ada 2005 mode, may | |
2403 -- also be valid UTF_32 space or line terminator character. | |
2404 | |
2405 if Identifier_Char (ESC) then | |
2406 Name_Len := 0; | |
2407 goto Scan_Wide_Character; | |
2408 else | |
2409 Error_Illegal_Character; | |
2410 end if; | |
2411 | |
2412 -- Invalid control characters | |
2413 | |
2414 when ACK | |
2415 | ASCII.SO | |
2416 | BEL | |
2417 | BS | |
2418 | CAN | |
2419 | DC1 | |
2420 | DC2 | |
2421 | DC3 | |
2422 | DC4 | |
2423 | DEL | |
2424 | DLE | |
2425 | EM | |
2426 | ENQ | |
2427 | EOT | |
2428 | ETB | |
2429 | ETX | |
2430 | FS | |
2431 | GS | |
2432 | NAK | |
2433 | NUL | |
2434 | RS | |
2435 | SI | |
2436 | SOH | |
2437 | STX | |
2438 | SYN | |
2439 | US | |
2440 => | |
2441 Error_Illegal_Character; | |
2442 | |
2443 -- Invalid graphic characters | |
2444 -- Note that '@' is handled elsewhere, because following AI12-125 | |
2445 -- it denotes the target_name of an assignment. | |
2446 | |
2447 when '#' | '$' | '?' | '`' | '\' | '^' | '~' => | |
2448 | |
2449 -- If Set_Special_Character has been called for this character, | |
2450 -- set Scans.Special_Character and return a Special token. | |
2451 | |
2452 if Special_Characters (Source (Scan_Ptr)) then | |
2453 Token_Ptr := Scan_Ptr; | |
2454 Token := Tok_Special; | |
2455 Special_Character := Source (Scan_Ptr); | |
2456 Scan_Ptr := Scan_Ptr + 1; | |
2457 return; | |
2458 | |
2459 -- Check for something looking like a preprocessor directive | |
2460 | |
2461 elsif Source (Scan_Ptr) = '#' | |
2462 and then (Source (Scan_Ptr + 1 .. Scan_Ptr + 2) = "if" | |
2463 or else | |
2464 Source (Scan_Ptr + 1 .. Scan_Ptr + 5) = "elsif" | |
2465 or else | |
2466 Source (Scan_Ptr + 1 .. Scan_Ptr + 4) = "else" | |
2467 or else | |
2468 Source (Scan_Ptr + 1 .. Scan_Ptr + 3) = "end") | |
2469 then | |
2470 Error_Msg_S | |
2471 ("preprocessor directive ignored, preprocessor not active"); | |
2472 | |
2473 -- Skip to end of line | |
2474 | |
2475 loop | |
2476 if Source (Scan_Ptr) in Graphic_Character | |
2477 or else | |
2478 Source (Scan_Ptr) = HT | |
2479 then | |
2480 Scan_Ptr := Scan_Ptr + 1; | |
2481 | |
2482 -- Done if line terminator or EOF | |
2483 | |
2484 elsif Source (Scan_Ptr) in Line_Terminator | |
2485 or else | |
2486 Source (Scan_Ptr) = EOF | |
2487 then | |
2488 exit; | |
2489 | |
2490 -- If we have a wide character, we have to scan it out, | |
2491 -- because it might be a legitimate line terminator | |
2492 | |
2493 elsif Start_Of_Wide_Character then | |
2494 declare | |
2495 Wptr : constant Source_Ptr := Scan_Ptr; | |
2496 Code : Char_Code; | |
2497 Err : Boolean; | |
2498 | |
2499 begin | |
2500 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
2501 | |
2502 -- If not well formed wide character, then just skip | |
2503 -- past it and ignore it. | |
2504 | |
2505 if Err then | |
2506 Scan_Ptr := Wptr + 1; | |
2507 | |
2508 -- If UTF_32 terminator, terminate comment scan | |
2509 | |
2510 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then | |
2511 Scan_Ptr := Wptr; | |
2512 exit; | |
2513 end if; | |
2514 end; | |
2515 | |
2516 -- Else keep going (don't worry about bad comment chars | |
2517 -- in this context, we just want to find the end of line. | |
2518 | |
2519 else | |
2520 Scan_Ptr := Scan_Ptr + 1; | |
2521 end if; | |
2522 end loop; | |
2523 | |
2524 -- Otherwise, this is an illegal character | |
2525 | |
2526 else | |
2527 Error_Illegal_Character; | |
2528 end if; | |
2529 | |
2530 -- End switch on non-blank character | |
2531 | |
2532 end case; | |
2533 | |
2534 -- End loop past format effectors. The exit from this loop is by | |
2535 -- executing a return statement following completion of token scan | |
2536 -- (control never falls out of this loop to the code that follows). | |
2537 | |
2538 end loop; | |
2539 | |
2540 pragma Assert (False); | |
2541 | |
2542 -- Wide_Character scanning routine. On entry we have encountered the | |
2543 -- initial character of a wide character sequence. | |
2544 | |
2545 <<Scan_Wide_Character>> | |
2546 declare | |
2547 Code : Char_Code; | |
2548 Cat : Category; | |
2549 Err : Boolean; | |
2550 | |
2551 begin | |
2552 Wptr := Scan_Ptr; | |
2553 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
2554 | |
2555 -- If bad wide character, signal error and continue scan | |
2556 | |
2557 if Err then | |
2558 Error_Illegal_Wide_Character; | |
2559 goto Scan_Next_Character; | |
2560 end if; | |
2561 | |
2562 Cat := Get_Category (UTF_32 (Code)); | |
2563 | |
2564 -- If OK letter, reset scan ptr and go scan identifier | |
2565 | |
2566 if Is_UTF_32_Letter (Cat) then | |
2567 Scan_Ptr := Wptr; | |
2568 Name_Len := 0; | |
2569 Underline_Found := False; | |
2570 goto Scan_Identifier; | |
2571 | |
2572 -- If OK wide space, ignore and keep scanning (we do not include | |
2573 -- any ignored spaces in checksum) | |
2574 | |
2575 elsif Is_UTF_32_Space (Cat) then | |
2576 goto Scan_Next_Character; | |
2577 | |
2578 -- If other format character, ignore and keep scanning (again we | |
2579 -- do not include in the checksum) (this is for AI-0079). | |
2580 | |
2581 elsif Is_UTF_32_Other (Cat) then | |
2582 goto Scan_Next_Character; | |
2583 | |
2584 -- If OK wide line terminator, terminate current line | |
2585 | |
2586 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then | |
2587 Scan_Ptr := Wptr; | |
2588 goto Scan_Line_Terminator; | |
2589 | |
2590 -- Punctuation is an error (at start of identifier) | |
2591 | |
2592 elsif Is_UTF_32_Punctuation (Cat) then | |
2593 Error_Msg ("identifier cannot start with punctuation", Wptr); | |
2594 Scan_Ptr := Wptr; | |
2595 Name_Len := 0; | |
2596 Underline_Found := False; | |
2597 goto Scan_Identifier; | |
2598 | |
2599 -- Mark character is an error (at start of identifier) | |
2600 | |
2601 elsif Is_UTF_32_Mark (Cat) then | |
2602 Error_Msg ("identifier cannot start with mark character", Wptr); | |
2603 Scan_Ptr := Wptr; | |
2604 Name_Len := 0; | |
2605 Underline_Found := False; | |
2606 goto Scan_Identifier; | |
2607 | |
2608 -- Extended digit character is an error. Could be bad start of | |
2609 -- identifier or bad literal. Not worth doing too much to try to | |
2610 -- distinguish these cases, but we will do a little bit. | |
2611 | |
2612 elsif Is_UTF_32_Digit (Cat) then | |
2613 Error_Msg | |
2614 ("identifier cannot start with digit character", Wptr); | |
2615 Scan_Ptr := Wptr; | |
2616 Name_Len := 0; | |
2617 Underline_Found := False; | |
2618 goto Scan_Identifier; | |
2619 | |
2620 -- All other wide characters are illegal here | |
2621 | |
2622 else | |
2623 Error_Illegal_Wide_Character; | |
2624 goto Scan_Next_Character; | |
2625 end if; | |
2626 end; | |
2627 | |
2628 -- Routine to scan line terminator. On entry Scan_Ptr points to a | |
2629 -- character which is one of FF,LR,CR,VT, or one of the wide characters | |
2630 -- that is treated as a line terminator. | |
2631 | |
2632 <<Scan_Line_Terminator>> | |
2633 | |
2634 -- Check line too long | |
2635 | |
2636 Check_End_Of_Line; | |
2637 | |
2638 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is | |
2639 -- a physical line. | |
2640 | |
2641 if End_Of_Line_Is_Token then | |
2642 Token_Ptr := Scan_Ptr; | |
2643 end if; | |
2644 | |
2645 declare | |
2646 Physical : Boolean; | |
2647 | |
2648 begin | |
2649 Skip_Line_Terminators (Scan_Ptr, Physical); | |
2650 | |
2651 -- If we are at start of physical line, update scan pointers to | |
2652 -- reflect the start of the new line. | |
2653 | |
2654 if Physical then | |
2655 Current_Line_Start := Scan_Ptr; | |
2656 Start_Column := Set_Start_Column; | |
2657 First_Non_Blank_Location := Scan_Ptr; | |
2658 | |
2659 -- If End_Of_Line is a token, we return it as it is a | |
2660 -- physical line. | |
2661 | |
2662 if End_Of_Line_Is_Token then | |
2663 Token := Tok_End_Of_Line; | |
2664 return; | |
2665 end if; | |
2666 end if; | |
2667 end; | |
2668 | |
2669 goto Scan_Next_Character; | |
2670 | |
2671 -- Identifier scanning routine. On entry, some initial characters of | |
2672 -- the identifier may have already been stored in Name_Buffer. If so, | |
2673 -- Name_Len has the number of characters stored, otherwise Name_Len is | |
2674 -- set to zero on entry. Underline_Found is also set False on entry. | |
2675 | |
2676 <<Scan_Identifier>> | |
2677 | |
2678 -- This loop scans as fast as possible past lower half letters and | |
2679 -- digits, which we expect to be the most common characters. | |
2680 | |
2681 loop | |
2682 if Source (Scan_Ptr) in 'a' .. 'z' | |
2683 or else Source (Scan_Ptr) in '0' .. '9' | |
2684 then | |
2685 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr); | |
2686 Accumulate_Checksum (Source (Scan_Ptr)); | |
2687 | |
2688 elsif Source (Scan_Ptr) in 'A' .. 'Z' then | |
2689 Name_Buffer (Name_Len + 1) := | |
2690 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); | |
2691 Accumulate_Checksum (Name_Buffer (Name_Len + 1)); | |
2692 | |
2693 else | |
2694 exit; | |
2695 end if; | |
2696 | |
2697 Underline_Found := False; | |
2698 Scan_Ptr := Scan_Ptr + 1; | |
2699 Name_Len := Name_Len + 1; | |
2700 end loop; | |
2701 | |
2702 -- If we fall through, then we have encountered either an underline | |
2703 -- character, or an extended identifier character (i.e. one from the | |
2704 -- upper half), or a wide character, or an identifier terminator. The | |
2705 -- initial test speeds us up in the most common case where we have | |
2706 -- an identifier terminator. Note that ESC is an identifier character | |
2707 -- only if a wide character encoding method that uses ESC encoding | |
2708 -- is active, so if we find an ESC character we know that we have a | |
2709 -- wide character. | |
2710 | |
2711 if Identifier_Char (Source (Scan_Ptr)) | |
2712 or else (Source (Scan_Ptr) in Upper_Half_Character | |
2713 and then Upper_Half_Encoding) | |
2714 then | |
2715 -- Case of underline | |
2716 | |
2717 if Source (Scan_Ptr) = '_' then | |
2718 Accumulate_Checksum ('_'); | |
2719 | |
2720 if Underline_Found then | |
2721 Error_No_Double_Underline; | |
2722 else | |
2723 Underline_Found := True; | |
2724 Name_Len := Name_Len + 1; | |
2725 Name_Buffer (Name_Len) := '_'; | |
2726 end if; | |
2727 | |
2728 Scan_Ptr := Scan_Ptr + 1; | |
2729 goto Scan_Identifier; | |
2730 | |
2731 -- Upper half character | |
2732 | |
2733 elsif Source (Scan_Ptr) in Upper_Half_Character | |
2734 and then not Upper_Half_Encoding | |
2735 then | |
2736 Accumulate_Checksum (Source (Scan_Ptr)); | |
2737 Store_Encoded_Character | |
2738 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr)))); | |
2739 Scan_Ptr := Scan_Ptr + 1; | |
2740 Underline_Found := False; | |
2741 goto Scan_Identifier; | |
2742 | |
2743 -- Left bracket not followed by a quote terminates an identifier. | |
2744 -- This is an error, but we don't want to give a junk error msg | |
2745 -- about wide characters in this case. | |
2746 | |
2747 elsif Source (Scan_Ptr) = '[' | |
2748 and then Source (Scan_Ptr + 1) /= '"' | |
2749 then | |
2750 null; | |
2751 | |
2752 -- We know we have a wide character encoding here (the current | |
2753 -- character is either ESC, left bracket, or an upper half | |
2754 -- character depending on the encoding method). | |
2755 | |
2756 else | |
2757 -- Scan out the wide character and insert the appropriate | |
2758 -- encoding into the name table entry for the identifier. | |
2759 | |
2760 declare | |
2761 Code : Char_Code; | |
2762 Err : Boolean; | |
2763 Chr : Character; | |
2764 Cat : Category; | |
2765 | |
2766 begin | |
2767 Wptr := Scan_Ptr; | |
2768 Scan_Wide (Source, Scan_Ptr, Code, Err); | |
2769 | |
2770 -- If error, signal error | |
2771 | |
2772 if Err then | |
2773 Error_Illegal_Wide_Character; | |
2774 | |
2775 -- If the character scanned is a normal identifier | |
2776 -- character, then we treat it that way. | |
2777 | |
2778 elsif In_Character_Range (Code) | |
2779 and then Identifier_Char (Get_Character (Code)) | |
2780 then | |
2781 Chr := Get_Character (Code); | |
2782 Accumulate_Checksum (Chr); | |
2783 Store_Encoded_Character | |
2784 (Get_Char_Code (Fold_Lower (Chr))); | |
2785 Underline_Found := False; | |
2786 | |
2787 -- Here if not a normal identifier character | |
2788 | |
2789 else | |
2790 Cat := Get_Category (UTF_32 (Code)); | |
2791 | |
2792 -- Wide character in Unicode category "Other, Format" | |
2793 -- is not accepted in an identifier. This is because it | |
2794 -- it is considered a security risk (AI-0091). | |
2795 | |
2796 -- However, it is OK for such a character to appear at | |
2797 -- the end of an identifier. | |
2798 | |
2799 if Is_UTF_32_Other (Cat) then | |
2800 if not Identifier_Char (Source (Scan_Ptr)) then | |
2801 goto Scan_Identifier_Complete; | |
2802 else | |
2803 Error_Msg | |
2804 ("identifier cannot contain other_format " | |
2805 & "character", Wptr); | |
2806 goto Scan_Identifier; | |
2807 end if; | |
2808 | |
2809 -- Wide character in category Separator,Space terminates | |
2810 | |
2811 elsif Is_UTF_32_Space (Cat) then | |
2812 goto Scan_Identifier_Complete; | |
2813 end if; | |
2814 | |
2815 -- Here if wide character is part of the identifier | |
2816 | |
2817 -- Make sure we are allowing wide characters in | |
2818 -- identifiers. Note that we allow wide character | |
2819 -- notation for an OK identifier character. This in | |
2820 -- particular allows bracket or other notation to be | |
2821 -- used for upper half letters. | |
2822 | |
2823 -- Wide characters are always allowed in Ada 2005 | |
2824 | |
2825 if Identifier_Character_Set /= 'w' | |
2826 and then Ada_Version < Ada_2005 | |
2827 then | |
2828 Error_Msg | |
2829 ("wide character not allowed in identifier", Wptr); | |
2830 end if; | |
2831 | |
2832 -- If OK letter, store it folding to upper case. Note | |
2833 -- that we include the folded letter in the checksum. | |
2834 | |
2835 if Is_UTF_32_Letter (Cat) then | |
2836 Code := | |
2837 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code))); | |
2838 Accumulate_Checksum (Code); | |
2839 Store_Encoded_Character (Code); | |
2840 Underline_Found := False; | |
2841 | |
2842 -- If OK extended digit or mark, then store it | |
2843 | |
2844 elsif Is_UTF_32_Digit (Cat) | |
2845 or else Is_UTF_32_Mark (Cat) | |
2846 then | |
2847 Accumulate_Checksum (Code); | |
2848 Store_Encoded_Character (Code); | |
2849 Underline_Found := False; | |
2850 | |
2851 -- Wide punctuation is also stored, but counts as an | |
2852 -- underline character for error checking purposes. | |
2853 | |
2854 elsif Is_UTF_32_Punctuation (Cat) then | |
2855 Accumulate_Checksum (Code); | |
2856 | |
2857 if Underline_Found then | |
2858 declare | |
2859 Cend : constant Source_Ptr := Scan_Ptr; | |
2860 begin | |
2861 Scan_Ptr := Wptr; | |
2862 Error_No_Double_Underline; | |
2863 Scan_Ptr := Cend; | |
2864 end; | |
2865 | |
2866 else | |
2867 Store_Encoded_Character (Code); | |
2868 Underline_Found := True; | |
2869 end if; | |
2870 | |
2871 -- Any other wide character is not acceptable | |
2872 | |
2873 else | |
2874 Error_Msg | |
2875 ("invalid wide character in identifier", Wptr); | |
2876 end if; | |
2877 end if; | |
2878 | |
2879 goto Scan_Identifier; | |
2880 end; | |
2881 end if; | |
2882 end if; | |
2883 | |
2884 -- Scan of identifier is complete. The identifier is stored in | |
2885 -- Name_Buffer, and Scan_Ptr points past the last character. | |
2886 | |
2887 <<Scan_Identifier_Complete>> | |
2888 Token_Name := Name_Find; | |
2889 | |
2890 -- Check for identifier ending with underline or punctuation char | |
2891 | |
2892 if Underline_Found then | |
2893 Underline_Found := False; | |
2894 | |
2895 if Source (Scan_Ptr - 1) = '_' then | |
2896 Error_Msg | |
2897 ("identifier cannot end with underline", Scan_Ptr - 1); | |
2898 else | |
2899 Error_Msg | |
2900 ("identifier cannot end with punctuation character", Wptr); | |
2901 end if; | |
2902 end if; | |
2903 | |
2904 -- We will assume it is an identifier, not a keyword, so that the | |
2905 -- checksum is independent of the Ada version. | |
2906 | |
2907 Token := Tok_Identifier; | |
2908 | |
2909 -- Here is where we check if it was a keyword | |
2910 | |
2911 if Is_Keyword_Name (Token_Name) then | |
2912 if Opt.Checksum_GNAT_6_3 then | |
2913 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); | |
2914 | |
2915 if Checksum_Accumulate_Token_Checksum then | |
2916 if Checksum_GNAT_5_03 then | |
2917 Accumulate_Token_Checksum_GNAT_5_03; | |
2918 else | |
2919 Accumulate_Token_Checksum_GNAT_6_3; | |
2920 end if; | |
2921 end if; | |
2922 | |
2923 else | |
2924 Accumulate_Token_Checksum; | |
2925 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); | |
2926 end if; | |
2927 | |
2928 -- Keyword style checks | |
2929 | |
2930 if Style_Check then | |
2931 | |
2932 -- Deal with possible style check for non-lower case keyword, | |
2933 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords | |
2934 -- for this purpose if they appear as attribute designators. | |
2935 -- Actually we only check the first character for speed. | |
2936 | |
2937 -- Ada 2005 (AI-284): Do not apply the style check in case of | |
2938 -- "pragma Interface" | |
2939 | |
2940 -- Ada 2005 (AI-340): Do not apply the style check in case of | |
2941 -- MOD attribute. | |
2942 | |
2943 if Source (Token_Ptr) <= 'Z' | |
2944 and then (Prev_Token /= Tok_Apostrophe | |
2945 or else | |
2946 (Token /= Tok_Access and then | |
2947 Token /= Tok_Delta and then | |
2948 Token /= Tok_Digits and then | |
2949 Token /= Tok_Mod and then | |
2950 Token /= Tok_Range)) | |
2951 and then (Token /= Tok_Interface | |
2952 or else | |
2953 (Token = Tok_Interface | |
2954 and then Prev_Token /= Tok_Pragma)) | |
2955 then | |
2956 Style.Non_Lower_Case_Keyword; | |
2957 end if; | |
2958 | |
2959 -- Check THEN/ELSE style rules. These do not apply to AND THEN | |
2960 -- or OR ELSE, and do not apply in if expressions. | |
2961 | |
2962 if (Token = Tok_Then and then Prev_Token /= Tok_And) | |
2963 or else | |
2964 (Token = Tok_Else and then Prev_Token /= Tok_Or) | |
2965 then | |
2966 if Inside_If_Expression = 0 then | |
2967 Style.Check_Separate_Stmt_Lines; | |
2968 end if; | |
2969 end if; | |
2970 end if; | |
2971 | |
2972 -- We must reset Token_Name since this is not an identifier and | |
2973 -- if we leave Token_Name set, the parser gets confused because | |
2974 -- it thinks it is dealing with an identifier instead of the | |
2975 -- corresponding keyword. | |
2976 | |
2977 Token_Name := No_Name; | |
2978 return; | |
2979 | |
2980 -- It is an identifier after all | |
2981 | |
2982 else | |
2983 if Checksum_Accumulate_Token_Checksum then | |
2984 Accumulate_Token_Checksum; | |
2985 end if; | |
2986 | |
2987 Post_Scan; | |
2988 return; | |
2989 end if; | |
2990 end Scan; | |
2991 | |
2992 -------------------------- | |
2993 -- Set_Comment_As_Token -- | |
2994 -------------------------- | |
2995 | |
2996 procedure Set_Comment_As_Token (Value : Boolean) is | |
2997 begin | |
2998 Comment_Is_Token := Value; | |
2999 end Set_Comment_As_Token; | |
3000 | |
3001 ------------------------------ | |
3002 -- Set_End_Of_Line_As_Token -- | |
3003 ------------------------------ | |
3004 | |
3005 procedure Set_End_Of_Line_As_Token (Value : Boolean) is | |
3006 begin | |
3007 End_Of_Line_Is_Token := Value; | |
3008 end Set_End_Of_Line_As_Token; | |
3009 | |
3010 --------------------------- | |
3011 -- Set_Special_Character -- | |
3012 --------------------------- | |
3013 | |
3014 procedure Set_Special_Character (C : Character) is | |
3015 begin | |
3016 case C is | |
3017 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' => | |
3018 Special_Characters (C) := True; | |
3019 | |
3020 when others => | |
3021 null; | |
3022 end case; | |
3023 end Set_Special_Character; | |
3024 | |
3025 ---------------------- | |
3026 -- Set_Start_Column -- | |
3027 ---------------------- | |
3028 | |
3029 -- Note: it seems at first glance a little expensive to compute this value | |
3030 -- for every source line (since it is certainly not used for all source | |
3031 -- lines). On the other hand, it doesn't take much more work to skip past | |
3032 -- the initial white space on the line counting the columns than it would | |
3033 -- to scan past the white space using the standard scanning circuits. | |
3034 | |
3035 function Set_Start_Column return Column_Number is | |
3036 Start_Column : Column_Number := 0; | |
3037 | |
3038 begin | |
3039 -- Outer loop scans past horizontal tab characters | |
3040 | |
3041 Tabs_Loop : loop | |
3042 | |
3043 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr | |
3044 -- past the blanks and adjusting Start_Column to account for them. | |
3045 | |
3046 Blanks_Loop : loop | |
3047 if Source (Scan_Ptr) = ' ' then | |
3048 if Source (Scan_Ptr + 1) = ' ' then | |
3049 if Source (Scan_Ptr + 2) = ' ' then | |
3050 if Source (Scan_Ptr + 3) = ' ' then | |
3051 if Source (Scan_Ptr + 4) = ' ' then | |
3052 if Source (Scan_Ptr + 5) = ' ' then | |
3053 if Source (Scan_Ptr + 6) = ' ' then | |
3054 Scan_Ptr := Scan_Ptr + 7; | |
3055 Start_Column := Start_Column + 7; | |
3056 else | |
3057 Scan_Ptr := Scan_Ptr + 6; | |
3058 Start_Column := Start_Column + 6; | |
3059 exit Blanks_Loop; | |
3060 end if; | |
3061 else | |
3062 Scan_Ptr := Scan_Ptr + 5; | |
3063 Start_Column := Start_Column + 5; | |
3064 exit Blanks_Loop; | |
3065 end if; | |
3066 else | |
3067 Scan_Ptr := Scan_Ptr + 4; | |
3068 Start_Column := Start_Column + 4; | |
3069 exit Blanks_Loop; | |
3070 end if; | |
3071 else | |
3072 Scan_Ptr := Scan_Ptr + 3; | |
3073 Start_Column := Start_Column + 3; | |
3074 exit Blanks_Loop; | |
3075 end if; | |
3076 else | |
3077 Scan_Ptr := Scan_Ptr + 2; | |
3078 Start_Column := Start_Column + 2; | |
3079 exit Blanks_Loop; | |
3080 end if; | |
3081 else | |
3082 Scan_Ptr := Scan_Ptr + 1; | |
3083 Start_Column := Start_Column + 1; | |
3084 exit Blanks_Loop; | |
3085 end if; | |
3086 else | |
3087 exit Blanks_Loop; | |
3088 end if; | |
3089 end loop Blanks_Loop; | |
3090 | |
3091 -- Outer loop keeps going only if a horizontal tab follows | |
3092 | |
3093 if Source (Scan_Ptr) = HT then | |
3094 if Style_Check then | |
3095 Style.Check_HT; | |
3096 end if; | |
3097 | |
3098 Scan_Ptr := Scan_Ptr + 1; | |
3099 Start_Column := (Start_Column / 8) * 8 + 8; | |
3100 else | |
3101 exit Tabs_Loop; | |
3102 end if; | |
3103 end loop Tabs_Loop; | |
3104 | |
3105 return Start_Column; | |
3106 | |
3107 -- A constraint error can happen only if we have a compiler with checks on | |
3108 -- and a line with a ludicrous number of tabs or spaces at the start. In | |
3109 -- such a case, we really don't care if Start_Column is right or not. | |
3110 | |
3111 exception | |
3112 when Constraint_Error => | |
3113 return Start_Column; | |
3114 end Set_Start_Column; | |
3115 | |
3116 end Scng; |