// lex.h -- Go frontend lexer.     -*- C++ -*-

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

|
|
7 #ifndef GO_LEX_H
|
|
8 #define GO_LEX_H
|
|
9
|
|
10 #include <mpfr.h>
|
|
11
|
|
12 #include "operator.h"
|
|
13 #include "go-linemap.h"
|
|
14
|
|
15 struct Unicode_range;
|
|
16
|
|
// The keywords.  These must be in sorted order, other than
// KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
// lex.cc.  Enumerator values are implicit, so KEYWORD_INVALID is 0
// and each following keyword is one greater than the previous one.

enum Keyword
{
  KEYWORD_INVALID,      // Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
|
|
51
|
|
// Pragmas built from magic comments and recorded for functions.
// These are used as bits in a bitmask, so every enumerator is a
// distinct power of two and values may be combined with bitwise OR.
// The set of values is intended to be the same as the gc compiler.

enum GoPragma
{
  GOPRAGMA_NOINTERFACE = 1 << 0,        // Method not in type descriptor.
  GOPRAGMA_NOESCAPE = 1 << 1,           // Args do not escape.
  GOPRAGMA_NORACE = 1 << 2,             // No race detector.
  GOPRAGMA_NOSPLIT = 1 << 3,            // Do not split stack.
  GOPRAGMA_NOINLINE = 1 << 4,           // Do not inline.
  GOPRAGMA_SYSTEMSTACK = 1 << 5,        // Must run on system stack.
  GOPRAGMA_NOWRITEBARRIER = 1 << 6,     // No write barriers.
  GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,  // No write barriers here or callees.
  GOPRAGMA_YESWRITEBARRIERREC = 1 << 8, // Stops nowritebarrierrec.
  GOPRAGMA_MARK = 1 << 9,               // Marker for nowritebarrierrec.
  GOPRAGMA_CGOUNSAFEARGS = 1 << 10,     // Pointer to arg is pointer to all.
  GOPRAGMA_UINTPTRESCAPES = 1 << 11,    // uintptr(p) escapes.
  GOPRAGMA_NOTINHEAP = 1 << 12          // type is not in heap.
};
|
|
72
|
|
73 // A token returned from the lexer.
|
|
74
|
|
75 class Token
|
|
76 {
|
|
77 public:
|
|
78 // Token classification.
|
|
79 enum Classification
|
|
80 {
|
|
81 // Token is invalid.
|
|
82 TOKEN_INVALID,
|
|
83 // Token indicates end of input.
|
|
84 TOKEN_EOF,
|
|
85 // Token is a keyword.
|
|
86 TOKEN_KEYWORD,
|
|
87 // Token is an identifier.
|
|
88 TOKEN_IDENTIFIER,
|
|
89 // Token is a string of characters.
|
|
90 TOKEN_STRING,
|
|
91 // Token is an operator.
|
|
92 TOKEN_OPERATOR,
|
|
93 // Token is a character constant.
|
|
94 TOKEN_CHARACTER,
|
|
95 // Token is an integer.
|
|
96 TOKEN_INTEGER,
|
|
97 // Token is a floating point number.
|
|
98 TOKEN_FLOAT,
|
|
99 // Token is an imaginary number.
|
|
100 TOKEN_IMAGINARY
|
|
101 };
|
|
102
|
|
103 ~Token();
|
|
104 Token(const Token&);
|
|
105 Token& operator=(const Token&);
|
|
106
|
|
107 // Get token classification.
|
|
108 Classification
|
|
109 classification() const
|
|
110 { return this->classification_; }
|
|
111
|
|
112 // Make a token for an invalid value.
|
|
113 static Token
|
|
114 make_invalid_token(Location location)
|
|
115 { return Token(TOKEN_INVALID, location); }
|
|
116
|
|
117 // Make a token representing end of file.
|
|
118 static Token
|
|
119 make_eof_token(Location location)
|
|
120 { return Token(TOKEN_EOF, location); }
|
|
121
|
|
122 // Make a keyword token.
|
|
123 static Token
|
|
124 make_keyword_token(Keyword keyword, Location location)
|
|
125 {
|
|
126 Token tok(TOKEN_KEYWORD, location);
|
|
127 tok.u_.keyword = keyword;
|
|
128 return tok;
|
|
129 }
|
|
130
|
|
131 // Make an identifier token.
|
|
132 static Token
|
|
133 make_identifier_token(const std::string& value, bool is_exported,
|
|
134 Location location)
|
|
135 {
|
|
136 Token tok(TOKEN_IDENTIFIER, location);
|
|
137 tok.u_.identifier_value.name = new std::string(value);
|
|
138 tok.u_.identifier_value.is_exported = is_exported;
|
|
139 return tok;
|
|
140 }
|
|
141
|
|
142 // Make a quoted string token.
|
|
143 static Token
|
|
144 make_string_token(const std::string& value, Location location)
|
|
145 {
|
|
146 Token tok(TOKEN_STRING, location);
|
|
147 tok.u_.string_value = new std::string(value);
|
|
148 return tok;
|
|
149 }
|
|
150
|
|
151 // Make an operator token.
|
|
152 static Token
|
|
153 make_operator_token(Operator op, Location location)
|
|
154 {
|
|
155 Token tok(TOKEN_OPERATOR, location);
|
|
156 tok.u_.op = op;
|
|
157 return tok;
|
|
158 }
|
|
159
|
|
160 // Make a character constant token.
|
|
161 static Token
|
|
162 make_character_token(mpz_t val, Location location)
|
|
163 {
|
|
164 Token tok(TOKEN_CHARACTER, location);
|
|
165 mpz_init(tok.u_.integer_value);
|
|
166 mpz_swap(tok.u_.integer_value, val);
|
|
167 return tok;
|
|
168 }
|
|
169
|
|
170 // Make an integer token.
|
|
171 static Token
|
|
172 make_integer_token(mpz_t val, Location location)
|
|
173 {
|
|
174 Token tok(TOKEN_INTEGER, location);
|
|
175 mpz_init(tok.u_.integer_value);
|
|
176 mpz_swap(tok.u_.integer_value, val);
|
|
177 return tok;
|
|
178 }
|
|
179
|
|
180 // Make a float token.
|
|
181 static Token
|
|
182 make_float_token(mpfr_t val, Location location)
|
|
183 {
|
|
184 Token tok(TOKEN_FLOAT, location);
|
|
185 mpfr_init(tok.u_.float_value);
|
|
186 mpfr_swap(tok.u_.float_value, val);
|
|
187 return tok;
|
|
188 }
|
|
189
|
|
190 // Make a token for an imaginary number.
|
|
191 static Token
|
|
192 make_imaginary_token(mpfr_t val, Location location)
|
|
193 {
|
|
194 Token tok(TOKEN_IMAGINARY, location);
|
|
195 mpfr_init(tok.u_.float_value);
|
|
196 mpfr_swap(tok.u_.float_value, val);
|
|
197 return tok;
|
|
198 }
|
|
199
|
|
200 // Get the location of the token.
|
|
201 Location
|
|
202 location() const
|
|
203 { return this->location_; }
|
|
204
|
|
205 // Return whether this is an invalid token.
|
|
206 bool
|
|
207 is_invalid() const
|
|
208 { return this->classification_ == TOKEN_INVALID; }
|
|
209
|
|
210 // Return whether this is the EOF token.
|
|
211 bool
|
|
212 is_eof() const
|
|
213 { return this->classification_ == TOKEN_EOF; }
|
|
214
|
|
215 // Return the keyword value for a keyword token.
|
|
216 Keyword
|
|
217 keyword() const
|
|
218 {
|
|
219 go_assert(this->classification_ == TOKEN_KEYWORD);
|
|
220 return this->u_.keyword;
|
|
221 }
|
|
222
|
|
223 // Return whether this is an identifier.
|
|
224 bool
|
|
225 is_identifier() const
|
|
226 { return this->classification_ == TOKEN_IDENTIFIER; }
|
|
227
|
|
228 // Return the identifier.
|
|
229 const std::string&
|
|
230 identifier() const
|
|
231 {
|
|
232 go_assert(this->classification_ == TOKEN_IDENTIFIER);
|
|
233 return *this->u_.identifier_value.name;
|
|
234 }
|
|
235
|
|
236 // Return whether the identifier is exported.
|
|
237 bool
|
|
238 is_identifier_exported() const
|
|
239 {
|
|
240 go_assert(this->classification_ == TOKEN_IDENTIFIER);
|
|
241 return this->u_.identifier_value.is_exported;
|
|
242 }
|
|
243
|
|
244 // Return whether this is a string.
|
|
245 bool
|
|
246 is_string() const
|
|
247 {
|
|
248 return this->classification_ == TOKEN_STRING;
|
|
249 }
|
|
250
|
|
251 // Return the value of a string. The returned value is a string of
|
|
252 // UTF-8 characters.
|
|
253 std::string
|
|
254 string_value() const
|
|
255 {
|
|
256 go_assert(this->classification_ == TOKEN_STRING);
|
|
257 return *this->u_.string_value;
|
|
258 }
|
|
259
|
|
260 // Return the value of a character constant.
|
|
261 const mpz_t*
|
|
262 character_value() const
|
|
263 {
|
|
264 go_assert(this->classification_ == TOKEN_CHARACTER);
|
|
265 return &this->u_.integer_value;
|
|
266 }
|
|
267
|
|
268 // Return the value of an integer.
|
|
269 const mpz_t*
|
|
270 integer_value() const
|
|
271 {
|
|
272 go_assert(this->classification_ == TOKEN_INTEGER);
|
|
273 return &this->u_.integer_value;
|
|
274 }
|
|
275
|
|
276 // Return the value of a float.
|
|
277 const mpfr_t*
|
|
278 float_value() const
|
|
279 {
|
|
280 go_assert(this->classification_ == TOKEN_FLOAT);
|
|
281 return &this->u_.float_value;
|
|
282 }
|
|
283
|
|
284 // Return the value of an imaginary number.
|
|
285 const mpfr_t*
|
|
286 imaginary_value() const
|
|
287 {
|
|
288 go_assert(this->classification_ == TOKEN_IMAGINARY);
|
|
289 return &this->u_.float_value;
|
|
290 }
|
|
291
|
|
292 // Return the operator value for an operator token.
|
|
293 Operator
|
|
294 op() const
|
|
295 {
|
|
296 go_assert(this->classification_ == TOKEN_OPERATOR);
|
|
297 return this->u_.op;
|
|
298 }
|
|
299
|
|
300 // Return whether this token is KEYWORD.
|
|
301 bool
|
|
302 is_keyword(Keyword keyword) const
|
|
303 {
|
|
304 return (this->classification_ == TOKEN_KEYWORD
|
|
305 && this->u_.keyword == keyword);
|
|
306 }
|
|
307
|
|
308 // Return whether this token is OP.
|
|
309 bool
|
|
310 is_op(Operator op) const
|
|
311 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
|
|
312
|
|
313 // Print the token for debugging.
|
|
314 void
|
|
315 print(FILE*) const;
|
|
316
|
|
317 private:
|
|
318 // Private constructor used by make_..._token functions above.
|
|
319 Token(Classification, Location);
|
|
320
|
|
321 // Clear the token.
|
|
322 void
|
|
323 clear();
|
|
324
|
|
325 // The token classification.
|
|
326 Classification classification_;
|
|
327 union
|
|
328 {
|
|
329 // The keyword value for TOKEN_KEYWORD.
|
|
330 Keyword keyword;
|
|
331 // The token value for TOKEN_IDENTIFIER.
|
|
332 struct
|
|
333 {
|
|
334 // The name of the identifier. This has been mangled to only
|
|
335 // include ASCII characters.
|
|
336 std::string* name;
|
|
337 // Whether this name should be exported. This is true if the
|
|
338 // first letter in the name is upper case.
|
|
339 bool is_exported;
|
|
340 } identifier_value;
|
|
341 // The string value for TOKEN_STRING.
|
|
342 std::string* string_value;
|
|
343 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
|
|
344 mpz_t integer_value;
|
|
345 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
|
|
346 mpfr_t float_value;
|
|
347 // The token value for TOKEN_OPERATOR or the keyword value
|
|
348 Operator op;
|
|
349 } u_;
|
|
350 // The source location.
|
|
351 Location location_;
|
|
352 };
|
|
353
|
|
354 // The lexer itself.
|
|
355
|
|
356 class Lex
|
|
357 {
|
|
358 public:
|
|
359 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
|
|
360
|
|
361 ~Lex();
|
|
362
|
|
363 // Return the next token.
|
|
364 Token
|
|
365 next_token();
|
|
366
|
|
367 // Return the contents of any current //extern comment.
|
|
368 const std::string&
|
|
369 extern_name() const
|
|
370 { return this->extern_; }
|
|
371
|
|
372 // Return the current set of pragmas, and clear them.
|
|
373 unsigned int
|
|
374 get_and_clear_pragmas()
|
|
375 {
|
|
376 unsigned int ret = this->pragmas_;
|
|
377 this->pragmas_ = 0;
|
|
378 return ret;
|
|
379 }
|
|
380
|
|
381 struct Linkname
|
|
382 {
|
145
|
383 std::string ext_name; // External name; empty to just export.
|
111
|
384 bool is_exported; // Whether the internal name is exported.
|
|
385 Location loc; // Location of go:linkname directive.
|
|
386
|
|
387 Linkname()
|
|
388 : ext_name(), is_exported(false), loc()
|
|
389 { }
|
|
390
|
|
391 Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a)
|
|
392 : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a)
|
|
393 { }
|
|
394 };
|
|
395
|
|
396 typedef std::map<std::string, Linkname> Linknames;
|
|
397
|
|
398 // Return the linknames seen so far, or NULL if none, and clear the
|
|
399 // set. These are from go:linkname compiler directives.
|
|
400 Linknames*
|
|
401 get_and_clear_linknames()
|
|
402 {
|
|
403 Linknames* ret = this->linknames_;
|
|
404 this->linknames_ = NULL;
|
|
405 return ret;
|
|
406 }
|
|
407
|
|
408 // Return whether the identifier NAME should be exported. NAME is a
|
|
409 // mangled name which includes only ASCII characters.
|
|
410 static bool
|
131
|
411 is_exported_mangled_name(const std::string& name);
|
|
412
|
|
413 // Return whether the identifier NAME should be exported. NAME is
|
|
414 // an unmangled utf-8 string and may contain non-ASCII characters.
|
|
415 static bool
|
111
|
416 is_exported_name(const std::string& name);
|
|
417
|
|
418 // Return whether the identifier NAME is invalid. When we see an
|
|
419 // invalid character we still build an identifier, but we use a
|
|
420 // magic string to indicate that the identifier is invalid. We then
|
|
421 // use this to avoid knockon errors.
|
|
422 static bool
|
|
423 is_invalid_identifier(const std::string& name);
|
|
424
|
|
425 // A helper function. Append V to STR. IS_CHARACTER is true if V
|
|
426 // is a Unicode character which should be converted into UTF-8,
|
|
427 // false if it is a byte value to be appended directly. The
|
|
428 // location is used to warn about an out of range character.
|
|
429 static void
|
|
430 append_char(unsigned int v, bool is_charater, std::string* str,
|
|
431 Location);
|
|
432
|
|
433 // A helper function. Fetch a UTF-8 character from STR and store it
|
|
434 // in *VALUE. Return the number of bytes read from STR. Return 0
|
|
435 // if STR does not point to a valid UTF-8 character.
|
|
436 static int
|
|
437 fetch_char(const char* str, unsigned int *value);
|
|
438
|
|
439 // Return whether C is a Unicode or "C" locale space character.
|
|
440 static bool
|
|
441 is_unicode_space(unsigned int c);
|
|
442
|
145
|
443 // Convert the specified hex char into an unsigned integer value.
|
|
444 static unsigned
|
|
445 hex_val(char c);
|
|
446
|
111
|
447 private:
|
|
448 ssize_t
|
|
449 get_line();
|
|
450
|
|
451 bool
|
|
452 require_line();
|
|
453
|
|
454 // The current location.
|
|
455 Location
|
|
456 location() const;
|
|
457
|
|
458 // A position CHARS column positions before the current location.
|
|
459 Location
|
|
460 earlier_location(int chars) const;
|
|
461
|
|
462 static bool
|
|
463 is_hex_digit(char);
|
|
464
|
145
|
465 static bool
|
|
466 is_base_digit(int base, char);
|
|
467
|
111
|
468 static unsigned char
|
|
469 octal_value(char c)
|
|
470 { return c - '0'; }
|
|
471
|
|
472 Token
|
|
473 make_invalid_token()
|
|
474 { return Token::make_invalid_token(this->location()); }
|
|
475
|
|
476 Token
|
|
477 make_eof_token()
|
|
478 { return Token::make_eof_token(this->location()); }
|
|
479
|
|
480 Token
|
|
481 make_operator(Operator op, int chars)
|
|
482 { return Token::make_operator_token(op, this->earlier_location(chars)); }
|
|
483
|
|
484 Token
|
|
485 gather_identifier();
|
|
486
|
|
487 static bool
|
145
|
488 could_be_exponent(int base, const char*, const char*);
|
111
|
489
|
|
490 Token
|
|
491 gather_number();
|
|
492
|
145
|
493 void
|
|
494 skip_exponent();
|
|
495
|
111
|
496 Token
|
|
497 gather_character();
|
|
498
|
|
499 Token
|
|
500 gather_string();
|
|
501
|
|
502 Token
|
|
503 gather_raw_string();
|
|
504
|
|
505 const char*
|
|
506 advance_one_utf8_char(const char*, unsigned int*, bool*);
|
|
507
|
|
508 const char*
|
|
509 advance_one_char(const char*, bool, unsigned int*, bool*);
|
|
510
|
|
511 static bool
|
|
512 is_unicode_digit(unsigned int c);
|
|
513
|
|
514 static bool
|
|
515 is_unicode_letter(unsigned int c);
|
|
516
|
|
517 static bool
|
|
518 is_unicode_uppercase(unsigned int c);
|
|
519
|
|
520 static bool
|
|
521 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
|
|
522 size_t range_size);
|
|
523
|
|
524 Operator
|
|
525 three_character_operator(char, char, char);
|
|
526
|
|
527 Operator
|
|
528 two_character_operator(char, char);
|
|
529
|
|
530 Operator
|
|
531 one_character_operator(char);
|
|
532
|
|
533 bool
|
|
534 skip_c_comment(bool* found_newline);
|
|
535
|
|
536 void
|
|
537 skip_cpp_comment();
|
|
538
|
|
539 // The input file name.
|
|
540 const char* input_file_name_;
|
|
541 // The input file.
|
|
542 FILE* input_file_;
|
|
543 // The object used to keep track of file names and line numbers.
|
|
544 Linemap* linemap_;
|
|
545 // The line buffer. This holds the current line.
|
|
546 char* linebuf_;
|
|
547 // The size of the line buffer.
|
|
548 size_t linebufsize_;
|
|
549 // The nmber of characters in the current line.
|
|
550 size_t linesize_;
|
|
551 // The current offset in linebuf_.
|
|
552 size_t lineoff_;
|
|
553 // The current line number.
|
|
554 size_t lineno_;
|
|
555 // Whether to add a semicolon if we see a newline now.
|
|
556 bool add_semi_at_eol_;
|
|
557 // Pragmas for the next function, from magic comments.
|
|
558 unsigned int pragmas_;
|
|
559 // The external name to use for a function declaration, from a magic
|
|
560 // //extern comment.
|
|
561 std::string extern_;
|
|
562 // The list of //go:linkname comments, if any.
|
|
563 Linknames* linknames_;
|
|
564 };
|
|
565
|
|
566 #endif // !defined(GO_LEX_H)
|