CbC/CbC_gcc: gcc/d/dmd/lexer.c comparison

comparison gcc/d/dmd/lexer.c @ 145:1830386684a0

gcc-9.2.0

author	anatofuz
date	Thu, 13 Feb 2020 11:34:05 +0900
parents
children

comparison

equal deleted inserted replaced

-:84e7813d76e9
+:1830386684a0
+/* Compiler implementation of the D programming language
+* Copyright (C) 1999-2019 by The D Language Foundation, All Rights Reserved
+* written by Walter Bright
+* http://www.digitalmars.com
+* Distributed under the Boost Software License, Version 1.0.
+* http://www.boost.org/LICENSE_1_0.txt
+* https://github.com/D-Programming-Language/dmd/blob/master/src/lexer.c
+*/
+/* Lexical Analyzer */
+#include "root/dsystem.h" // for time() and ctime()
+#include "root/rmem.h"
+#include "mars.h"
+#include "lexer.h"
+#include "utf.h"
+#include "identifier.h"
+#include "id.h"
+extern int HtmlNamedEntity(const utf8_t *p, size_t length); // defined outside this file; escapeSequence() passes the name found between '&' and ';' and treats a result equal to ~0U as "no such entity"
+#define LS 0x2028       // UTF line separator; counted as an end of line by the lexing routines in this file
+#define PS 0x2029       // UTF paragraph separator; counted as an end of line by the lexing routines in this file
+/********************************************
+* Private character classification table.
+* cmtable[] is indexed by a byte value; each entry is a bit mask built
+* from the CM* flags below. The isoctal/ishex/isidchar helpers test
+* one flag each.
+*/
+static unsigned char cmtable[256];
+const int CMoctal  = 0x1;       // '0' .. '7'
+const int CMhex    = 0x2;       // accepted by isxdigit()
+const int CMidchar = 0x4;       // accepted by isalnum(), or '_'
+inline bool isoctal (utf8_t c) { return (cmtable[c] & CMoctal)  ? true : false; }
+inline bool ishex   (utf8_t c) { return (cmtable[c] & CMhex)    ? true : false; }
+inline bool isidchar(utf8_t c) { return (cmtable[c] & CMidchar) ? true : false; }
+/* Helper type whose constructor fills in cmtable[]. */
+struct CMTableInitializer
+{
+    CMTableInitializer();
+};
+/* The table is populated when this file-scope object is constructed. */
+static CMTableInitializer cmtableinitializer;
+CMTableInitializer::CMTableInitializer()
+{
+    for (unsigned ch = 0; ch < 256; ++ch)
+    {
+        unsigned char flags = cmtable[ch];
+        if (ch >= '0' && ch <= '7')
+            flags |= CMoctal;
+        if (isxdigit(ch))
+            flags |= CMhex;
+        if (ch == '_' || isalnum(ch))
+            flags |= CMidchar;
+        cmtable[ch] = flags;
+    }
+}
+/*************************** Lexer ********************************************/
+OutBuffer Lexer::stringbuffer; // definition of the static scratch buffer; the string and real-number routines below reset() it before use
+/**************************************
+* Construct a lexer over one source buffer.
+*      filename        file name stored in the starting location (line 1, column 1)
+*      base            start of the source text
+*      begoffset       offset from base where scanning begins
+*      endoffset       offset from base stored as the end of the text
+*      doDocComment    copied to the member of the same name
+*      commentToken    copied to the member of the same name
+* If the text at the starting position begins with "#!", that whole
+* line is skipped and counted as a line.
+*/
+Lexer::Lexer(const char *filename,
+        const utf8_t *base, size_t begoffset, size_t endoffset,
+        bool doDocComment, bool commentToken)
+{
+    scanloc = Loc(filename, 1, 1);
+
+    /* Start with a blank current token. */
+    token = Token();
+    token.ptr = NULL;
+    token.value = TOKreserved;
+    token.blockComment = NULL;
+    token.lineComment = NULL;
+
+    /* Remember the buffer bounds and the scan position. */
+    this->base = base;
+    end = base + endoffset;
+    p = base + begoffset;
+    line = p;
+
+    this->doDocComment = doDocComment;
+    anyToken = 0;
+    this->commentToken = commentToken;
+    errors = false;
+
+    /* If first line starts with '#!', ignore the line
+     */
+    if (p[0] == '#' && p[1] == '!')
+    {
+        p += 2;
+        for (;;)
+        {
+            const utf8_t ch = *p;
+            if (ch == 0 || ch == 0x1A)
+                break;          // end of file: leave p on the terminator
+            ++p;
+            if (ch == '\n')
+                break;          // p is now just past the newline
+        }
+        endOfLine();
+    }
+}
+/**************************************
+* Record that a line has ended: advance the line number of the scan
+* location and remember the current position as the start of the new line.
+*/
+void Lexer::endOfLine()
+{
+    line = p;
+    scanloc.linnum += 1;
+}
+/**************************************
+* Report a printf-style error at the location of the current token
+* and mark this lexer as having seen errors.
+*/
+void Lexer::error(const char *format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    ::verror(token.loc, format, args);
+    va_end(args);
+
+    errors = true;
+}
+/**************************************
+* Report a printf-style error at the given location loc
+* and mark this lexer as having seen errors.
+*/
+void Lexer::error(Loc loc, const char *format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    ::verror(loc, format, args);
+    va_end(args);
+
+    errors = true;
+}
+/**************************************
+* Report a printf-style deprecation message at the location of the
+* current token. The lexer is marked as having seen errors only when
+* global.params.useDeprecated is DIAGNOSTICerror.
+*/
+void Lexer::deprecation(const char *format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    ::vdeprecation(token.loc, format, args);
+    va_end(args);
+
+    if (DIAGNOSTICerror == global.params.useDeprecated)
+    {
+        errors = true;
+    }
+}
+/**************************************
+* Advance to the next token and return its value.
+* If a token was already read ahead (token.next is set), it is copied
+* into the current token and released; otherwise a new one is scanned.
+*/
+TOK Lexer::nextToken()
+{
+    if (!token.next)
+    {
+        scan(&token);
+        return token.value;
+    }
+
+    Token *queued = token.next;
+    memcpy(&token, queued, sizeof(Token));
+    queued->free();
+    return token.value;
+}
+/**************************************
+* Return the token that follows ct.
+* If ct has no successor yet, one is allocated, scanned and linked
+* in as ct->next so later calls return the same token.
+*/
+Token *Lexer::peek(Token *ct)
+{
+    Token *ahead = ct->next;
+    if (!ahead)
+    {
+        ahead = Token::alloc();
+        scan(ahead);
+        ct->next = ahead;
+    }
+    return ahead;
+}
+/***********************
+* Return the value of the token after the current one,
+* without advancing the current token.
+*/
+TOK Lexer::peekNext()
+{
+    Token *ahead = peek(&token);
+    return ahead->value;
+}
+/***********************
+* Return the value of the token two positions after the current one,
+* without advancing the current token.
+*/
+TOK Lexer::peekNext2()
+{
+    return peek(peek(&token))->value;
+}
+/*********************************
+* tk is on the opening (.
+* Peek forward, tracking nested ( ) and { }, and return the token
+* that follows the matching closing ).
+* The search also stops, returning the token it stopped on, at:
+*      - a } with no matching { seen since the start,
+*      - a ; that is not inside { },
+*      - end of file.
+*/
+Token *Lexer::peekPastParen(Token *tk)
+{
+    int parenDepth = 1;
+    int braceDepth = 0;
+    for (;;)
+    {
+        tk = peek(tk);
+        if (tk->value == TOKlparen)
+        {
+            ++parenDepth;
+        }
+        else if (tk->value == TOKrparen)
+        {
+            if (--parenDepth == 0)
+                return peek(tk);        // token just past the closing )
+        }
+        else if (tk->value == TOKlcurly)
+        {
+            ++braceDepth;
+        }
+        else if (tk->value == TOKrcurly)
+        {
+            if (--braceDepth < 0)
+                return tk;
+        }
+        else if (tk->value == TOKsemicolon)
+        {
+            if (braceDepth == 0)
+                return tk;
+        }
+        else if (tk->value == TOKeof)
+        {
+            return tk;
+        }
+    }
+}
+/****************************
+* Turn next token in buffer into a token.
+* White space is skipped, and so are comments unless commentToken is set
+* (in which case a comment is returned as TOKcomment).
+* On return t->ptr and t->loc describe where the token starts and
+* t->value is its kind; literal and identifier payloads are filled in
+* either here or by the helper routines called from here.
+* At a 0 or 0x1A byte the result is TOKeof and p is not advanced.
+*/
+void Lexer::scan(Token *t)
+{
+unsigned lastLine = scanloc.linnum; // line number on entry; compared with a doc comment's start line when calling getDocComment()
+Loc startLoc;
+t->blockComment = NULL;
+t->lineComment = NULL;
+while (1)
+{
+t->ptr = p; // re-recorded on every pass, so skipped white space and comments are not part of the token
+//printf("p = %p, *p = '%c'\n",p,*p);
+t->loc = loc();
+switch (*p)
+{
+case 0:
+case 0x1A:
+t->value = TOKeof;                      // end of file
+return;
+case ' ':
+case '\t':
+case '\v':
+case '\f':
+p++;
+continue;                       // skip white space
+case '\r':
+p++;
+if (*p != '\n')                 // if CR stands by itself
+endOfLine();
+continue;                       // skip white space
+case '\n':
+p++;
+endOfLine();
+continue;                       // skip white space
+case '0':   case '1':   case '2':   case '3':   case '4':
+case '5':   case '6':   case '7':   case '8':   case '9':
+t->value = number(t);
+return;
+case '\'':
+t->value = charConstant(t);
+return;
+case 'r': // r"..." is a wysiwyg string; any other 'r' starts an identifier
+if (p[1] != '"')
+goto case_ident;
+p++;
+/* fall through */
+case '`':
+t->value = wysiwygStringConstant(t, *p);
+return;
+case 'x': // x"..." is a hex string; any other 'x' starts an identifier
+if (p[1] != '"')
+goto case_ident;
+p++;
+t->value = hexStringConstant(t);
+return;
+case 'q': // q"..." is a delimited string, q{...} a token string; any other 'q' starts an identifier
+if (p[1] == '"')
+{
+p++;
+t->value = delimitedStringConstant(t);
+return;
+}
+else if (p[1] == '{')
+{
+p++;
+t->value = tokenStringConstant(t);
+return;
+}
+else
+goto case_ident;
+case '"':
+t->value = escapeStringConstant(t);
+return;
+case 'a':   case 'b':   case 'c':   case 'd':   case 'e':
+case 'f':   case 'g':   case 'h':   case 'i':   case 'j':
+case 'k':   case 'l':   case 'm':   case 'n':   case 'o':
+case 'p':   /*case 'q': case 'r':*/ case 's':   case 't':
+case 'u':   case 'v':   case 'w': /*case 'x':*/ case 'y':
+case 'z':
+case 'A':   case 'B':   case 'C':   case 'D':   case 'E':
+case 'F':   case 'G':   case 'H':   case 'I':   case 'J':
+case 'K':   case 'L':   case 'M':   case 'N':   case 'O':
+case 'P':   case 'Q':   case 'R':   case 'S':   case 'T':
+case 'U':   case 'V':   case 'W':   case 'X':   case 'Y':
+case 'Z':
+case '_':
+case_ident:
+{   utf8_t c;
+while (1)
+{
+c = *++p;
+if (isidchar(c))
+continue;
+else if (c & 0x80) // non-ASCII byte: decode it and keep going only if it is a Unicode alphabetic character
+{   const utf8_t *s = p;
+unsigned u = decodeUTF();
+if (isUniAlpha(u))
+continue;
+error("char 0x%04x not allowed in identifier", u);
+p = s; // back up so the rejected character is not part of the identifier
+}
+break;
+}
+Identifier *id = Identifier::idPool((const char *)t->ptr, p - t->ptr);
+t->ident = id;
+t->value = (TOK) id->getValue();
+anyToken = 1;
+if (*t->ptr == '_')     // if special identifier token
+{
+static bool initdone = false;
+static char date[11+1];
+static char time[8+1];
+static char timestamp[24+1];
+if (!initdone)       // lazy evaluation
+{
+initdone = true;
+time_t ct;
+::time(&ct);
+char *p = ctime(&ct); // NOTE: this local p shadows the member p until the end of this block
+assert(p);
+sprintf(&date[0], "%.6s %.4s", p + 4, p + 20); // ctime() text is "Www Mmm dd hh:mm:ss yyyy\n": take "Mmm dd" and "yyyy"
+sprintf(&time[0], "%.8s", p + 11); // the "hh:mm:ss" part
+sprintf(&timestamp[0], "%.24s", p); // everything except the trailing newline
+}
+if (id == Id::DATE)
+{
+t->ustring = (utf8_t *)date;
+goto Lstr;
+}
+else if (id == Id::TIME)
+{
+t->ustring = (utf8_t *)time;
+goto Lstr;
+}
+else if (id == Id::VENDOR)
+{
+t->ustring = (utf8_t *)const_cast<char *>(global.vendor);
+goto Lstr;
+}
+else if (id == Id::TIMESTAMP)
+{
+t->ustring = (utf8_t *)timestamp;
+Lstr:
+t->value = TOKstring;
+t->postfix = 0;
+t->len = (unsigned)strlen((char *)t->ustring);
+}
+else if (id == Id::VERSIONX)
+{   unsigned major = 0;
+unsigned minor = 0;
+bool point = false;
+for (const char *p = global.version + 1; 1; p++) // starts at the second character of global.version; this p also shadows the member p
+{
+c = *p;
+if (isdigit((utf8_t)c))
+minor = minor * 10 + c - '0';
+else if (c == '.')
+{
+if (point)
+break;      // ignore everything after second '.'
+point = true;
+major = minor;
+minor = 0;
+}
+else
+break;
+}
+t->value = TOKint64v;
+t->uns64value = major * 1000 + minor; // the digits before the first '.' times 1000, plus the digits after it
+}
+else if (id == Id::EOFX)
+{
+t->value = TOKeof;
+// Advance scanner to end of file
+while (!(*p == 0 || *p == 0x1A))
+p++;
+}
+}
+//printf("t->value = %d\n",t->value);
+return;
+}
+case '/':
+p++;
+switch (*p)
+{
+case '=':
+p++;
+t->value = TOKdivass;
+return;
+case '*':
+p++;
+startLoc = loc();
+while (1)
+{
+while (1)
+{   utf8_t c = *p;
+switch (c)
+{
+case '/':
+break;
+case '\n':
+endOfLine();
+p++;
+continue;
+case '\r':
+p++;
+if (*p != '\n')
+endOfLine();
+continue;
+case 0:
+case 0x1A:
+error("unterminated /* */ comment");
+p = end;
+t->loc = loc();
+t->value = TOKeof;
+return;
+default:
+if (c & 0x80)
+{   unsigned u = decodeUTF();
+if (u == PS || u == LS)
+endOfLine();
+}
+p++;
+continue;
+}
+break;
+}
+p++;
+if (p[-2] == '*' && p - 3 != t->ptr) // the '/' just passed follows a '*', and that '*' is not the one that opened the comment ("/*/")
+break;
+}
+if (commentToken)
+{
+t->loc = startLoc;
+t->value = TOKcomment;
+return;
+}
+else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
+{   // if /** but not /**/
+getDocComment(t, lastLine == startLoc.linnum);
+}
+continue;
+case '/':           // do // style comments
+startLoc = loc();
+while (1)
+{   utf8_t c = *++p;
+switch (c)
+{
+case '\n':
+break;
+case '\r':
+if (p[1] == '\n')
+p++;
+break;
+case 0:
+case 0x1A:
+if (commentToken)
+{
+p = end;
+t->loc = startLoc;
+t->value = TOKcomment;
+return;
+}
+if (doDocComment && t->ptr[2] == '/')
+getDocComment(t, lastLine == startLoc.linnum);
+p = end;
+t->loc = loc();
+t->value = TOKeof;
+return;
+default:
+if (c & 0x80)
+{   unsigned u = decodeUTF();
+if (u == PS || u == LS)
+break;
+}
+continue;
+}
+break;
+}
+if (commentToken)
+{
+p++;
+endOfLine();
+t->loc = startLoc;
+t->value = TOKcomment;
+return;
+}
+if (doDocComment && t->ptr[2] == '/')
+getDocComment(t, lastLine == startLoc.linnum);
+p++;
+endOfLine();
+continue;
+case '+':
+{   int nest; // current nesting depth of /+ +/ comments
+startLoc = loc();
+p++;
+nest = 1;
+while (1)
+{   utf8_t c = *p;
+switch (c)
+{
+case '/':
+p++;
+if (*p == '+')
+{
+p++;
+nest++;
+}
+continue;
+case '+':
+p++;
+if (*p == '/')
+{
+p++;
+if (--nest == 0)
+break;
+}
+continue;
+case '\r':
+p++;
+if (*p != '\n')
+endOfLine();
+continue;
+case '\n':
+endOfLine();
+p++;
+continue;
+case 0:
+case 0x1A:
+error("unterminated /+ +/ comment");
+p = end;
+t->loc = loc();
+t->value = TOKeof;
+return;
+default:
+if (c & 0x80)
+{   unsigned u = decodeUTF();
+if (u == PS || u == LS)
+endOfLine();
+}
+p++;
+continue;
+}
+break;
+}
+if (commentToken)
+{
+t->loc = startLoc;
+t->value = TOKcomment;
+return;
+}
+if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
+{   // if /++ but not /++/
+getDocComment(t, lastLine == startLoc.linnum);
+}
+continue;
+}
+default:
+break;
+}
+t->value = TOKdiv;
+return;
+case '.':
+p++;
+if (isdigit(*p))
+{   /* Note that we don't allow ._1 and ._ as being
+* valid floating point numbers.
+*/
+p--;
+t->value = inreal(t);
+}
+else if (p[0] == '.')
+{
+if (p[1] == '.')
+{   p += 2;
+t->value = TOKdotdotdot;
+}
+else
+{   p++;
+t->value = TOKslice;
+}
+}
+else
+t->value = TOKdot;
+return;
+case '&':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKandass;
+}
+else if (*p == '&')
+{   p++;
+t->value = TOKandand;
+}
+else
+t->value = TOKand;
+return;
+case '|':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKorass;
+}
+else if (*p == '|')
+{   p++;
+t->value = TOKoror;
+}
+else
+t->value = TOKor;
+return;
+case '-':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKminass;
+}
+else if (*p == '-')
+{   p++;
+t->value = TOKminusminus;
+}
+else
+t->value = TOKmin;
+return;
+case '+':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKaddass;
+}
+else if (*p == '+')
+{   p++;
+t->value = TOKplusplus;
+}
+else
+t->value = TOKadd;
+return;
+case '<':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKle;                   // <=
+}
+else if (*p == '<')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKshlass;           // <<=
+}
+else
+t->value = TOKshl;              // <<
+}
+else if (*p == '>')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKleg;              // <>=
+}
+else
+t->value = TOKlg;               // <>
+}
+else
+t->value = TOKlt;                   // <
+return;
+case '>':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKge;                   // >=
+}
+else if (*p == '>')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKshrass;           // >>=
+}
+else if (*p == '>')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKushrass;      // >>>=
+}
+else
+t->value = TOKushr;         // >>>
+}
+else
+t->value = TOKshr;              // >>
+}
+else
+t->value = TOKgt;                   // >
+return;
+case '!':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKnotequal;         // !=
+}
+else if (*p == '<')
+{   p++;
+if (*p == '>')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKunord; // !<>=
+}
+else
+t->value = TOKue;   // !<>
+}
+else if (*p == '=')
+{   p++;
+t->value = TOKug;       // !<=
+}
+else
+t->value = TOKuge;      // !<
+}
+else if (*p == '>')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKul;       // !>=
+}
+else
+t->value = TOKule;      // !>
+}
+else
+t->value = TOKnot;          // !
+return;
+case '=':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKequal;            // ==
+}
+else if (*p == '>')
+{   p++;
+t->value = TOKgoesto;               // =>
+}
+else
+t->value = TOKassign;               // =
+return;
+case '~':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKcatass;               // ~=
+}
+else
+t->value = TOKtilde;                // ~
+return;
+case '^':
+p++;
+if (*p == '^')
+{   p++;
+if (*p == '=')
+{   p++;
+t->value = TOKpowass;  // ^^=
+}
+else
+t->value = TOKpow;     // ^^
+}
+else if (*p == '=')
+{   p++;
+t->value = TOKxorass;    // ^=
+}
+else
+t->value = TOKxor;       // ^
+return;
+case '(': p++; t->value = TOKlparen; return;
+case ')': p++; t->value = TOKrparen; return;
+case '[': p++; t->value = TOKlbracket; return;
+case ']': p++; t->value = TOKrbracket; return;
+case '{': p++; t->value = TOKlcurly; return;
+case '}': p++; t->value = TOKrcurly; return;
+case '?': p++; t->value = TOKquestion; return;
+case ',': p++; t->value = TOKcomma; return;
+case ';': p++; t->value = TOKsemicolon; return;
+case ':': p++; t->value = TOKcolon; return;
+case '$': p++; t->value = TOKdollar; return;
+case '@': p++; t->value = TOKat; return;
+case '*':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKmulass;
+}
+else
+t->value = TOKmul;
+return;
+case '%':
+p++;
+if (*p == '=')
+{   p++;
+t->value = TOKmodass;
+}
+else
+t->value = TOKmod;
+return;
+case '#':
+{
+p++;
+Token n;
+scan(&n); // lex the token that follows '#' to find out which directive this is
+if (n.value == TOKidentifier)
+{
+if (n.ident == Id::line)
+{
+poundLine();
+continue;
+}
+else
+{
+const Loc locx = loc();
+warning(locx, "C preprocessor directive `#%s` is not supported", n.ident->toChars());
+}
+}
+else if (n.value == TOKif)
+{
+error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
+}
+t->value = TOKpound;
+return;
+}
+default:
+{   unsigned c = *p;
+if (c & 0x80)
+{   c = decodeUTF();
+// Check for start of unicode identifier
+if (isUniAlpha(c))
+goto case_ident;
+if (c == PS || c == LS)
+{
+endOfLine();
+p++;
+continue;
+}
+}
+if (c < 0x80 && isprint(c))
+error("character '%c' is not a valid token", c);
+else
+error("character 0x%02x is not a valid token", c);
+p++;
+continue;
+}
+}
+}
+}
+/*******************************************
+* Parse escape sequence.
+* On entry p is on the character that follows the backslash; the
+* callers in this file have already stepped over the backslash itself.
+* Handles the single-character escapes, \x (2 hex digits), \u (4),
+* \U (8), up to three octal digits, and named entities of the form \&name;
+* Returns the value of the escape. p is advanced past what was
+* recognized; at a 0 or 0x1A byte p is left in place and '\\' is returned.
+* Malformed sequences are reported through error().
+*/
+unsigned Lexer::escapeSequence()
+{   unsigned c = *p; // the character that selects the kind of escape
+int n;
+int ndigits;
+switch (c)
+{
+case '\'':
+case '"':
+case '?':
+case '\\':
+Lconsume:
+p++;
+break;
+case 'a':       c = 7;          goto Lconsume;
+case 'b':       c = 8;          goto Lconsume;
+case 'f':       c = 12;         goto Lconsume;
+case 'n':       c = 10;         goto Lconsume;
+case 'r':       c = 13;         goto Lconsume;
+case 't':       c = 9;          goto Lconsume;
+case 'v':       c = 11;         goto Lconsume;
+case 'u':
+ndigits = 4;
+goto Lhex;
+case 'U':
+ndigits = 8;
+goto Lhex;
+case 'x':
+ndigits = 2;
+Lhex:
+p++;
+c = *p;
+if (ishex((utf8_t)c))
+{   unsigned v;
+n = 0;
+v = 0;
+while (1)
+{
+if (isdigit((utf8_t)c))
+c -= '0';
+else if (islower(c))
+c -= 'a' - 10;
+else
+c -= 'A' - 10;
+v = v * 16 + c;
+c = *++p;
+if (++n == ndigits) // all expected digits consumed
+break;
+if (!ishex((utf8_t)c))
+{   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
+break;
+}
+}
+if (ndigits != 2 && !utf_isValidDchar(v)) // only \u and \U results are checked; \x results are not
+{   error("invalid UTF character \\U%08x", v);
+v = '?';        // recover with valid UTF character
+}
+c = v;
+}
+else
+error("undefined escape hex sequence \\%c",c);
+break;
+case '&':                       // named character entity
+for (const utf8_t *idstart = ++p; 1; p++)
+{
+switch (*p)
+{
+case ';':
+c = HtmlNamedEntity(idstart, p - idstart);
+if (c == ~0U)
+{   error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
+c = ' ';
+}
+p++;
+break;
+default:
+if (isalpha(*p) ||
+(p != idstart && isdigit(*p))) // a name is a letter followed by letters or digits
+continue;
+error("unterminated named entity &%.*s;", (int)(p - idstart + 1), idstart);
+break;
+}
+break;
+}
+break;
+case 0:
+case 0x1A:                      // end of file
+c = '\\';
+break;
+default:
+if (isoctal((utf8_t)c))
+{   unsigned v;
+n = 0;
+v = 0;
+do
+{
+v = v * 8 + (c - '0');
+c = *++p;
+} while (++n < 3 && isoctal((utf8_t)c)); // at most three octal digits
+c = v;
+if (c > 0xFF)
+error("escape octal sequence \\%03o is larger than \\377", c);
+}
+else
+error("undefined escape sequence \\%c",c);
+break;
+}
+return c;
+}
+/**************************************
+* Lex a wysiwyg string:
+*      r"..."  or  `...`
+* Characters are copied as they are (no escape processing), except
+* that every end of line is stored as a single \n.
+* Input:
+*      p is on the opening quote
+*      tc is the quote character that ends the string
+* Output:
+*      t->ustring, t->len and t->postfix are filled in
+* Returns:
+*      TOKstring
+* If the end of file is reached first, an error is reported, an
+* empty string is returned and p is left on the end-of-file byte.
+*/
+TOK Lexer::wysiwygStringConstant(Token *t, int tc)
+{
+    int c;
+    Loc start = loc();
+    p++;
+    stringbuffer.reset();
+    while (1)
+    {
+        c = *p++;
+        switch (c)
+        {
+            case '\n':
+                endOfLine();
+                break;
+            case '\r':
+                if (*p == '\n')
+                    continue;   // ignore
+                c = '\n';       // treat EndOfLine as \n character
+                endOfLine();
+                break;
+            case 0:
+            case 0x1A:
+                error("unterminated string constant starting at %s", start.toChars());
+                t->ustring = (utf8_t *)const_cast<char *>("");
+                t->len = 0;
+                t->postfix = 0;
+                // Rewind onto the end-of-file byte (as escapeStringConstant
+                // does) so the next scan() sees it instead of reading past it.
+                p--;
+                return TOKstring;
+            case '"':
+            case '`':
+                if (c == tc)
+                {
+                    t->len = (unsigned)stringbuffer.offset;
+                    stringbuffer.writeByte(0);
+                    t->ustring = (utf8_t *)mem.xmalloc(stringbuffer.offset);
+                    memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
+                    stringPostfix(t);
+                    return TOKstring;
+                }
+                break;          // the other quote character is ordinary text
+            default:
+                if (c & 0x80)
+                {
+                    // Multi-byte character: decode from its first byte.
+                    p--;
+                    unsigned u = decodeUTF();
+                    p++;
+                    if (u == PS || u == LS)
+                        endOfLine();
+                    stringbuffer.writeUTF8(u);
+                    continue;
+                }
+                break;
+        }
+        stringbuffer.writeByte(c);
+    }
+}
+/**************************************
+* Lex hex strings:
+*      x"0A ae 34FE BD"
+* Each pair of hex digits becomes one byte of the string; white space
+* and ends of line between digits are skipped.
+* Input:
+*      p is on the opening "
+* Output:
+*      t->ustring, t->len and t->postfix are filled in
+* Returns:
+*      TOKxstring
+* If the end of file is reached first, an error is reported, an
+* empty string is returned and p is left on the end-of-file byte.
+*/
+TOK Lexer::hexStringConstant(Token *t)
+{
+    unsigned c;
+    Loc start = loc();
+    unsigned n = 0;     // number of hex digits seen so far
+    unsigned v = ~0;    // dead assignment, needed to suppress warning
+    p++;
+    stringbuffer.reset();
+    while (1)
+    {
+        c = *p++;
+        switch (c)
+        {
+            case ' ':
+            case '\t':
+            case '\v':
+            case '\f':
+                continue;                       // skip white space
+            case '\r':
+                if (*p == '\n')
+                    continue;                   // ignore
+                // Treat isolated '\r' as if it were a '\n'
+                /* fall through */
+            case '\n':
+                endOfLine();
+                continue;
+            case 0:
+            case 0x1A:
+                error("unterminated string constant starting at %s", start.toChars());
+                t->ustring = (utf8_t *)const_cast<char *>("");
+                t->len = 0;
+                t->postfix = 0;
+                // Rewind onto the end-of-file byte (as escapeStringConstant
+                // does) so the next scan() sees it instead of reading past it.
+                p--;
+                return TOKxstring;
+            case '"':
+                if (n & 1)
+                {
+                    error("odd number (%d) of hex characters in hex string", n);
+                    stringbuffer.writeByte(v);
+                }
+                t->len = (unsigned)stringbuffer.offset;
+                stringbuffer.writeByte(0);
+                t->ustring = (utf8_t *)mem.xmalloc(stringbuffer.offset);
+                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
+                stringPostfix(t);
+                return TOKxstring;
+            default:
+                if (c >= '0' && c <= '9')
+                    c -= '0';
+                else if (c >= 'a' && c <= 'f')
+                    c -= 'a' - 10;
+                else if (c >= 'A' && c <= 'F')
+                    c -= 'A' - 10;
+                else if (c & 0x80)
+                {
+                    p--;
+                    unsigned u = decodeUTF();
+                    p++;
+                    if (u == PS || u == LS)
+                    {
+                        // A Unicode line break is skipped like '\n' above;
+                        // it must not be counted as a hex digit.
+                        endOfLine();
+                        continue;
+                    }
+                    else
+                        error("non-hex character \\u%04x in hex string", u);
+                }
+                else
+                    error("non-hex character '%c' in hex string", c);
+                if (n & 1)
+                {
+                    // Second digit of a pair: emit the completed byte.
+                    v = (v << 4) | c;
+                    stringbuffer.writeByte(v);
+                }
+                else
+                    v = c;      // first digit of a pair
+                n++;
+                break;
+        }
+    }
+}
+/**************************************
+* Lex delimited strings:
+*      q"(foo(xxx))"   // "foo(xxx)"
+*      q"[foo(]"       // "foo("
+*      q"/foo]/"       // "foo]"
+*      q"HERE
+*      foo
+*      HERE"           // "foo\n"
+* The delimiters ( { [ < nest; any other single character is its own
+* closing delimiter; an identifier starts a heredoc that ends at a
+* line beginning with the same identifier.
+* Input:
+*      p is on the "
+* Output:
+*      t->ustring, t->len and t->postfix are filled in
+* Returns:
+*      TOKstring
+* If the end of file is reached first, an error is reported, an
+* empty string is returned and p is left on the end-of-file byte.
+*/
+TOK Lexer::delimitedStringConstant(Token *t)
+{
+    unsigned c;
+    Loc start = loc();
+    unsigned delimleft = 0;     // opening delimiter, 0 until it has been read
+    unsigned delimright = 0;    // closing delimiter
+    unsigned nest = 1;          // 1 if the delimiters nest
+    unsigned nestcount = ~0; // dead assignment, needed to suppress warning
+    Identifier *hereid = NULL;  // heredoc identifier, if any
+    unsigned blankrol = 0;      // 1 while the rest of the heredoc opening line must be blank
+    unsigned startline = 0;     // 1 when the previous character ended a line
+    p++;
+    stringbuffer.reset();
+    while (1)
+    {
+        c = *p++;
+        //printf("c = '%c'\n", c);
+        switch (c)
+        {
+            case '\n':
+            Lnextline:
+                endOfLine();
+                startline = 1;
+                if (blankrol)
+                {
+                    blankrol = 0;
+                    continue;
+                }
+                if (hereid)
+                {
+                    stringbuffer.writeUTF8(c);
+                    continue;
+                }
+                break;
+            case '\r':
+                if (*p == '\n')
+                    continue;   // ignore
+                c = '\n';       // treat EndOfLine as \n character
+                goto Lnextline;
+            case 0:
+            case 0x1A:
+                error("unterminated delimited string constant starting at %s", start.toChars());
+                t->ustring = (utf8_t *)const_cast<char *>("");
+                t->len = 0;
+                t->postfix = 0;
+                // Rewind onto the end-of-file byte (as escapeStringConstant
+                // does) so the next scan() sees it instead of reading past it.
+                p--;
+                return TOKstring;
+            default:
+                if (c & 0x80)
+                {
+                    p--;
+                    c = decodeUTF();
+                    p++;
+                    if (c == PS || c == LS)
+                        goto Lnextline;
+                }
+                break;
+        }
+        if (delimleft == 0)
+        {
+            delimleft = c;
+            nest = 1;
+            nestcount = 1;
+            if (c == '(')
+                delimright = ')';
+            else if (c == '{')
+                delimright = '}';
+            else if (c == '[')
+                delimright = ']';
+            else if (c == '<')
+                delimright = '>';
+            // c may be a decoded code point here; the <ctype.h> functions
+            // are only defined for unsigned char values, so guard them.
+            else if ((c < 0x80 && isalpha(c)) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
+            {
+                // Start of identifier; must be a heredoc
+                Token tok;
+                p--;
+                scan(&tok);               // read in heredoc identifier
+                if (tok.value != TOKidentifier)
+                {
+                    error("identifier expected for heredoc, not %s", tok.toChars());
+                    delimright = c;
+                }
+                else
+                {
+                    hereid = tok.ident;
+                    //printf("hereid = '%s'\n", hereid->toChars());
+                    blankrol = 1;
+                }
+                nest = 0;
+            }
+            else
+            {
+                delimright = c;
+                nest = 0;
+                if (c < 0x80 && isspace(c))
+                    error("delimiter cannot be whitespace");
+            }
+        }
+        else
+        {
+            if (blankrol)
+            {
+                error("heredoc rest of line should be blank");
+                blankrol = 0;
+                continue;
+            }
+            if (nest == 1)
+            {
+                if (c == delimleft)
+                    nestcount++;
+                else if (c == delimright)
+                {
+                    nestcount--;
+                    if (nestcount == 0)
+                        goto Ldone;
+                }
+            }
+            else if (c == delimright)
+                goto Ldone;
+            // A heredoc identifier may begin with '_' (accepted above), so
+            // the terminator check has to look at '_' as well as letters.
+            if (startline && c < 0x80 && (isalpha(c) || c == '_') && hereid)
+            {
+                Token tok;
+                const utf8_t *psave = p;
+                p--;
+                scan(&tok);               // read in possible heredoc identifier
+                //printf("endid = '%s'\n", tok.ident->toChars());
+                if (tok.value == TOKidentifier && tok.ident->equals(hereid))
+                {
+                    /* should check that rest of line is blank
+                     */
+                    goto Ldone;
+                }
+                p = psave;      // not the terminator: resume after c
+            }
+            stringbuffer.writeUTF8(c);
+            startline = 0;
+        }
+    }
+Ldone:
+    if (*p == '"')
+        p++;
+    else if (hereid)
+        error("delimited string must end in %s\"", hereid->toChars());
+    else
+        error("delimited string must end in %c\"", delimright);
+    t->len = (unsigned)stringbuffer.offset;
+    stringbuffer.writeByte(0);
+    t->ustring = (utf8_t *)mem.xmalloc(stringbuffer.offset);
+    memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
+    stringPostfix(t);
+    return TOKstring;
+}
+/**************************************
+* Lex token strings:
+*      q{ foo(xxx) } // " foo(xxx) "
+*      q{foo(}       // "foo("
+*      q{{foo}"}"}   // "{foo}"}""
+* The contents are lexed as tokens only to find the matching }; the
+* string value is the source text between the outer braces.
+* Input:
+*      p is on the opening {
+* Output:
+*      t->ustring, t->len and t->postfix are filled in
+* Returns:
+*      TOKstring
+*/
+TOK Lexer::tokenStringConstant(Token *t)
+{
+    Loc start = loc();
+    unsigned depth = 1;
+    const utf8_t *textBegin = ++p;      // first character after the opening {
+    for (;;)
+    {
+        Token tok;
+        scan(&tok);
+        if (tok.value == TOKlcurly)
+        {
+            ++depth;
+        }
+        else if (tok.value == TOKrcurly)
+        {
+            if (--depth != 0)
+                continue;
+            // p is just past the closing }, which is not part of the text.
+            t->len = (unsigned)(p - 1 - textBegin);
+            t->ustring = (utf8_t *)mem.xmalloc(t->len + 1);
+            memcpy(t->ustring, textBegin, t->len);
+            t->ustring[t->len] = 0;
+            stringPostfix(t);
+            return TOKstring;
+        }
+        else if (tok.value == TOKeof)
+        {
+            error("unterminated token string constant starting at %s", start.toChars());
+            t->ustring = (utf8_t *)const_cast<char *>("");
+            t->len = 0;
+            t->postfix = 0;
+            return TOKstring;
+        }
+    }
+}
+/**************************************
+* Lex a double-quoted string in which backslash escapes are processed.
+* Input:
+*      p is on the opening "
+* Output:
+*      t->ustring, t->len and t->postfix are filled in
+* Returns:
+*      TOKstring
+* If the end of file is reached first, an error is reported, an
+* empty string is returned and p is left on the end-of-file byte.
+*/
+TOK Lexer::escapeStringConstant(Token *t)
+{
+unsigned c;
+Loc start = loc();
+p++;
+stringbuffer.reset();
+while (1)
+{
+c = *p++;
+switch (c)
+{
+case '\\':
+switch (*p)
+{
+case 'u':
+case 'U':
+case '&':
+c = escapeSequence();
+stringbuffer.writeUTF8(c); // the results of \u, \U and \& are written UTF-8 encoded
+continue;
+default:
+c = escapeSequence(); // every other escape is written as a single byte by the writeByte() at the bottom of the loop
+break;
+}
+break;
+case '\n':
+endOfLine();
+break;
+case '\r':
+if (*p == '\n')
+continue;   // ignore
+c = '\n';       // treat EndOfLine as \n character
+endOfLine();
+break;
+case '"':
+t->len = (unsigned)stringbuffer.offset; // length excludes the terminating 0 written next
+stringbuffer.writeByte(0);
+t->ustring = (utf8_t *)mem.xmalloc(stringbuffer.offset);
+memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
+stringPostfix(t);
+return TOKstring;
+case 0:
+case 0x1A:
+p--; // step back onto the end-of-file byte so the next scan() sees it
+error("unterminated string constant starting at %s", start.toChars());
+t->ustring = (utf8_t *)const_cast<char *>("");
+t->len = 0;
+t->postfix = 0;
+return TOKstring;
+default:
+if (c & 0x80)
+{
+p--;
+c = decodeUTF();
+if (c == LS || c == PS)
+{   c = '\n';
+endOfLine();
+}
+p++;
+stringbuffer.writeUTF8(c);
+continue;
+}
+break;
+}
+stringbuffer.writeByte(c);
+}
+}
+/**************************************
+* Lex a character constant such as 'a', '\n' or '\u20AC'.
+* Input:
+*      p is on the opening '
+* Output:
+*      t->uns64value is the character's value ('?' after an error)
+* Returns:
+*      TOKcharv, TOKwcharv or TOKdcharv, chosen from the form of the
+*      escape or the range of the decoded character
+* If the constant is cut short by the end of file, p is left on the
+* end-of-file byte.
+*/
+TOK Lexer::charConstant(Token *t)
+{
+    unsigned c;
+    TOK tk = TOKcharv;
+    //printf("Lexer::charConstant\n");
+    p++;
+    c = *p++;
+    switch (c)
+    {
+        case '\\':
+            switch (*p)
+            {
+                case 'u':
+                    t->uns64value = escapeSequence();
+                    tk = TOKwcharv;
+                    break;
+                case 'U':
+                case '&':
+                    t->uns64value = escapeSequence();
+                    tk = TOKdcharv;
+                    break;
+                default:
+                    t->uns64value = escapeSequence();
+                    break;
+            }
+            break;
+        case 0:
+        case 0x1A:
+            // Rewind onto the end-of-file byte (as escapeStringConstant
+            // does) so the next scan() sees it instead of reading past it.
+            p--;
+            goto Lunterminated;
+        case '\n':
+        L1:
+            endOfLine();
+            /* fall through */
+        case '\r':
+        case '\'':
+        Lunterminated:
+            error("unterminated character constant");
+            t->uns64value = '?';
+            return tk;
+        default:
+            if (c & 0x80)
+            {
+                // Multi-byte character: decode from its first byte.
+                p--;
+                c = decodeUTF();
+                p++;
+                if (c == LS || c == PS)
+                    goto L1;
+                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
+                    tk = TOKwcharv;
+                else
+                    tk = TOKdcharv;
+            }
+            t->uns64value = c;
+            break;
+    }
+    if (*p != '\'')
+    {
+        error("unterminated character constant");
+        t->uns64value = '?';
+        return tk;
+    }
+    p++;
+    return tk;
+}
+/***************************************
+* Read the optional postfix character of a string literal.
+* If p is on 'c', 'w' or 'd', that character is stored in t->postfix
+* and consumed; otherwise t->postfix is set to 0 and p is unchanged.
+*/
+void Lexer::stringPostfix(Token *t)
+{
+    const utf8_t suffix = *p;
+    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
+    {
+        t->postfix = suffix;
+        ++p;
+    }
+    else
+    {
+        t->postfix = 0;
+    }
+}
+/**************************************
+* Read in a number.
+* Integers can be decimal, binary (0b), octal (leading 0) or hex (0x),
+* may contain '_' separators, and may carry the suffixes U, L, UL, LU.
+* The value is stored in t->uns64value.
+* If the text turns out to be a floating point literal, scanning
+* restarts from the first character in inreal().
+* Input:
+*      p is on the first digit
+* Returns:
+*      TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer,
+*      otherwise whatever inreal() returns
+*/
+TOK Lexer::number(Token *t)
+{
+    int base = 10;
+    const utf8_t *start = p;
+    unsigned c;
+    uinteger_t n = 0;                       // unsigned >=64 bit integer type
+    int d;
+    bool err = false;                       // an error has already been reported
+    bool overflow = false;
+    c = *p;
+    if (c == '0')
+    {
+        // A leading 0 may introduce a base prefix.
+        ++p;
+        c = *p;
+        switch (c)
+        {
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+                n = c - '0';
+                ++p;
+                base = 8;
+                break;
+            case 'x':
+            case 'X':
+                ++p;
+                base = 16;
+                break;
+            case 'b':
+            case 'B':
+                ++p;
+                base = 2;
+                break;
+            case '.':
+                if (p[1] == '.')
+                    goto Ldone; // if ".."
+                if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
+                    goto Ldone; // if ".identifier" or ".unicode"
+                goto Lreal; // '.' is part of current token
+            case 'i':
+            case 'f':
+            case 'F':
+                goto Lreal;
+            case '_':
+                ++p;
+                base = 8;
+                break;
+            case 'L':
+                if (p[1] == 'i')
+                    goto Lreal;
+                break;
+            default:
+                break;
+        }
+    }
+    while (1)
+    {
+        c = *p;
+        switch (c)
+        {
+            case '0': case '1':
+                ++p;
+                d = c - '0';
+                break;
+            case '2': case '3':
+            case '4': case '5': case '6': case '7':
+                if (base == 2 && !err)
+                {
+                    error("binary digit expected");
+                    err = true;
+                }
+                ++p;
+                d = c - '0';
+                break;
+            case '8': case '9':
+                ++p;
+                if (base < 10 && !err)
+                {
+                    error("radix %d digit expected, not '%c'", base, c);
+                    err = true;
+                }
+                d = c - '0';
+                break;
+            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                ++p;
+                if (base != 16)
+                {
+                    if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
+                        goto Lreal;
+                    if (!err)
+                    {
+                        error("radix %d digit expected, not '%c'", base, c);
+                        err = true;
+                    }
+                }
+                if (c >= 'a')
+                    d = c + 10 - 'a';
+                else
+                    d = c + 10 - 'A';
+                break;
+            case 'L':
+                if (p[1] == 'i')
+                    goto Lreal;
+                goto Ldone;
+            case '.':
+                if (p[1] == '.')
+                    goto Ldone; // if ".."
+                if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
+                    goto Ldone; // if ".identifier" or ".unicode"
+                goto Lreal; // otherwise as part of a floating point literal
+            case 'p':
+            case 'P':
+            case 'i':
+            Lreal:
+                p = start;
+                return inreal(t);
+            case '_':
+                ++p;
+                continue;
+            default:
+                goto Ldone;
+        }
+        // Accumulate the digit, noting overflow of the 64-bit range.
+        uinteger_t n2 = n * base;
+        if ((n2 / base != n || n2 + d < n))
+        {
+            overflow = true;
+        }
+        n = n2 + d;
+        // if n needs more than 64 bits
+        if (sizeof(n) > 8 &&
+            n > 0xFFFFFFFFFFFFFFFFULL)
+        {
+            overflow = true;
+        }
+    }
+Ldone:
+    if (overflow && !err)
+    {
+        error("integer overflow");
+        err = true;
+    }
+    enum FLAGS
+    {
+        FLAGS_none     = 0,
+        FLAGS_decimal  = 1,             // decimal
+        FLAGS_unsigned = 2,             // u or U suffix
+        FLAGS_long     = 4,             // L suffix
+    };
+    unsigned flags = (base == 10) ? FLAGS_decimal : FLAGS_none;
+    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
+    const utf8_t *psuffix = p;
+    while (1)
+    {
+        utf8_t f;
+        switch (*p)
+        {
+            case 'U':
+            case 'u':
+                f = FLAGS_unsigned;
+                goto L1;
+            case 'l':
+                f = FLAGS_long;
+                error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
+                goto L1;
+            case 'L':
+                f = FLAGS_long;
+            L1:
+                p++;
+                if ((flags & f) && !err)
+                {
+                    // the same suffix letter was given twice
+                    error("unrecognized token");
+                    err = true;
+                }
+                flags = (FLAGS) (flags | f);
+                continue;
+            default:
+                break;
+        }
+        break;
+    }
+    if (base == 8 && n >= 8)
+    {
+        // The '*' precision of "%.*s" takes an int argument, so the
+        // pointer difference has to be converted explicitly.
+        const int suffixlen = (int)(p - psuffix);
+        error("octal literals 0%llo%.*s are no longer supported, use std.conv.octal!%llo%.*s instead",
+              n, suffixlen, psuffix, n, suffixlen, psuffix);
+    }
+    TOK result;
+    switch (flags)
+    {
+        case FLAGS_none:
+            /* Octal or Hexadecimal constant.
+             * First that fits: int, uint, long, ulong
+             */
+            if (n & 0x8000000000000000LL)
+                result = TOKuns64v;
+            else if (n & 0xFFFFFFFF00000000LL)
+                result = TOKint64v;
+            else if (n & 0x80000000)
+                result = TOKuns32v;
+            else
+                result = TOKint32v;
+            break;
+        case FLAGS_decimal:
+            /* First that fits: int, long, long long
+             */
+            if (n & 0x8000000000000000LL)
+            {
+                if (!err)
+                {
+                    error("signed integer overflow");
+                    err = true;
+                }
+                result = TOKuns64v;
+            }
+            else if (n & 0xFFFFFFFF80000000LL)
+                result = TOKint64v;
+            else
+                result = TOKint32v;
+            break;
+        case FLAGS_unsigned:
+        case FLAGS_decimal | FLAGS_unsigned:
+            /* First that fits: uint, ulong
+             */
+            if (n & 0xFFFFFFFF00000000LL)
+                result = TOKuns64v;
+            else
+                result = TOKuns32v;
+            break;
+        case FLAGS_decimal | FLAGS_long:
+            if (n & 0x8000000000000000LL)
+            {
+                if (!err)
+                {
+                    error("signed integer overflow");
+                    err = true;
+                }
+                result = TOKuns64v;
+            }
+            else
+                result = TOKint64v;
+            break;
+        case FLAGS_long:
+            if (n & 0x8000000000000000LL)
+                result = TOKuns64v;
+            else
+                result = TOKint64v;
+            break;
+        case FLAGS_unsigned | FLAGS_long:
+        case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
+            result = TOKuns64v;
+            break;
+        default:
+            assert(0);
+    }
+    t->uns64value = n;
+    return result;
+}
+/**************************************
+* Read in characters, converting them to real.
+* On entry p is on the first character of the literal. The literal's
+* text, with '_' separators removed, is collected in stringbuffer and
+* passed to CTFloat::parse; the result is stored in t->floatvalue
+* (CTFloat::zero if the literal was malformed).
+* Returns TOKfloat32v, TOKfloat64v or TOKfloat80v according to the
+* f/F, absent, or L suffix, or the matching TOKimaginary* value when
+* an 'i' suffix follows.
+* Bugs:
+*      Exponent overflow not detected.
+*      Too much requested precision is not detected.
+*/
+TOK Lexer::inreal(Token *t)
+{
+//printf("Lexer::inreal()\n");
+bool isWellformedString = true;
+stringbuffer.reset();
+const utf8_t *pstart = p;
+char hex = 0; // becomes nonzero once a 0x / 0X prefix has been seen
+unsigned c = *p++;
+// Leading '0x'
+if (c == '0')
+{
+c = *p++;
+if (c == 'x' || c == 'X')
+{
+hex = true;
+c = *p++;
+}
+}
+// Digits to left of '.'
+while (1)
+{
+if (c == '.')
+{
+c = *p++;
+break;
+}
+if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
+{
+c = *p++;
+continue;
+}
+break;
+}
+// Digits to right of '.'
+while (1)
+{
+if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
+{
+c = *p++;
+continue;
+}
+break;
+}
+if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
+{
+c = *p++;
+if (c == '-' || c == '+')
+{
+c = *p++;
+}
+bool anyexp = false; // set once at least one exponent digit has been seen
+while (1)
+{
+if (isdigit(c))
+{
+anyexp = true;
+c = *p++;
+continue;
+}
+if (c == '_')
+{
+c = *p++;
+continue;
+}
+if (!anyexp)
+{
+error("missing exponent");
+isWellformedString = false;
+}
+break;
+}
+}
+else if (hex)
+{
+error("exponent required for hex float");
+isWellformedString = false;
+}
+--p; // c was read one character ahead; step p back onto it
+while (pstart < p)
+{
+if (*pstart != '_') // copy the literal's text without the '_' separators
+stringbuffer.writeByte(*pstart);
+++pstart;
+}
+stringbuffer.writeByte(0);
+const char *sbufptr = (char *)stringbuffer.data;
+TOK result;
+bool isOutOfRange = false;
+t->floatvalue = (isWellformedString ? CTFloat::parse(sbufptr, &isOutOfRange) : CTFloat::zero);
+errno = 0;
+switch (*p)
+{
+case 'F':
+case 'f':
+if (isWellformedString && !isOutOfRange)
+isOutOfRange = Port::isFloat32LiteralOutOfRange(sbufptr);
+result = TOKfloat32v;
+p++;
+break;
+default:
+if (isWellformedString && !isOutOfRange)
+isOutOfRange = Port::isFloat64LiteralOutOfRange(sbufptr);
+result = TOKfloat64v;
+break;
+case 'l':
+error("use 'L' suffix instead of 'l'");
+/* fall through */
+case 'L':
+result = TOKfloat80v;
+p++;
+break;
+}
+if (*p == 'i' || *p == 'I')
+{
+if (*p == 'I')
+error("use 'i' suffix instead of 'I'");
+p++;
+switch (result)
+{
+case TOKfloat32v:
+result = TOKimaginary32v;
+break;
+case TOKfloat64v:
+result = TOKimaginary64v;
+break;
+case TOKfloat80v:
+result = TOKimaginary80v;
+break;
+default: break;
+}
+}
+const bool isLong = (result == TOKfloat80v || result == TOKimaginary80v);
+if (isOutOfRange && !isLong) // an out-of-range result is not reported for literals with the L suffix
+{
+const char *suffix = (result == TOKfloat32v || result == TOKimaginary32v) ? "f" : "";
+error(scanloc, "number '%s%s' is not representable", (char *)stringbuffer.data, suffix);
+}
+return result;
+}
+/*********************************************
+ * Parse the remainder of a special token sequence of the form:
+ *      #line linnum [filespec]
+ * __LINE__ is also allowed for linnum, and __FILE__ for filespec.
+ *
+ * On success scanloc.linnum is updated, and scanloc.filename too when a
+ * filespec was given. On a malformed directive an error is reported at
+ * the location recorded on entry and scanloc is left untouched.
+ *
+ * A quoted filespec is copied byte for byte, so multi-byte UTF-8
+ * characters in the file name are preserved in full.
+ */
+void Lexer::poundLine()
+{
+    Token tok;
+    int linnum = this->scanloc.linnum;
+    char *filespec = NULL;
+    Loc loc = this->loc();              // reported if the directive is malformed
+
+    scan(&tok);
+    if (tok.value == TOKint32v || tok.value == TOKint64v)
+    {
+        // The stored number is value - 1; reject it if that does not
+        // round-trip through an int.
+        int lin = (int)(tok.uns64value - 1);
+        if ((unsigned)lin != tok.uns64value - 1)
+            error("line number %lld out of range", (unsigned long long)tok.uns64value);
+        else
+            linnum = lin;
+    }
+    else if (tok.value == TOKline)
+    {
+        // Line number stays as it is
+    }
+    else
+        goto Lerr;
+
+    while (1)
+    {
+        switch (*p)
+        {
+            case 0:
+            case 0x1A:
+            case '\n':
+            Lnewline:
+                // End of directive: commit the new location. The terminator
+                // itself is not consumed here.
+                this->scanloc.linnum = linnum;
+                if (filespec)
+                    this->scanloc.filename = filespec;
+                return;
+
+            case '\r':
+                p++;
+                if (*p != '\n')
+                {
+                    // Lone '\r' is a line terminator by itself
+                    p--;
+                    goto Lnewline;
+                }
+                continue;
+
+            case ' ':
+            case '\t':
+            case '\v':
+            case '\f':
+                p++;
+                continue;                       // skip white space
+
+            case '_':
+                if (memcmp(p, "__FILE__", 8) == 0)
+                {
+                    p += 8;
+                    filespec = mem.xstrdup(scanloc.filename);
+                    continue;
+                }
+                goto Lerr;
+
+            case '"':
+                if (filespec)
+                    goto Lerr;                  // only one filespec allowed
+                stringbuffer.reset();
+                p++;
+                while (1)
+                {
+                    unsigned c;
+
+                    c = *p;
+                    switch (c)
+                    {
+                        case '\n':
+                        case '\r':
+                        case 0:
+                        case 0x1A:
+                            goto Lerr;          // unterminated string
+
+                        case '"':
+                            stringbuffer.writeByte(0);
+                            filespec = mem.xstrdup((char *)stringbuffer.data);
+                            p++;
+                            break;
+
+                        default:
+                        {
+                            // decodeUTF() leaves p on the LAST byte of a
+                            // multi-byte sequence, so remember where the
+                            // character starts.
+                            const utf8_t *seq = p;
+                            if (c & 0x80)
+                            {
+                                unsigned u = decodeUTF();
+                                if (u == PS || u == LS)
+                                    goto Lerr;
+                            }
+                            // Copy every byte of the character; writing only
+                            // the lead byte would truncate non-ASCII names.
+                            for (; seq <= p; seq++)
+                                stringbuffer.writeByte(*seq);
+                            p++;
+                            continue;
+                        }
+                    }
+                    break;
+                }
+                continue;
+
+            default:
+                if (*p & 0x80)
+                {
+                    // Unicode line/paragraph separators also end the directive
+                    unsigned u = decodeUTF();
+                    if (u == PS || u == LS)
+                        goto Lnewline;
+                }
+                goto Lerr;
+        }
+    }
+
+Lerr:
+    error(loc, "#line integer [\"filespec\"]\\n expected");
+}
+/********************************************
+ * Decode the UTF-8 sequence that starts at p (its first byte must have
+ * the high bit set).
+ * An error is issued if utf_decodeChar() reports a problem.
+ * Returns:
+ *      the decoded character; p is advanced to the last byte of the
+ *      sequence, not past it.
+ */
+unsigned Lexer::decodeUTF()
+{
+    const utf8_t *const start = p;
+    assert(*start & 0x80);
+
+    // Offer the decoder up to 6 bytes, stopping early at a 0 byte
+    size_t avail = 1;
+    while (avail < 6 && start[avail])
+        ++avail;
+
+    dchar_t decoded;
+    size_t consumed = 0;
+    const char *const problem = utf_decodeChar(start, avail, &consumed, &decoded);
+
+    // Leave p on the final byte; callers step past it themselves
+    p += consumed - 1;
+    if (problem)
+    {
+        error("%s", problem);
+    }
+    return decoded;
+}
+/***************************************************
+ * Remove any run of spaces and tabs from the end of buf.
+ */
+static void trimTrailingBlanks(OutBuffer &buf)
+{
+    while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
+        buf.offset--;
+}
+
+/***************************************************
+ * Extract the doc comment lying between t->ptr and p and attach it to t.
+ *
+ * The text is canonicalized:
+ *  - the opening marker and any leading/trailing row of comment
+ *    characters are dropped,
+ *  - trailing blanks and tabs are stripped from every line,
+ *  - every kind of newline (\r, \r\n, LS, PS) becomes \n,
+ *  - the leading comment character of each line is removed,
+ *  - the result always ends with \n.
+ * Params:
+ *      t = token receiving the comment; if it already has one of the
+ *          chosen kind the new text is appended via combineComments()
+ *      lineComment = nonzero to store into t->lineComment, honored only
+ *          when anyToken is set; otherwise t->blockComment is used
+ */
+void Lexer::getDocComment(Token *t, unsigned lineComment)
+{
+    // Third character of the opener tells the comment kind: '/', '*' or '+'
+    const utf8_t ct = t->ptr[2];
+
+    // Text starts after "/**", "/++" or "///"; block kinds also lose
+    // their two character closer.
+    const utf8_t *q = t->ptr + 3;
+    const utf8_t *qend = p;
+    if (ct == '*' || ct == '+')
+        qend -= 2;
+
+    // Skip an initial row of ****'s, ++++'s or ////'s
+    while (q < qend && *q == ct)
+        q++;
+
+    // Line comments: skip leading blanks.
+    // Block comments: skip one immediately following newline.
+    bool atLineStart = false;
+    if (ct == '/')
+    {
+        while (q < qend && (*q == ' ' || *q == '\t'))
+            ++q;
+    }
+    else if (q < qend && (*q == '\r' || *q == '\n'))
+    {
+        const bool wasCR = (*q == '\r');
+        ++q;
+        if (wasCR && q < qend && *q == '\n')
+            ++q;
+        atLineStart = true;
+    }
+
+    // Drop a trailing row of ****'s or ++++'s
+    if (ct != '/')
+    {
+        while (q < qend && qend[-1] == ct)
+            qend--;
+    }
+
+    // The comment is now [q .. qend]; canonicalize it into buf
+    OutBuffer buf;
+    for (; q < qend; q++)
+    {
+        utf8_t c = *q;
+        bool isNewline = false;
+
+        if (c == '*' || c == '+')
+        {
+            if (atLineStart && c == ct)
+            {
+                // Leading comment character of a line: discard it along
+                // with the blanks written before it.
+                atLineStart = false;
+                trimTrailingBlanks(buf);
+                continue;
+            }
+        }
+        else if (c == ' ' || c == '\t')
+        {
+            // Blanks are copied and do not end the start-of-line state
+        }
+        else if (c == '\r')
+        {
+            if (q[1] == '\n')
+                continue;           // skip the \r, the \n follows
+            isNewline = true;
+        }
+        else if (c == '\n')
+        {
+            isNewline = true;
+        }
+        else if (c == 226 && q[1] == 128 && (q[2] == 168 || q[2] == 169))
+        {
+            // UTF-8 encoded LS or PS
+            q += 2;
+            isNewline = true;
+        }
+        else
+        {
+            atLineStart = false;
+        }
+
+        if (isNewline)
+        {
+            c = '\n';               // replace all newlines with \n
+            atLineStart = true;
+            trimTrailingBlanks(buf);
+        }
+        buf.writeByte(c);
+    }
+
+    // The last line may lack a newline; trim it too
+    trimTrailingBlanks(buf);
+
+    // Always end with a newline
+    if (!buf.offset || buf.data[buf.offset - 1] != '\n')
+        buf.writeByte('\n');
+    buf.writeByte(0);
+
+    // It's a line comment if the start of the doc comment comes
+    // after other non-whitespace on the same line.
+    const utf8_t **dc;
+    if (lineComment && anyToken)
+        dc = &t->lineComment;
+    else
+        dc = &t->blockComment;
+
+    // Append to a previous doc comment, or take ownership of the buffer
+    if (*dc)
+        *dc = combineComments(*dc, (utf8_t *)buf.data);
+    else
+        *dc = (utf8_t *)buf.extractData();
+}
+/********************************************
+ * Join two document comments into one.
+ *
+ * If either argument is NULL the other one is returned unchanged.
+ * Otherwise a new buffer is allocated holding c1, a '\n' terminating
+ * c1 if it is non-empty and lacks one, a separating '\n', and c2.
+ */
+const utf8_t *Lexer::combineComments(const utf8_t *c1, const utf8_t *c2)
+{
+    //printf("Lexer::combineComments('%s', '%s')\n", c1, c2);
+    if (!c1)
+        return c2;
+    if (!c2)
+        return c1;
+
+    const size_t n1 = strlen((const char *)c1);
+    const size_t n2 = strlen((const char *)c2);
+
+    // A non-empty first comment must end its last line before the separator
+    const bool needsLineEnd = (n1 != 0 && c1[n1 - 1] != '\n');
+    const size_t extra = needsLineEnd ? 1 : 0;
+
+    utf8_t *joined = (utf8_t *)mem.xmalloc(n1 + extra + 1 + n2 + 1);
+    utf8_t *out = joined;
+
+    memcpy(out, c1, n1);
+    out += n1;
+    if (needsLineEnd)
+        *out++ = '\n';
+    *out++ = '\n';                  // separator between the two comments
+    memcpy(out, c2, n2);
+    out += n2;
+    *out = 0;
+
+    return joined;
+}

Mercurial > hg > CbC > CbC_gcc

comparison gcc/d/dmd/lexer.c @ 145:1830386684a0