dmd.lexer source code

1 /**
2  * Implements the lexical analyzer, which converts source code into lexical tokens.
3  *
4  * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
5  *
6  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
10  * Documentation:  https://dlang.org/phobos/dmd_lexer.html
11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
12  */
13 
14 module dmd.lexer;
15 
16 import core.stdc.ctype;
17 import core.stdc.errno;
18 import core.stdc.stdarg;
19 import core.stdc.stdio;
20 import core.stdc.stdlib : getenv;
21 import core.stdc.string;
22 import core.stdc.time;
23 
24 import dmd.diagnostic : DiagnosticHandler, Severity, DefaultDiagnosticHandler, DefaultDiagnosticReporter;
25 import dmd.entity;
26 import dmd.errors;
27 import dmd.globals;
28 import dmd.id;
29 import dmd.identifier;
30 import dmd.root.ctfloat;
31 import dmd.root.outbuffer;
32 import dmd.root.port;
33 import dmd.root.rmem;
34 import dmd.root.string;
35 import dmd.tokens;
36 import dmd.utf;
37 import dmd.utils;
38 
39 nothrow:
40 
41 private enum LS = 0x2028;       // UTF line separator
42 private enum PS = 0x2029;       // UTF paragraph separator
43 
/********************************************
 * Character classification table, indexed by byte value.
 *
 * Each entry is a bitwise OR of the `CM*` flags describing which lexical
 * classes the byte belongs to. The table is built once by the lambda below
 * and queried through the `is*` helper functions that follow.
 */
private static immutable cmtable = () {
    ubyte[256] map;
    foreach (const ch; 0 .. map.length)
    {
        ubyte flags;

        // Digit / identifier classes.
        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;
        if (c_isxdigit(ch))
            flags |= CMhex;
        if (ch == '_' || c_isalnum(ch))
            flags |= CMidchar;

        // Characters that let a numeric literal continue past its first digit.
        switch (ch)
        {
            // Radix prefixes are only meaningful directly after a leading '0'.
            case 'x', 'X', 'b', 'B':
                flags |= CMzerosecond;
                break;

            // These may follow any leading digit, '0' included.
            case '0': .. case '9':
            case 'e', 'E', 'f', 'F', 'l', 'L', 'p', 'P', 'u', 'U':
            case 'i', '.', '_':
                flags |= CMzerosecond | CMdigitsecond;
                break;

            default:
                break;
        }

        // Plain ASCII bytes that can appear verbatim between single quotes;
        // escapes, line breaks, end-of-file markers and the quote itself are excluded.
        if (!(ch & 0x80))
        {
            switch (ch)
            {
                case 0, 0x1A, '\n', '\r', '\\', '\'':
                    break;
                default:
                    flags |= CMsinglechar;
                    break;
            }
        }

        map[ch] = flags;
    }
    return map;
}();
98 
// Bit flags stored in `cmtable`; each one marks membership in a character class.
private
{
    enum CMoctal       = 1 << 0;    // octal digit '0'..'7'
    enum CMhex         = 1 << 1;    // hexadecimal digit
    enum CMidchar      = 1 << 2;    // ASCII letter, digit or '_'
    enum CMzerosecond  = 1 << 3;    // continues a numeric literal that starts with '0'
    enum CMdigitsecond = 1 << 4;    // continues a numeric literal that starts with '1'..'9'
    enum CMsinglechar  = 1 << 5;    // may appear unescaped as the body of a one-character literal
}
108 
/// Returns: `true` if `c` carries the `CMoctal` flag in `cmtable` (an octal digit, '0'..'7').
private bool isoctal(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMoctal);
}
113 
/// Returns: `true` if `c` carries the `CMhex` flag in `cmtable` (a hexadecimal digit).
private bool ishex(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMhex);
}
118 
/// Returns: `true` if `c` carries the `CMidchar` flag in `cmtable` (ASCII letter, digit or '_').
private bool isidchar(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMidchar);
}
123 
/// Returns: `true` if `c` carries the `CMzerosecond` flag in `cmtable`, i.e. it can
/// continue a numeric literal whose first character is '0'.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMzerosecond);
}
128 
/// Returns: `true` if `c` carries the `CMdigitsecond` flag in `cmtable`, i.e. it can
/// continue a numeric literal whose first character is '1'..'9'.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMdigitsecond);
}
133 
/// Returns: `true` if `c` carries the `CMsinglechar` flag in `cmtable`: an ASCII character
/// other than backslash, newline, carriage return, 0, 0x1A or the single quote.
private bool issinglechar(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMsinglechar);
}
138 
/// Locale-independent test for an ASCII hexadecimal digit.
/// Returns: `true` for '0'..'9', 'a'..'f' and 'A'..'F'; `false` for every other value.
private bool c_isxdigit(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;

        default:
            return false;
    }
}
145 
/// Locale-independent test for an ASCII letter or decimal digit.
/// Returns: `true` for '0'..'9', 'a'..'z' and 'A'..'Z'; `false` for every other value.
private bool c_isalnum(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;

        default:
            return false;
    }
}
152 
unittest
{
    // Smoke test: lex the single keyword `int`, then confirm that the lexer
    // keeps answering TOK.endOfFile once the input is exhausted.
    string source = "int"; // string literals are implicitly null-terminated, which the Lexer relies on
    DefaultDiagnosticHandler diagnosticHandler;
    scope Lexer lex1 = new Lexer(null, source.ptr, 0, source.length, 0, 0, diagnosticHandler.diagnosticHandler);

    TOK tok = lex1.nextToken();
    diagnosticHandler.report();
    assert(tok == TOK.int32);

    // Reading past the end must be stable: every further call yields endOfFile.
    foreach (pass; 0 .. 3)
    {
        tok = lex1.nextToken();
        diagnosticHandler.report();
        assert(tok == TOK.endOfFile);
    }
}
176 
unittest
{
    // The inputs below are deliberately broken, so suppress the Lexer's
    // error output for the duration of this test.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    // Test malformed input: even malformed input should end in a TOK.endOfFile.
    static immutable char[][] testcases =
    [   // Testcase must end with 0 or 0x1A.
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        DefaultDiagnosticHandler diagnosticHandler;
        // The terminator is the last element, hence endoffset == length - 1.
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0, diagnosticHandler.diagnosticHandler);
        TOK tok = lex2.nextToken();
        diagnosticHandler.report();

        // Cap the number of calls at the input length so a lexer that never
        // reaches end-of-file fails the assertion instead of looping forever.
        size_t iterations = 1;
        while ((tok != TOK.endOfFile) && (iterations++ < testcase.length))
        {
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);

        // End-of-file must be sticky.
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
213 
214 /***********************************************************
215  */
216 class Lexer
217 {
218     private __gshared OutBuffer stringbuffer;
219 
220     Loc scanloc;            // for error messages
221     Loc prevloc;            // location of token before current
222 
223     const(char)* p;         // current character
224 
225     Token token;
226 
227     private
228     {
229         const(char)* base;      // pointer to start of buffer
230         const(char)* end;       // pointer to last element of buffer
231         const(char)* line;      // start of current line
232 
233         bool doDocComment;      // collect doc comment information
234         bool anyToken;          // seen at least one token
235         bool commentToken;      // comments are TOK.comment's
236         int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
237         int lastDocLine;        // last line of previous doc comment
238 
239         Token* tokenFreelist;
240         DiagnosticHandler handleDiagnostic;
241         DefaultDiagnosticReporter diagnosticReporter;
242     }
243 
244   nothrow:
245 
246     /*********************
247      * Creates a Lexer for the source code base[begoffset..endoffset+1].
248      * The last character, base[endoffset], must be null (0) or EOF (0x1A).
249      *
250      * Params:
251      *  filename = used for error messages
252      *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
253      *  begoffset = starting offset into base[]
254      *  endoffset = the last offset to read into base[]
255      *  doDocComment = handle documentation comments
256      *  commentToken = comments become TOK.comment's
257      *  diagnosticHandler = diagnostic handler
258      */
259     this(const(char)* filename, const(char)* base, size_t begoffset,
260         size_t endoffset, bool doDocComment, bool commentToken,
261         DiagnosticHandler handleDiagnostic) pure
262     {
263         scanloc = Loc(filename, 1, 1);
264         //printf("Lexer::Lexer(%p,%d)\n",base,length);
265         //printf("lexer.filename = %s\n", filename);
266         token = Token.init;
267         this.base = base;
268         this.end = base + endoffset;
269         p = base + begoffset;
270         line = p;
271         this.doDocComment = doDocComment;
272         this.commentToken = commentToken;
273         this.inTokenStringConstant = 0;
274         this.lastDocLine = 0;
275         this.handleDiagnostic = handleDiagnostic;
276 
277         //initKeywords();
278         /* If first line starts with '#!', ignore the line
279          */
280         if (p && p[0] == '#' && p[1] == '!')
281         {
282             p += 2;
283             while (1)
284             {
285                 char c = *p++;
286                 switch (c)
287                 {
288                 case 0:
289                 case 0x1A:
290                     p--;
291                     goto case;
292                 case '\n':
293                     break;
294                 default:
295                     continue;
296                 }
297                 break;
298             }
299             endOfLine();
300         }
301     }
302 
303     /// Returns: a newly allocated `Token`.
304     Token* allocateToken() pure nothrow @safe
305     {
306         if (tokenFreelist)
307         {
308             Token* t = tokenFreelist;
309             tokenFreelist = t.next;
310             t.next = null;
311             return t;
312         }
313         return new Token();
314     }
315 
316     /// Frees the given token by returning it to the freelist.
317     private void releaseToken(Token* token) pure nothrow @nogc @safe
318     {
319         if (mem.isGCEnabled)
320             *token = Token.init;
321         token.next = tokenFreelist;
322         tokenFreelist = token;
323     }
324 
325     TOK nextToken()
326     {
327         prevloc = token.loc;
328         if (token.next)
329         {
330             Token* t = token.next;
331             memcpy(&token, t, Token.sizeof);
332             releaseToken(t);
333         }
334         else
335         {
336             scan(&token);
337         }
338         //printf(token.toChars());
339         return token.value;
340     }
341 
342     /***********************
343      * Look ahead at next token's value.
344      */
345     final TOK peekNext()
346     {
347         return peek(&token).value;
348     }
349 
350     /***********************
351      * Look 2 tokens ahead at value.
352      */
353     final TOK peekNext2()
354     {
355         Token* t = peek(&token);
356         return peek(t).value;
357     }
358 
359     /****************************
360      * Turn next token in buffer into a token.
361      */
362     final void scan(Token* t)
363     {
364         const lastLine = scanloc.linnum;
365         Loc startLoc;
366         t.blockComment = null;
367         t.lineComment = null;
368 
369         while (1)
370         {
371             t.ptr = p;
372             //printf("p = %p, *p = '%c'\n",p,*p);
373             t.loc = loc();
374             switch (*p)
375             {
376             case 0:
377             case 0x1A:
378                 t.value = TOK.endOfFile; // end of file
379                 // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
380                 return;
381             case ' ':
382             case '\t':
383             case '\v':
384             case '\f':
385                 p++;
386                 continue; // skip white space
387             case '\r':
388                 p++;
389                 if (*p != '\n') // if CR stands by itself
390                 {
391                     endOfLine();
392                     goto skipFourSpaces;
393                 }
394                 continue; // skip white space
395             case '\n':
396                 p++;
397                 endOfLine();
398                 skipFourSpaces:
399                 while (*(cast(uint*)p) == 0x20202020) //' ' == 0x20
400                 {
401                     p+=4;
402                 }
403                 continue; // skip white space
404             case '0':
405                 if (!isZeroSecond(p[1]))        // if numeric literal does not continue
406                 {
407                     ++p;
408                     t.unsvalue = 0;
409                     t.value = TOK.int32Literal;
410                     return;
411                 }
412                 goto Lnumber;
413 
414             case '1': .. case '9':
415                 if (!isDigitSecond(p[1]))       // if numeric literal does not continue
416                 {
417                     t.unsvalue = *p - '0';
418                     ++p;
419                     t.value = TOK.int32Literal;
420                     return;
421                 }
422             Lnumber:
423                 t.value = number(t);
424                 return;
425 
426             case '\'':
427                 if (issinglechar(p[1]) && p[2] == '\'')
428                 {
429                     t.unsvalue = p[1];        // simple one character literal
430                     t.value = TOK.charLiteral;
431                     p += 3;
432                 }
433                 else
434                     t.value = charConstant(t);
435                 return;
436             case 'r':
437                 if (p[1] != '"')
438                     goto case_ident;
439                 p++;
440                 goto case '`';
441             case '`':
442                 wysiwygStringConstant(t);
443                 return;
444             case 'x':
445                 if (p[1] != '"')
446                     goto case_ident;
447                 p++;
448                 auto start = p;
449                 auto hexString = new OutBuffer();
450                 t.value = hexStringConstant(t);
451                 hexString.write(start[0 .. p - start]);
452                 error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
453                 return;
454             case 'q':
455                 if (p[1] == '"')
456                 {
457                     p++;
458                     delimitedStringConstant(t);
459                     return;
460                 }
461                 else if (p[1] == '{')
462                 {
463                     p++;
464                     tokenStringConstant(t);
465                     return;
466                 }
467                 else
468                     goto case_ident;
469             case '"':
470                 escapeStringConstant(t);
471                 return;
472             case 'a':
473             case 'b':
474             case 'c':
475             case 'd':
476             case 'e':
477             case 'f':
478             case 'g':
479             case 'h':
480             case 'i':
481             case 'j':
482             case 'k':
483             case 'l':
484             case 'm':
485             case 'n':
486             case 'o':
487             case 'p':
488                 /*case 'q': case 'r':*/
489             case 's':
490             case 't':
491             case 'u':
492             case 'v':
493             case 'w':
494                 /*case 'x':*/
495             case 'y':
496             case 'z':
497             case 'A':
498             case 'B':
499             case 'C':
500             case 'D':
501             case 'E':
502             case 'F':
503             case 'G':
504             case 'H':
505             case 'I':
506             case 'J':
507             case 'K':
508             case 'L':
509             case 'M':
510             case 'N':
511             case 'O':
512             case 'P':
513             case 'Q':
514             case 'R':
515             case 'S':
516             case 'T':
517             case 'U':
518             case 'V':
519             case 'W':
520             case 'X':
521             case 'Y':
522             case 'Z':
523             case '_':
524             case_ident:
525                 {
526                     while (1)
527                     {
528                         const c = *++p;
529                         if (isidchar(c))
530                             continue;
531                         else if (c & 0x80)
532                         {
533                             const s = p;
534                             const u = decodeUTF();
535                             if (isUniAlpha(u))
536                                 continue;
537                             error("char 0x%04x not allowed in identifier", u);
538                             p = s;
539                         }
540                         break;
541                     }
542                     Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
543                     t.ident = id;
544                     t.value = cast(TOK)id.getValue();
545                     anyToken = 1;
546                     if (*t.ptr == '_') // if special identifier token
547                     {
548                         // Lazy initialization
549                         TimeStampInfo.initialize(t.loc);
550 
551                         if (id == Id.DATE)
552                         {
553                             t.ustring = TimeStampInfo.date.ptr;
554                             goto Lstr;
555                         }
556                         else if (id == Id.TIME)
557                         {
558                             t.ustring = TimeStampInfo.time.ptr;
559                             goto Lstr;
560                         }
561                         else if (id == Id.VENDOR)
562                         {
563                             t.ustring = global.vendor.xarraydup.ptr;
564                             goto Lstr;
565                         }
566                         else if (id == Id.TIMESTAMP)
567                         {
568                             t.ustring = TimeStampInfo.timestamp.ptr;
569                         Lstr:
570                             t.value = TOK.string_;
571                             t.postfix = 0;
572                             t.len = cast(uint)strlen(t.ustring);
573                         }
574                         else if (id == Id.VERSIONX)
575                         {
576                             t.value = TOK.int64Literal;
577                             t.unsvalue = global.versionNumber();
578                         }
579                         else if (id == Id.EOFX)
580                         {
581                             t.value = TOK.endOfFile;
582                             // Advance scanner to end of file
583                             while (!(*p == 0 || *p == 0x1A))
584                                 p++;
585                         }
586                     }
587                     //printf("t.value = %d\n",t.value);
588                     return;
589                 }
590             case '/':
591                 p++;
592                 switch (*p)
593                 {
594                 case '=':
595                     p++;
596                     t.value = TOK.divAssign;
597                     return;
598                 case '*':
599                     p++;
600                     startLoc = loc();
601                     while (1)
602                     {
603                         while (1)
604                         {
605                             const c = *p;
606                             switch (c)
607                             {
608                             case '/':
609                                 break;
610                             case '\n':
611                                 endOfLine();
612                                 p++;
613                                 continue;
614                             case '\r':
615                                 p++;
616                                 if (*p != '\n')
617                                     endOfLine();
618                                 continue;
619                             case 0:
620                             case 0x1A:
621                                 error("unterminated /* */ comment");
622                                 p = end;
623                                 t.loc = loc();
624                                 t.value = TOK.endOfFile;
625                                 return;
626                             default:
627                                 if (c & 0x80)
628                                 {
629                                     const u = decodeUTF();
630                                     if (u == PS || u == LS)
631                                         endOfLine();
632                                 }
633                                 p++;
634                                 continue;
635                             }
636                             break;
637                         }
638                         p++;
639                         if (p[-2] == '*' && p - 3 != t.ptr)
640                             break;
641                     }
642                     if (commentToken)
643                     {
644                         t.loc = startLoc;
645                         t.value = TOK.comment;
646                         return;
647                     }
648                     else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
649                     {
650                         // if /** but not /**/
651                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
652                         lastDocLine = scanloc.linnum;
653                     }
654                     continue;
655                 case '/': // do // style comments
656                     startLoc = loc();
657                     while (1)
658                     {
659                         const c = *++p;
660                         switch (c)
661                         {
662                         case '\n':
663                             break;
664                         case '\r':
665                             if (p[1] == '\n')
666                                 p++;
667                             break;
668                         case 0:
669                         case 0x1A:
670                             if (commentToken)
671                             {
672                                 p = end;
673                                 t.loc = startLoc;
674                                 t.value = TOK.comment;
675                                 return;
676                             }
677                             if (doDocComment && t.ptr[2] == '/')
678                             {
679                                 getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
680                                 lastDocLine = scanloc.linnum;
681                             }
682                             p = end;
683                             t.loc = loc();
684                             t.value = TOK.endOfFile;
685                             return;
686                         default:
687                             if (c & 0x80)
688                             {
689                                 const u = decodeUTF();
690                                 if (u == PS || u == LS)
691                                     break;
692                             }
693                             continue;
694                         }
695                         break;
696                     }
697                     if (commentToken)
698                     {
699                         p++;
700                         endOfLine();
701                         t.loc = startLoc;
702                         t.value = TOK.comment;
703                         return;
704                     }
705                     if (doDocComment && t.ptr[2] == '/')
706                     {
707                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
708                         lastDocLine = scanloc.linnum;
709                     }
710                     p++;
711                     endOfLine();
712                     continue;
713                 case '+':
714                     {
715                         int nest;
716                         startLoc = loc();
717                         p++;
718                         nest = 1;
719                         while (1)
720                         {
721                             char c = *p;
722                             switch (c)
723                             {
724                             case '/':
725                                 p++;
726                                 if (*p == '+')
727                                 {
728                                     p++;
729                                     nest++;
730                                 }
731                                 continue;
732                             case '+':
733                                 p++;
734                                 if (*p == '/')
735                                 {
736                                     p++;
737                                     if (--nest == 0)
738                                         break;
739                                 }
740                                 continue;
741                             case '\r':
742                                 p++;
743                                 if (*p != '\n')
744                                     endOfLine();
745                                 continue;
746                             case '\n':
747                                 endOfLine();
748                                 p++;
749                                 continue;
750                             case 0:
751                             case 0x1A:
752                                 error("unterminated /+ +/ comment");
753                                 p = end;
754                                 t.loc = loc();
755                                 t.value = TOK.endOfFile;
756                                 return;
757                             default:
758                                 if (c & 0x80)
759                                 {
760                                     uint u = decodeUTF();
761                                     if (u == PS || u == LS)
762                                         endOfLine();
763                                 }
764                                 p++;
765                                 continue;
766                             }
767                             break;
768                         }
769                         if (commentToken)
770                         {
771                             t.loc = startLoc;
772                             t.value = TOK.comment;
773                             return;
774                         }
775                         if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
776                         {
777                             // if /++ but not /++/
778                             getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
779                             lastDocLine = scanloc.linnum;
780                         }
781                         continue;
782                     }
783                 default:
784                     break;
785                 }
786                 t.value = TOK.div;
787                 return;
788             case '.':
789                 p++;
790                 if (isdigit(*p))
791                 {
792                     /* Note that we don't allow ._1 and ._ as being
793                      * valid floating point numbers.
794                      */
795                     p--;
796                     t.value = inreal(t);
797                 }
798                 else if (p[0] == '.')
799                 {
800                     if (p[1] == '.')
801                     {
802                         p += 2;
803                         t.value = TOK.dotDotDot;
804                     }
805                     else
806                     {
807                         p++;
808                         t.value = TOK.slice;
809                     }
810                 }
811                 else
812                     t.value = TOK.dot;
813                 return;
814             case '&':
815                 p++;
816                 if (*p == '=')
817                 {
818                     p++;
819                     t.value = TOK.andAssign;
820                 }
821                 else if (*p == '&')
822                 {
823                     p++;
824                     t.value = TOK.andAnd;
825                 }
826                 else
827                     t.value = TOK.and;
828                 return;
829             case '|':
830                 p++;
831                 if (*p == '=')
832                 {
833                     p++;
834                     t.value = TOK.orAssign;
835                 }
836                 else if (*p == '|')
837                 {
838                     p++;
839                     t.value = TOK.orOr;
840                 }
841                 else
842                     t.value = TOK.or;
843                 return;
844             case '-':
845                 p++;
846                 if (*p == '=')
847                 {
848                     p++;
849                     t.value = TOK.minAssign;
850                 }
851                 else if (*p == '-')
852                 {
853                     p++;
854                     t.value = TOK.minusMinus;
855                 }
856                 else
857                     t.value = TOK.min;
858                 return;
859             case '+':
860                 p++;
861                 if (*p == '=')
862                 {
863                     p++;
864                     t.value = TOK.addAssign;
865                 }
866                 else if (*p == '+')
867                 {
868                     p++;
869                     t.value = TOK.plusPlus;
870                 }
871                 else
872                     t.value = TOK.add;
873                 return;
874             case '<':
875                 p++;
876                 if (*p == '=')
877                 {
878                     p++;
879                     t.value = TOK.lessOrEqual; // <=
880                 }
881                 else if (*p == '<')
882                 {
883                     p++;
884                     if (*p == '=')
885                     {
886                         p++;
887                         t.value = TOK.leftShiftAssign; // <<=
888                     }
889                     else
890                         t.value = TOK.leftShift; // <<
891                 }
892                 else
893                     t.value = TOK.lessThan; // <
894                 return;
895             case '>':
896                 p++;
897                 if (*p == '=')
898                 {
899                     p++;
900                     t.value = TOK.greaterOrEqual; // >=
901                 }
902                 else if (*p == '>')
903                 {
904                     p++;
905                     if (*p == '=')
906                     {
907                         p++;
908                         t.value = TOK.rightShiftAssign; // >>=
909                     }
910                     else if (*p == '>')
911                     {
912                         p++;
913                         if (*p == '=')
914                         {
915                             p++;
916                             t.value = TOK.unsignedRightShiftAssign; // >>>=
917                         }
918                         else
919                             t.value = TOK.unsignedRightShift; // >>>
920                     }
921                     else
922                         t.value = TOK.rightShift; // >>
923                 }
924                 else
925                     t.value = TOK.greaterThan; // >
926                 return;
927             case '!':
928                 p++;
929                 if (*p == '=')
930                 {
931                     p++;
932                     t.value = TOK.notEqual; // !=
933                 }
934                 else
935                     t.value = TOK.not; // !
936                 return;
937             case '=':
938                 p++;
939                 if (*p == '=')
940                 {
941                     p++;
942                     t.value = TOK.equal; // ==
943                 }
944                 else if (*p == '>')
945                 {
946                     p++;
947                     t.value = TOK.goesTo; // =>
948                 }
949                 else
950                     t.value = TOK.assign; // =
951                 return;
952             case '~':
953                 p++;
954                 if (*p == '=')
955                 {
956                     p++;
957                     t.value = TOK.concatenateAssign; // ~=
958                 }
959                 else
960                     t.value = TOK.tilde; // ~
961                 return;
962             case '^':
963                 p++;
964                 if (*p == '^')
965                 {
966                     p++;
967                     if (*p == '=')
968                     {
969                         p++;
970                         t.value = TOK.powAssign; // ^^=
971                     }
972                     else
973                         t.value = TOK.pow; // ^^
974                 }
975                 else if (*p == '=')
976                 {
977                     p++;
978                     t.value = TOK.xorAssign; // ^=
979                 }
980                 else
981                     t.value = TOK.xor; // ^
982                 return;
983             case '(':
984                 p++;
985                 t.value = TOK.leftParentheses;
986                 return;
987             case ')':
988                 p++;
989                 t.value = TOK.rightParentheses;
990                 return;
991             case '[':
992                 p++;
993                 t.value = TOK.leftBracket;
994                 return;
995             case ']':
996                 p++;
997                 t.value = TOK.rightBracket;
998                 return;
999             case '{':
1000                 p++;
1001                 t.value = TOK.leftCurly;
1002                 return;
1003             case '}':
1004                 p++;
1005                 t.value = TOK.rightCurly;
1006                 return;
1007             case '?':
1008                 p++;
1009                 t.value = TOK.question;
1010                 return;
1011             case ',':
1012                 p++;
1013                 t.value = TOK.comma;
1014                 return;
1015             case ';':
1016                 p++;
1017                 t.value = TOK.semicolon;
1018                 return;
1019             case ':':
1020                 p++;
1021                 t.value = TOK.colon;
1022                 return;
1023             case '$':
1024                 p++;
1025                 t.value = TOK.dollar;
1026                 return;
1027             case '@':
1028                 p++;
1029                 t.value = TOK.at;
1030                 return;
1031             case '*':
1032                 p++;
1033                 if (*p == '=')
1034                 {
1035                     p++;
1036                     t.value = TOK.mulAssign;
1037                 }
1038                 else
1039                     t.value = TOK.mul;
1040                 return;
1041             case '%':
1042                 p++;
1043                 if (*p == '=')
1044                 {
1045                     p++;
1046                     t.value = TOK.modAssign;
1047                 }
1048                 else
1049                     t.value = TOK.mod;
1050                 return;
1051             case '#':
1052                 {
1053                     p++;
1054                     Token n;
1055                     scan(&n);
1056                     if (n.value == TOK.identifier)
1057                     {
1058                         if (n.ident == Id.line)
1059                         {
1060                             poundLine();
1061                             continue;
1062                         }
1063                         else
1064                         {
1065                             const locx = loc();
1066                             warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
1067                         }
1068                     }
1069                     else if (n.value == TOK.if_)
1070                     {
1071                         error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
1072                     }
1073                     t.value = TOK.pound;
1074                     return;
1075                 }
1076             default:
1077                 {
1078                     dchar c = *p;
1079                     if (c & 0x80)
1080                     {
1081                         c = decodeUTF();
1082                         // Check for start of unicode identifier
1083                         if (isUniAlpha(c))
1084                             goto case_ident;
1085                         if (c == PS || c == LS)
1086                         {
1087                             endOfLine();
1088                             p++;
1089                             continue;
1090                         }
1091                     }
1092                     if (c < 0x80 && isprint(c))
1093                         error("character '%c' is not a valid token", c);
1094                     else
1095                         error("character 0x%02x is not a valid token", c);
1096                     p++;
1097                     continue;
1098                 }
1099             }
1100         }
1101     }
1102 
1103     final Token* peek(Token* ct)
1104     {
1105         Token* t;
1106         if (ct.next)
1107             t = ct.next;
1108         else
1109         {
1110             t = allocateToken();
1111             scan(t);
1112             ct.next = t;
1113         }
1114         return t;
1115     }
1116 
1117     /*********************************
1118      * tk is on the opening (.
1119      * Look ahead and return token that is past the closing ).
1120      */
1121     final Token* peekPastParen(Token* tk)
1122     {
1123         //printf("peekPastParen()\n");
1124         int parens = 1;
1125         int curlynest = 0;
1126         while (1)
1127         {
1128             tk = peek(tk);
1129             //tk.print();
1130             switch (tk.value)
1131             {
1132             case TOK.leftParentheses:
1133                 parens++;
1134                 continue;
1135             case TOK.rightParentheses:
1136                 --parens;
1137                 if (parens)
1138                     continue;
1139                 tk = peek(tk);
1140                 break;
1141             case TOK.leftCurly:
1142                 curlynest++;
1143                 continue;
1144             case TOK.rightCurly:
1145                 if (--curlynest >= 0)
1146                     continue;
1147                 break;
1148             case TOK.semicolon:
1149                 if (curlynest)
1150                     continue;
1151                 break;
1152             case TOK.endOfFile:
1153                 break;
1154             default:
1155                 continue;
1156             }
1157             return tk;
1158         }
1159     }
1160 
    /*******************************************
     * Parse escape sequence.
     *
     * Forwards to the static overload, reporting errors against `token.loc`.
     * `p` is handed over by reference, so on return it holds the position the
     * static overload stopped at.
     * Returns:
     *      the character value produced by the escape sequence
     */
    private uint escapeSequence()
    {
        return Lexer.escapeSequence(token.loc, p);
    }
1168 
1169     /**
1170     Parse the given string literal escape sequence into a single character.
1171     Params:
1172         loc = the location of the current token
1173         sequence = pointer to string with escape sequence to parse. this is a reference
1174                    variable that is also used to return the position after the sequence
1175     Returns:
1176         the escaped sequence as a single character
1177     */
1178     private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence)
1179     {
1180         const(char)* p = sequence; // cache sequence reference on stack
1181         scope(exit) sequence = p;
1182 
1183         uint c = *p;
1184         int ndigits;
1185         switch (c)
1186         {
1187         case '\'':
1188         case '"':
1189         case '?':
1190         case '\\':
1191         Lconsume:
1192             p++;
1193             break;
1194         case 'a':
1195             c = 7;
1196             goto Lconsume;
1197         case 'b':
1198             c = 8;
1199             goto Lconsume;
1200         case 'f':
1201             c = 12;
1202             goto Lconsume;
1203         case 'n':
1204             c = 10;
1205             goto Lconsume;
1206         case 'r':
1207             c = 13;
1208             goto Lconsume;
1209         case 't':
1210             c = 9;
1211             goto Lconsume;
1212         case 'v':
1213             c = 11;
1214             goto Lconsume;
1215         case 'u':
1216             ndigits = 4;
1217             goto Lhex;
1218         case 'U':
1219             ndigits = 8;
1220             goto Lhex;
1221         case 'x':
1222             ndigits = 2;
1223         Lhex:
1224             p++;
1225             c = *p;
1226             if (ishex(cast(char)c))
1227             {
1228                 uint v = 0;
1229                 int n = 0;
1230                 while (1)
1231                 {
1232                     if (isdigit(cast(char)c))
1233                         c -= '0';
1234                     else if (islower(c))
1235                         c -= 'a' - 10;
1236                     else
1237                         c -= 'A' - 10;
1238                     v = v * 16 + c;
1239                     c = *++p;
1240                     if (++n == ndigits)
1241                         break;
1242                     if (!ishex(cast(char)c))
1243                     {
1244                         .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
1245                         break;
1246                     }
1247                 }
1248                 if (ndigits != 2 && !utf_isValidDchar(v))
1249                 {
1250                     .error(loc, "invalid UTF character \\U%08x", v);
1251                     v = '?'; // recover with valid UTF character
1252                 }
1253                 c = v;
1254             }
1255             else
1256             {
1257                 .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
1258                 p++;
1259             }
1260             break;
1261         case '&':
1262             // named character entity
1263             for (const idstart = ++p; 1; p++)
1264             {
1265                 switch (*p)
1266                 {
1267                 case ';':
1268                     c = HtmlNamedEntity(idstart, p - idstart);
1269                     if (c == ~0)
1270                     {
1271                         .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
1272                         c = '?';
1273                     }
1274                     p++;
1275                     break;
1276                 default:
1277                     if (isalpha(*p) || (p != idstart && isdigit(*p)))
1278                         continue;
1279                     .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
1280                     c = '?';
1281                     break;
1282                 }
1283                 break;
1284             }
1285             break;
1286         case 0:
1287         case 0x1A:
1288             // end of file
1289             c = '\\';
1290             break;
1291         default:
1292             if (isoctal(cast(char)c))
1293             {
1294                 uint v = 0;
1295                 int n = 0;
1296                 do
1297                 {
1298                     v = v * 8 + (c - '0');
1299                     c = *++p;
1300                 }
1301                 while (++n < 3 && isoctal(cast(char)c));
1302                 c = v;
1303                 if (c > 0xFF)
1304                     .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
1305             }
1306             else
1307             {
1308                 .error(loc, "undefined escape sequence \\%c", c);
1309                 p++;
1310             }
1311             break;
1312         }
1313         return c;
1314     }
1315 
1316     /**
1317     Lex a wysiwyg string. `p` must be pointing to the first character before the
1318     contents of the string literal. The character pointed to by `p` will be used as
1319     the terminating character (i.e. backtick or double-quote).
1320     Params:
1321         result = pointer to the token that accepts the result
1322     */
1323     private void wysiwygStringConstant(Token* result)
1324     {
1325         result.value = TOK.string_;
1326         Loc start = loc();
1327         auto terminator = p[0];
1328         p++;
1329         stringbuffer.setsize(0);
1330         while (1)
1331         {
1332             dchar c = p[0];
1333             p++;
1334             switch (c)
1335             {
1336             case '\n':
1337                 endOfLine();
1338                 break;
1339             case '\r':
1340                 if (p[0] == '\n')
1341                     continue; // ignore
1342                 c = '\n'; // treat EndOfLine as \n character
1343                 endOfLine();
1344                 break;
1345             case 0:
1346             case 0x1A:
1347                 error("unterminated string constant starting at %s", start.toChars());
1348                 result.setString();
1349                 // rewind `p` so it points to the EOF character
1350                 p--;
1351                 return;
1352             default:
1353                 if (c == terminator)
1354                 {
1355                     result.setString(stringbuffer);
1356                     stringPostfix(result);
1357                     return;
1358                 }
1359                 else if (c & 0x80)
1360                 {
1361                     p--;
1362                     const u = decodeUTF();
1363                     p++;
1364                     if (u == PS || u == LS)
1365                         endOfLine();
1366                     stringbuffer.writeUTF8(u);
1367                     continue;
1368                 }
1369                 break;
1370             }
1371             stringbuffer.writeByte(c);
1372         }
1373     }
1374 
1375     /**************************************
1376      * Lex hex strings:
1377      *      x"0A ae 34FE BD"
1378      */
1379     private TOK hexStringConstant(Token* t)
1380     {
1381         Loc start = loc();
1382         uint n = 0;
1383         uint v = ~0; // dead assignment, needed to suppress warning
1384         p++;
1385         stringbuffer.setsize(0);
1386         while (1)
1387         {
1388             dchar c = *p++;
1389             switch (c)
1390             {
1391             case ' ':
1392             case '\t':
1393             case '\v':
1394             case '\f':
1395                 continue; // skip white space
1396             case '\r':
1397                 if (*p == '\n')
1398                     continue; // ignore '\r' if followed by '\n'
1399                 // Treat isolated '\r' as if it were a '\n'
1400                 goto case '\n';
1401             case '\n':
1402                 endOfLine();
1403                 continue;
1404             case 0:
1405             case 0x1A:
1406                 error("unterminated string constant starting at %s", start.toChars());
1407                 t.setString();
1408                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1409                 p--;
1410                 return TOK.hexadecimalString;
1411             case '"':
1412                 if (n & 1)
1413                 {
1414                     error("odd number (%d) of hex characters in hex string", n);
1415                     stringbuffer.writeByte(v);
1416                 }
1417                 t.setString(stringbuffer);
1418                 stringPostfix(t);
1419                 return TOK.hexadecimalString;
1420             default:
1421                 if (c >= '0' && c <= '9')
1422                     c -= '0';
1423                 else if (c >= 'a' && c <= 'f')
1424                     c -= 'a' - 10;
1425                 else if (c >= 'A' && c <= 'F')
1426                     c -= 'A' - 10;
1427                 else if (c & 0x80)
1428                 {
1429                     p--;
1430                     const u = decodeUTF();
1431                     p++;
1432                     if (u == PS || u == LS)
1433                         endOfLine();
1434                     else
1435                         error("non-hex character \\u%04x in hex string", u);
1436                 }
1437                 else
1438                     error("non-hex character '%c' in hex string", c);
1439                 if (n & 1)
1440                 {
1441                     v = (v << 4) | c;
1442                     stringbuffer.writeByte(v);
1443                 }
1444                 else
1445                     v = c;
1446                 n++;
1447                 break;
1448             }
1449         }
1450         assert(0); // see bug 15731
1451     }
1452 
1453     /**
1454     Lex a delimited string. Some examples of delimited strings are:
1455     ---
1456     q"(foo(xxx))"      // "foo(xxx)"
1457     q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
1458     q"/foo]/"          // "foo]"
1459     q"HERE
1460     foo
1461     HERE"              // "foo\n"
1462     ---
1463     It is assumed that `p` points to the opening double-quote '"'.
1464     Params:
1465         result = pointer to the token that accepts the result
1466     */
1467     private void delimitedStringConstant(Token* result)
1468     {
1469         result.value = TOK.string_;
1470         Loc start = loc();
1471         dchar delimleft = 0;
1472         dchar delimright = 0;
1473         uint nest = 1;
1474         uint nestcount = ~0; // dead assignment, needed to suppress warning
1475         Identifier hereid = null;
1476         uint blankrol = 0;
1477         uint startline = 0;
1478         p++;
1479         stringbuffer.setsize(0);
1480         while (1)
1481         {
1482             dchar c = *p++;
1483             //printf("c = '%c'\n", c);
1484             switch (c)
1485             {
1486             case '\n':
1487             Lnextline:
1488                 endOfLine();
1489                 startline = 1;
1490                 if (blankrol)
1491                 {
1492                     blankrol = 0;
1493                     continue;
1494                 }
1495                 if (hereid)
1496                 {
1497                     stringbuffer.writeUTF8(c);
1498                     continue;
1499                 }
1500                 break;
1501             case '\r':
1502                 if (*p == '\n')
1503                     continue; // ignore
1504                 c = '\n'; // treat EndOfLine as \n character
1505                 goto Lnextline;
1506             case 0:
1507             case 0x1A:
1508                 error("unterminated delimited string constant starting at %s", start.toChars());
1509                 result.setString();
1510                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1511                 p--;
1512                 return;
1513             default:
1514                 if (c & 0x80)
1515                 {
1516                     p--;
1517                     c = decodeUTF();
1518                     p++;
1519                     if (c == PS || c == LS)
1520                         goto Lnextline;
1521                 }
1522                 break;
1523             }
1524             if (delimleft == 0)
1525             {
1526                 delimleft = c;
1527                 nest = 1;
1528                 nestcount = 1;
1529                 if (c == '(')
1530                     delimright = ')';
1531                 else if (c == '{')
1532                     delimright = '}';
1533                 else if (c == '[')
1534                     delimright = ']';
1535                 else if (c == '<')
1536                     delimright = '>';
1537                 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1538                 {
1539                     // Start of identifier; must be a heredoc
1540                     Token tok;
1541                     p--;
1542                     scan(&tok); // read in heredoc identifier
1543                     if (tok.value != TOK.identifier)
1544                     {
1545                         error("identifier expected for heredoc, not %s", tok.toChars());
1546                         delimright = c;
1547                     }
1548                     else
1549                     {
1550                         hereid = tok.ident;
1551                         //printf("hereid = '%s'\n", hereid.toChars());
1552                         blankrol = 1;
1553                     }
1554                     nest = 0;
1555                 }
1556                 else
1557                 {
1558                     delimright = c;
1559                     nest = 0;
1560                     if (isspace(c))
1561                         error("delimiter cannot be whitespace");
1562                 }
1563             }
1564             else
1565             {
1566                 if (blankrol)
1567                 {
1568                     error("heredoc rest of line should be blank");
1569                     blankrol = 0;
1570                     continue;
1571                 }
1572                 if (nest == 1)
1573                 {
1574                     if (c == delimleft)
1575                         nestcount++;
1576                     else if (c == delimright)
1577                     {
1578                         nestcount--;
1579                         if (nestcount == 0)
1580                             goto Ldone;
1581                     }
1582                 }
1583                 else if (c == delimright)
1584                     goto Ldone;
1585                 if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
1586                 {
1587                     Token tok;
1588                     auto psave = p;
1589                     p--;
1590                     scan(&tok); // read in possible heredoc identifier
1591                     //printf("endid = '%s'\n", tok.ident.toChars());
1592                     if (tok.value == TOK.identifier && tok.ident is hereid)
1593                     {
1594                         /* should check that rest of line is blank
1595                          */
1596                         goto Ldone;
1597                     }
1598                     p = psave;
1599                 }
1600                 stringbuffer.writeUTF8(c);
1601                 startline = 0;
1602             }
1603         }
1604     Ldone:
1605         if (*p == '"')
1606             p++;
1607         else if (hereid)
1608             error("delimited string must end in %s\"", hereid.toChars());
1609         else
1610             error("delimited string must end in %c\"", delimright);
1611         result.setString(stringbuffer);
1612         stringPostfix(result);
1613     }
1614 
1615     /**
1616     Lex a token string. Some examples of token strings are:
1617     ---
1618     q{ foo(xxx) }    // " foo(xxx) "
1619     q{foo$(LPAREN)}  // "foo$(LPAREN)"
1620     q{{foo}"}"}      // "{foo}"}""
1621     ---
1622     It is assumed that `p` points to the opening curly-brace '{'.
1623     Params:
1624         result = pointer to the token that accepts the result
1625     */
1626     private void tokenStringConstant(Token* result)
1627     {
1628         result.value = TOK.string_;
1629 
1630         uint nest = 1;
1631         const start = loc();
1632         const pstart = ++p;
1633         inTokenStringConstant++;
1634         scope(exit) inTokenStringConstant--;
1635         while (1)
1636         {
1637             Token tok;
1638             scan(&tok);
1639             switch (tok.value)
1640             {
1641             case TOK.leftCurly:
1642                 nest++;
1643                 continue;
1644             case TOK.rightCurly:
1645                 if (--nest == 0)
1646                 {
1647                     result.setString(pstart, p - 1 - pstart);
1648                     stringPostfix(result);
1649                     return;
1650                 }
1651                 continue;
1652             case TOK.endOfFile:
1653                 error("unterminated token string constant starting at %s", start.toChars());
1654                 result.setString();
1655                 return;
1656             default:
1657                 continue;
1658             }
1659         }
1660     }
1661 
1662     /**
1663     Scan a double-quoted string while building the processed string value by
1664     handling escape sequences. The result is returned in the given `t` token.
1665     This function assumes that `p` currently points to the opening double-quote
1666     of the string.
1667     Params:
1668         t = the token to set the resulting string to
1669     */
1670     private void escapeStringConstant(Token* t)
1671     {
1672         t.value = TOK.string_;
1673 
1674         const start = loc();
1675         p++;
1676         stringbuffer.setsize(0);
1677         while (1)
1678         {
1679             dchar c = *p++;
1680             switch (c)
1681             {
1682             case '\\':
1683                 switch (*p)
1684                 {
1685                 case 'u':
1686                 case 'U':
1687                 case '&':
1688                     c = escapeSequence();
1689                     stringbuffer.writeUTF8(c);
1690                     continue;
1691                 default:
1692                     c = escapeSequence();
1693                     break;
1694                 }
1695                 break;
1696             case '\n':
1697                 endOfLine();
1698                 break;
1699             case '\r':
1700                 if (*p == '\n')
1701                     continue; // ignore
1702                 c = '\n'; // treat EndOfLine as \n character
1703                 endOfLine();
1704                 break;
1705             case '"':
1706                 t.setString(stringbuffer);
1707                 stringPostfix(t);
1708                 return;
1709             case 0:
1710             case 0x1A:
1711                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1712                 p--;
1713                 error("unterminated string constant starting at %s", start.toChars());
1714                 t.setString();
1715                 return;
1716             default:
1717                 if (c & 0x80)
1718                 {
1719                     p--;
1720                     c = decodeUTF();
1721                     if (c == LS || c == PS)
1722                     {
1723                         c = '\n';
1724                         endOfLine();
1725                     }
1726                     p++;
1727                     stringbuffer.writeUTF8(c);
1728                     continue;
1729                 }
1730                 break;
1731             }
1732             stringbuffer.writeByte(c);
1733         }
1734     }
1735 
1736     /**************************************
1737      */
1738     private TOK charConstant(Token* t)
1739     {
1740         TOK tk = TOK.charLiteral;
1741         //printf("Lexer::charConstant\n");
1742         p++;
1743         dchar c = *p++;
1744         switch (c)
1745         {
1746         case '\\':
1747             switch (*p)
1748             {
1749             case 'u':
1750                 t.unsvalue = escapeSequence();
1751                 tk = TOK.wcharLiteral;
1752                 break;
1753             case 'U':
1754             case '&':
1755                 t.unsvalue = escapeSequence();
1756                 tk = TOK.dcharLiteral;
1757                 break;
1758             default:
1759                 t.unsvalue = escapeSequence();
1760                 break;
1761             }
1762             break;
1763         case '\n':
1764         L1:
1765             endOfLine();
1766             goto case;
1767         case '\r':
1768             goto case '\'';
1769         case 0:
1770         case 0x1A:
1771             // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1772             p--;
1773             goto case;
1774         case '\'':
1775             error("unterminated character constant");
1776             t.unsvalue = '?';
1777             return tk;
1778         default:
1779             if (c & 0x80)
1780             {
1781                 p--;
1782                 c = decodeUTF();
1783                 p++;
1784                 if (c == LS || c == PS)
1785                     goto L1;
1786                 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1787                     tk = TOK.wcharLiteral;
1788                 else
1789                     tk = TOK.dcharLiteral;
1790             }
1791             t.unsvalue = c;
1792             break;
1793         }
1794         if (*p != '\'')
1795         {
1796             while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
1797                     *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
1798             {
1799                 if (*p & 0x80)
1800                 {
1801                     const s = p;
1802                     c = decodeUTF();
1803                     if (c == LS || c == PS)
1804                     {
1805                         p = s;
1806                         break;
1807                     }
1808                 }
1809                 p++;
1810             }
1811 
1812             if (*p == '\'')
1813             {
1814                 error("character constant has multiple characters");
1815                 p++;
1816             }
1817             else
1818                 error("unterminated character constant");
1819             t.unsvalue = '?';
1820             return tk;
1821         }
1822         p++;
1823         return tk;
1824     }
1825 
1826     /***************************************
1827      * Get postfix of string literal.
1828      */
1829     private void stringPostfix(Token* t) pure @nogc
1830     {
1831         switch (*p)
1832         {
1833         case 'c':
1834         case 'w':
1835         case 'd':
1836             t.postfix = *p;
1837             p++;
1838             break;
1839         default:
1840             t.postfix = 0;
1841             break;
1842         }
1843     }
1844 
    /**************************************
     * Read in a number.
     * If it's an integer, store its value in `t.unsvalue`.
     *      Integers can be decimal, binary (`0b`) or hex (`0x`);
     *      C-style octal literals with a value of 8 or more are diagnosed as errors.
     *      Handle the suffixes U, UL, LU, L, etc.
     * If the text turns out to be a floating point literal, `p` is reset to
     * the start of the literal and the work is handed over to `inreal`.
     * Params:
     *      t = token that receives the value
     * Returns:
     *      TOK.int32Literal, TOK.uns32Literal, TOK.int64Literal or
     *      TOK.uns64Literal for an integer, otherwise the result of `inreal`
     */
    private TOK number(Token* t)
    {
        int base = 10;
        const start = p;
        uinteger_t n = 0; // unsigned >=64 bit integer type
        int d;
        bool err = false;
        bool overflow = false;
        bool anyBinaryDigitsNoSingleUS = false;
        bool anyHexDigitsNoSingleUS = false;
        dchar c = *p;
        // A leading '0' may introduce a base prefix, an octal literal or a real
        if (c == '0')
        {
            ++p;
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                // C-style octal; the digit itself is consumed by the main loop below
                base = 8;
                break;
            case 'x':
            case 'X':
                ++p;
                base = 16;
                break;
            case 'b':
            case 'B':
                ++p;
                base = 2;
                break;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
                    goto Ldone; // if ".identifier" or ".unicode"
                goto Lreal; // '.' is part of current token
            case 'i':
            case 'f':
            case 'F':
                // "0i", "0f", "0F": real/imaginary suffix directly after the zero
                goto Lreal;
            case '_':
                // "0_..." is treated like an octal literal
                ++p;
                base = 8;
                break;
            case 'L':
                // "0Li" is an imaginary real; a plain "0L" is handled by the suffix loop
                if (p[1] == 'i')
                    goto Lreal;
                break;
            default:
                break;
            }
        }
        // Accumulate digits into n until a character that cannot belong to an integer
        while (1)
        {
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                ++p;
                d = c - '0';
                break;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
                ++p;
                if (base != 16)
                {
                    // outside hex, 'e'/'E' starts an exponent and 'f'/'F' is a float suffix
                    if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
                        goto Lreal;
                }
                // otherwise treat it as a digit 10..15; the `d >= base` check
                // below reports it when the base is smaller than 16
                if (c >= 'a')
                    d = c + 10 - 'a';
                else
                    d = c + 10 - 'A';
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                goto Ldone;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // hex: '.' only continues the literal when a hex digit follows
                if (base == 2)
                    goto Ldone; // binary: '.' is never taken as part of the literal
                goto Lreal; // otherwise as part of a floating point literal
            case 'p':
            case 'P':
            case 'i':
            Lreal:
                // Not an integer after all: rewind and rescan the whole literal as a real
                p = start;
                return inreal(t);
            case '_':
                // digit separator, contributes nothing to the value
                ++p;
                continue;
            default:
                goto Ldone;
            }
            // got a digit here, set any necessary flags, check for errors
            anyHexDigitsNoSingleUS = true;
            anyBinaryDigitsNoSingleUS = true;
            // only the first bad digit of a literal is reported
            if (!err && d >= base)
            {
                error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
                                                     base == 8 ? "octal".ptr :
                                                     "decimal".ptr, c);
                err = true;
            }
            // Avoid expensive overflow check if we aren't at risk of overflow:
            // with base <= 16 and d <= 15 the result below cannot exceed 0xFFFF_FFFF_FFFF_FFFF
            if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
                n = n * base + d;
            else
            {
                import core.checkedint : mulu, addu;

                n = mulu(n, base, overflow);
                n = addu(n, d, overflow);
            }
        }
    Ldone:
        if (overflow && !err)
        {
            error("integer overflow");
            err = true;
        }
        // "0b" or "0x" followed by no digit at all (underscores do not count)
        if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
            (base == 16 && !anyHexDigitsNoSingleUS))
            error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
        // Bit set describing the literal; the switch below selects the token type from it
        enum FLAGS : int
        {
            none = 0,
            decimal = 1, // decimal
            unsigned = 2, // u or U suffix
            long_ = 4, // L suffix
        }

        FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
        // Parse trailing 'u', 'U', 'l' or 'L' in any combination
        const psuffix = p;
        while (1)
        {
            FLAGS f;
            switch (*p)
            {
            case 'U':
            case 'u':
                f = FLAGS.unsigned;
                goto L1;
            case 'l':
                // diagnosed, but still treated as a long suffix so scanning can continue
                f = FLAGS.long_;
                error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
                goto L1;
            case 'L':
                f = FLAGS.long_;
            L1:
                p++;
                // the same kind of suffix given twice, e.g. "1UU" or "1LL"
                if ((flags & f) && !err)
                {
                    error("unrecognized token");
                    err = true;
                }
                flags = cast(FLAGS)(flags | f);
                continue;
            default:
                break;
            }
            break;
        }
        // C-style octal literals are only tolerated for the values 0 through 7
        if (base == 8 && n >= 8)
        {
            if (err)
                // can't translate invalid octal value, just show a generic message
                error("octal literals larger than 7 are no longer supported");
            else
                error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
                    n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
        }
        // Choose the token type from the suffixes, the base and the magnitude of n
        TOK result;
        switch (flags)
        {
        case FLAGS.none:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else if (n & 0xFFFFFFFF00000000L)
                result = TOK.int64Literal;
            else if (n & 0x80000000)
                result = TOK.uns32Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.decimal:
            /* First that fits: int, long; a value with bit 63 set
             * becomes an unsigned 64 bit literal
             */
            if (n & 0x8000000000000000L)
            {
                result = TOK.uns64Literal;
            }
            else if (n & 0xFFFFFFFF80000000L)
                result = TOK.int64Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.unsigned:
        case FLAGS.decimal | FLAGS.unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.uns32Literal;
            break;
        case FLAGS.decimal | FLAGS.long_:
            // a decimal literal with an explicit L suffix must fit in a signed 64 bit value
            if (n & 0x8000000000000000L)
            {
                if (!err)
                {
                    error("signed integer overflow");
                    err = true;
                }
                result = TOK.uns64Literal;
            }
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.long_:
            // non-decimal with L suffix: silently becomes unsigned when bit 63 is set
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.unsigned | FLAGS.long_:
        case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
            result = TOK.uns64Literal;
            break;
        default:
            // every combination of the three flag bits is handled above
            debug
            {
                printf("%x\n", flags);
            }
            assert(0);
        }
        t.unsvalue = n;
        return result;
    }
2132 
    /**************************************
     * Read in characters, converting them to real.
     *
     * Scans a decimal or hexadecimal floating point literal starting at `p`,
     * copies its text (without `_` separators) into `stringbuffer`, stores the
     * parsed value in `t.floatvalue` and consumes the optional `f`/`F`/`L`
     * and `i` suffixes.
     * Params:
     *      t = token that receives the value
     * Returns:
     *      TOK.float32Literal, TOK.float64Literal, TOK.float80Literal or the
     *      corresponding TOK.imaginary*Literal when an `i` suffix is present
     * Bugs:
     *      Exponent overflow not detected.
     *      Too much requested precision is not detected.
     */
    private TOK inreal(Token* t)
    {
        //printf("Lexer::inreal()\n");
        debug
        {
            assert(*p == '.' || isdigit(*p));
        }
        // cleared on a syntax error; the value is then set to zero instead of being parsed
        bool isWellformedString = true;
        stringbuffer.setsize(0);
        auto pstart = p;
        bool hex = false;
        // Throughout the scan `c` holds the character just read and `p` points one past it
        dchar c = *p++;
        // Leading '0x'
        if (c == '0')
        {
            c = *p++;
            if (c == 'x' || c == 'X')
            {
                hex = true;
                c = *p++;
            }
        }
        // Digits to left of '.'
        while (1)
        {
            if (c == '.')
            {
                c = *p++;
                break;
            }
            if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
            {
                c = *p++;
                continue;
            }
            break;
        }
        // Digits to right of '.'
        while (1)
        {
            if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
            {
                c = *p++;
                continue;
            }
            break;
        }
        // Exponent: 'e'/'E' is always accepted here, 'p'/'P' only for hex literals
        if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
        {
            c = *p++;
            if (c == '-' || c == '+')
            {
                c = *p++;
            }
            // becomes true once at least one exponent digit has been seen
            bool anyexp = false;
            while (1)
            {
                if (isdigit(c))
                {
                    anyexp = true;
                    c = *p++;
                    continue;
                }
                if (c == '_')
                {
                    c = *p++;
                    continue;
                }
                if (!anyexp)
                {
                    error("missing exponent");
                    isWellformedString = false;
                }
                break;
            }
        }
        else if (hex)
        {
            error("exponent required for hex float");
            isWellformedString = false;
        }
        // step back so that p points at the character that ended the scan
        --p;
        // Copy the literal text, minus '_' separators, into stringbuffer
        while (pstart < p)
        {
            if (*pstart != '_')
                stringbuffer.writeByte(*pstart);
            ++pstart;
        }
        // NUL-terminate so the buffer can be passed on as a C string
        stringbuffer.writeByte(0);
        auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
        TOK result;
        bool isOutOfRange = false;
        // CTFloat.parse gets &isOutOfRange so that it can report a range problem
        t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
        // The suffix selects the literal's precision; float and double get an extra range check
        switch (*p)
        {
        case 'F':
        case 'f':
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
            result = TOK.float32Literal;
            p++;
            break;
        default:
            // no suffix: the literal is a double and nothing is consumed
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
            result = TOK.float64Literal;
            break;
        case 'l':
            // diagnosed, then handled exactly like 'L'
            error("use 'L' suffix instead of 'l'");
            goto case 'L';
        case 'L':
            result = TOK.float80Literal;
            p++;
            break;
        }
        // A trailing 'i' turns the literal into an imaginary one; 'I' is accepted with an error
        if (*p == 'i' || *p == 'I')
        {
            if (*p == 'I')
                error("use 'i' suffix instead of 'I'");
            p++;
            switch (result)
            {
            case TOK.float32Literal:
                result = TOK.imaginary32Literal;
                break;
            case TOK.float64Literal:
                result = TOK.imaginary64Literal;
                break;
            case TOK.float80Literal:
                result = TOK.imaginary80Literal;
                break;
            default:
                break;
            }
        }
        // Range problems are only reported for float and double literals, not for 'L' ones
        const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
        if (isOutOfRange && !isLong)
        {
            const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
            error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
        }
        // Sanity check (debug builds only): result must be one of the six real/imaginary tokens
        debug
        {
            switch (result)
            {
            case TOK.float32Literal:
            case TOK.float64Literal:
            case TOK.float80Literal:
            case TOK.imaginary32Literal:
            case TOK.imaginary64Literal:
            case TOK.imaginary80Literal:
                break;
            default:
                assert(0);
            }
        }
        return result;
    }
2297 
2298     final Loc loc() pure @nogc
2299     {
2300         scanloc.charnum = cast(uint)(1 + p - line);
2301         return scanloc;
2302     }
2303 
2304     final void error(const(char)* format, ...)
2305     {
2306         va_list args;
2307         va_start(args, format);
2308         handleDiagnostic(token.loc, Severity.error, format, args);
2309         va_end(args);
2310     }
2311 
2312     final void error(const ref Loc loc, const(char)* format, ...)
2313     {
2314         va_list args;
2315         va_start(args, format);
2316         handleDiagnostic(loc, Severity.error, format, args);
2317         va_end(args);
2318     }
2319 
2320     final void errorSupplemental(const ref Loc loc, const(char)* format, ...)
2321     {
2322         va_list args;
2323         va_start(args, format);
2324         handleDiagnostic(loc, Severity.error, format, args, true);
2325         va_end(args);
2326     }
2327 
2328     final void warning(const ref Loc loc, const(char)* format, ...)
2329     {
2330         va_list args;
2331         va_start(args, format);
2332         handleDiagnostic(loc, Severity.warning, format, args);
2333         va_end(args);
2334     }
2335 
2336     final void warningSupplemental(const ref Loc loc, const(char)* format, ...)
2337     {
2338         va_list args;
2339         va_start(args, format);
2340         handleDiagnostic(loc, Severity.warning, format, args, true);
2341         va_end(args);
2342     }
2343 
2344     final void deprecation(const(char)* format, ...)
2345     {
2346         va_list args;
2347         va_start(args, format);
2348         handleDiagnostic(token.loc, Severity.deprecation, format, args);
2349         va_end(args);
2350     }
2351 
2352     final void deprecationSupplemental(const(char)* format, ...)
2353     {
2354         va_list args;
2355         va_start(args, format);
2356         handleDiagnostic(token.loc, Severity.deprecation, format, args, true);
2357         va_end(args);
2358     }
2359 
2360     /*********************************************
2361      * parse:
2362      *      #line linnum [filespec]
2363      * also allow __LINE__ for linnum, and __FILE__ for filespec
2364      */
2365     private void poundLine()
2366     {
2367         auto linnum = this.scanloc.linnum;
2368         const(char)* filespec = null;
2369         const loc = this.loc();
2370         Token tok;
2371         scan(&tok);
2372         if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
2373         {
2374             const lin = cast(int)(tok.unsvalue - 1);
2375             if (lin != tok.unsvalue - 1)
2376                 error("line number `%lld` out of range", cast(ulong)tok.unsvalue);
2377             else
2378                 linnum = lin;
2379         }
2380         else if (tok.value == TOK.line)
2381         {
2382         }
2383         else
2384             goto Lerr;
2385         while (1)
2386         {
2387             switch (*p)
2388             {
2389             case 0:
2390             case 0x1A:
2391             case '\n':
2392             Lnewline:
2393                 if (!inTokenStringConstant)
2394                 {
2395                     this.scanloc.linnum = linnum;
2396                     if (filespec)
2397                         this.scanloc.filename = filespec;
2398                 }
2399                 return;
2400             case '\r':
2401                 p++;
2402                 if (*p != '\n')
2403                 {
2404                     p--;
2405                     goto Lnewline;
2406                 }
2407                 continue;
2408             case ' ':
2409             case '\t':
2410             case '\v':
2411             case '\f':
2412                 p++;
2413                 continue; // skip white space
2414             case '_':
2415                 if (memcmp(p, "__FILE__".ptr, 8) == 0)
2416                 {
2417                     p += 8;
2418                     filespec = mem.xstrdup(scanloc.filename);
2419                     continue;
2420                 }
2421                 goto Lerr;
2422             case '"':
2423                 if (filespec)
2424                     goto Lerr;
2425                 stringbuffer.setsize(0);
2426                 p++;
2427                 while (1)
2428                 {
2429                     uint c;
2430                     c = *p;
2431                     switch (c)
2432                     {
2433                     case '\n':
2434                     case '\r':
2435                     case 0:
2436                     case 0x1A:
2437                         goto Lerr;
2438                     case '"':
2439                         stringbuffer.writeByte(0);
2440                         filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
2441                         p++;
2442                         break;
2443                     default:
2444                         if (c & 0x80)
2445                         {
2446                             uint u = decodeUTF();
2447                             if (u == PS || u == LS)
2448                                 goto Lerr;
2449                         }
2450                         stringbuffer.writeByte(c);
2451                         p++;
2452                         continue;
2453                     }
2454                     break;
2455                 }
2456                 continue;
2457             default:
2458                 if (*p & 0x80)
2459                 {
2460                     uint u = decodeUTF();
2461                     if (u == PS || u == LS)
2462                         goto Lnewline;
2463                 }
2464                 goto Lerr;
2465             }
2466         }
2467     Lerr:
2468         error(loc, "#line integer [\"filespec\"]\\n expected");
2469     }
2470 
2471     /********************************************
2472      * Decode UTF character.
2473      * Issue error messages for invalid sequences.
2474      * Return decoded character, advance p to last character in UTF sequence.
2475      */
2476     private uint decodeUTF()
2477     {
2478         const s = p;
2479         assert(*s & 0x80);
2480         // Check length of remaining string up to 4 UTF-8 characters
2481         size_t len;
2482         for (len = 1; len < 4 && s[len]; len++)
2483         {
2484         }
2485         size_t idx = 0;
2486         dchar u;
2487         const msg = utf_decodeChar(s[0 .. len], idx, u);
2488         p += idx - 1;
2489         if (msg)
2490         {
2491             error("%.*s", cast(int)msg.length, msg.ptr);
2492         }
2493         return u;
2494     }
2495 
    /***************************************************
     * Parse doc comment embedded between t.ptr and p.
     * Remove trailing blanks and tabs from lines.
     * Replace all newlines with \n.
     * Remove leading comment character from each line.
     * Decide if it's a lineComment or a blockComment.
     * Append to previous one for this token.
     *
     * If newParagraph is true, an extra newline will be
     * added between adjoining doc comments.
     *
     * Params:
     *      t = token the comment is attached to; `t.ptr` is the start of the comment
     *      lineComment = non-zero if the comment may be stored as `t.lineComment`
     *                    (that is done only when `anyToken` is also set)
     *      newParagraph = passed on to `combineComments` when appending
     */
    private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
    {
        /* ct tells us which kind of comment it is: '/', '*', or '+'
         */
        const ct = t.ptr[2];
        /* Start of comment text skips over / * *, / + +, or / / /
         */
        const(char)* q = t.ptr + 3; // start of comment text
        const(char)* qend = p;
        // block comments end with a two character terminator that is not part of the text
        if (ct == '*' || ct == '+')
            qend -= 2;
        /* Scan over initial row of ****'s or ++++'s or ////'s
         */
        for (; q < qend; q++)
        {
            if (*q != ct)
                break;
        }
        /* Remove leading spaces until start of the comment
         */
        // linestart is non-zero while only white space has been seen on the current line
        int linestart = 0;
        if (ct == '/')
        {
            while (q < qend && (*q == ' ' || *q == '\t'))
                ++q;
        }
        else if (q < qend)
        {
            // block comment: drop a newline that directly follows the opening row
            if (*q == '\r')
            {
                ++q;
                if (q < qend && *q == '\n')
                    ++q;
                linestart = 1;
            }
            else if (*q == '\n')
            {
                ++q;
                linestart = 1;
            }
        }
        /* Remove trailing row of ****'s or ++++'s
         */
        if (ct != '/')
        {
            for (; q < qend; qend--)
            {
                if (qend[-1] != ct)
                    break;
            }
        }
        /* Comment is now [q .. qend].
         * Canonicalize it into buf[].
         */
        OutBuffer buf;

        // Shrink buf so that it no longer ends in spaces or tabs
        void trimTrailingWhitespace()
        {
            const s = buf[];
            auto len = s.length;
            while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
                --len;
            buf.setsize(len);
        }

        for (; q < qend; q++)
        {
            char c = *q;
            switch (c)
            {
            case '*':
            case '+':
                // the first comment character on a line is decoration: drop it
                // together with the white space in front of it
                if (linestart && c == ct)
                {
                    linestart = 0;
                    /* Trim preceding whitespace up to preceding \n
                     */
                    trimTrailingWhitespace();
                    continue;
                }
                break;
            case ' ':
            case '\t':
                // white space is copied and leaves linestart unchanged
                break;
            case '\r':
                if (q[1] == '\n')
                    continue; // skip the \r
                goto Lnewline;
            default:
                // 226 128 168 / 226 128 169 are the UTF-8 encodings of LS / PS
                if (c == 226)
                {
                    // If LS or PS
                    if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
                    {
                        q += 2;
                        goto Lnewline;
                    }
                }
                linestart = 0;
                break;
            Lnewline:
                c = '\n'; // replace all newlines with \n
                goto case;
            case '\n':
                linestart = 1;
                /* Trim trailing whitespace
                 */
                trimTrailingWhitespace();
                break;
            }
            buf.writeByte(c);
        }
        /* Trim trailing whitespace (if the last line does not have newline)
         */
        trimTrailingWhitespace();

        // Always end with a newline
        const s = buf[];
        if (s.length == 0 || s[$ - 1] != '\n')
            buf.writeByte('\n');

        // It's a line comment if the start of the doc comment comes
        // after other non-whitespace on the same line.
        auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
        // Combine with previous doc comment, if any
        if (*dc)
            *dc = combineComments(*dc, buf[], newParagraph).toDString();
        else
            // NOTE(review): the `true` argument presumably asks for a NUL-terminated slice - confirm in OutBuffer
            *dc = buf.extractSlice(true);
    }
2637 
2638     /********************************************
2639      * Combine two document comments into one,
2640      * separated by an extra newline if newParagraph is true.
2641      */
2642     static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
2643     {
2644         //printf("Lexer::combineComments('%s', '%s', '%i')\n", c1, c2, newParagraph);
2645         const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n'
2646         if (!c1)
2647             return c2.ptr;
2648         if (!c2)
2649             return c1.ptr;
2650 
2651         int insertNewLine = 0;
2652         if (c1.length && c1[$ - 1] != '\n')
2653             insertNewLine = 1;
2654         const retSize = c1.length + insertNewLine + newParagraphSize + c2.length;
2655         auto p = cast(char*)mem.xmalloc_noscan(retSize + 1);
2656         p[0 .. c1.length] = c1[];
2657         if (insertNewLine)
2658             p[c1.length] = '\n';
2659         if (newParagraph)
2660             p[c1.length + insertNewLine] = '\n';
2661         p[retSize - c2.length .. retSize] = c2[];
2662         p[retSize] = 0;
2663         return p;
2664     }
2665 
2666 private:
2667     void endOfLine() pure @nogc @safe
2668     {
2669         scanloc.linnum++;
2670         line = p;
2671     }
2672 }
2673 
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    private __gshared bool initdone = false;

    // Note: the buffers below only hold meaningful text after `initialize`
    // has been called, so every access must be preceded by such a call.
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;
    __gshared char[8 + 1] time;
    __gshared char[24 + 1] timestamp;

    /**
     * Fill `date`, `time` and `timestamp`; only the first call does any work.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the system clock.
     * Params:
     *      loc = location used to report an unparsable `SOURCE_DATE_EPOCH`
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;
        initdone = true;

        time_t now;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        auto epoch = getenv("SOURCE_DATE_EPOCH");
        if (epoch is null)
        {
            // `.time` is the C library function; the bare name is the field above
            .time(&now);
        }
        else if (!now.parseDigits(epoch.toDString()))
        {
            error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epoch);
        }

        // ctime() yields "Www Mmm dd hh:mm:ss yyyy\n"; slice the pieces out of it
        const stamp = ctime(&now);
        assert(stamp);
        sprintf(date.ptr, "%.6s %.4s", stamp + 4, stamp + 20);
        sprintf(time.ptr, "%.8s", stamp + 11);
        sprintf(timestamp.ptr, "%.24s", stamp);
    }
}
2709 
/// Valid escape sequences: each must decode to the expected character,
/// consume the whole input and emit no diagnostic.
unittest
{
    import dmd.console;

    // Any diagnostic at all is a failure in this test
    nothrow bool failOnDiagnostic(const ref Loc loc, Color headerColor, const(char)* header,
                                  const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &failOnDiagnostic;

    // `sequence` is the text that follows the backslash of the escape
    static void test(T)(string sequence, T expected)
    {
        auto cursor = cast(const(char)*)sequence.ptr;
        const decoded = Lexer.escapeSequence(Loc.initial, cursor);
        assert(expected == decoded);
        // the whole sequence must have been consumed
        const end = sequence.ptr + sequence.length;
        assert(cursor == end);
    }

    // single character escapes
    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    // two digit hex escapes
    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    // octal escapes
    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    // four digit Unicode escapes
    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    // eight digit Unicode escapes
    test(`U0001f603`, '\U0001f603');

    // named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    diagnosticHandler = null;
}
// Invalid escape sequences: each input must raise exactly the expected error
// message, return the expected recovery value, and advance the scan pointer
// by the expected number of characters.
unittest
{
    import dmd.console;
    string expected;    // message the next diagnostic must match
    bool gotError;      // set by the handler once a diagnostic was seen

    // Formats the incoming diagnostic and compares it against `expected`.
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        // Bound the write to the buffer: an unexpectedly long message must
        // fail the assertion below instead of overrunning the stack buffer.
        const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
        assert(written >= 0 && cast(size_t) written < buffer.length);
        auto actual = buffer[0 .. written];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;

    // Runs `Lexer.escapeSequence` on `sequence` (the text following the
    // backslash) and checks the diagnostic, the returned value and how far
    // the scan pointer moved. `global.errors` is saved and restored so the
    // expected errors do not leak into the global error count.
    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength)
    {
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        global.errors = errors;
    }

    // Characters that do not start any escape sequence.
    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);

    // Hex escapes with too few digits.
    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    // Complete escapes whose value is rejected as an invalid UTF character.
    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    // Hex escape introducers followed by a non-hex character.
    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    // Unknown and unterminated named entities.
    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    // Octal escape exceeding \377.
    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}