dmd.lexer source code

1 /**
2  * Implements the lexical analyzer, which converts source code into lexical tokens.
3  *
4  * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
5  *
6  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
10  * Documentation:  https://dlang.org/phobos/dmd_lexer.html
11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
12  */
13 
14 module dmd.lexer;
15 
16 import core.stdc.ctype;
17 import core.stdc.errno;
18 import core.stdc.stdarg;
19 import core.stdc.stdio;
20 import core.stdc.stdlib : getenv;
21 import core.stdc.string;
22 import core.stdc.time;
23 
24 import dmd.diagnostic : DiagnosticHandler, Severity, DefaultDiagnosticHandler, DefaultDiagnosticReporter;
25 import dmd.entity;
26 import dmd.errors;
27 import dmd.globals;
28 import dmd.id;
29 import dmd.identifier;
30 import dmd.root.ctfloat;
31 import dmd.root.outbuffer;
32 import dmd.root.port;
33 import dmd.root.rmem;
34 import dmd.root.string;
35 import dmd.tokens;
36 import dmd.utf;
37 import dmd.utils;
38 
39 nothrow:
40 
41 private enum LS = 0x2028;       // UTF line separator
42 private enum PS = 0x2029;       // UTF paragraph separator
43 
/********************************************
 * Character classification table, computed at compile time.
 *
 * Entry `cmtable[c]` is the bitwise OR of every `CM*` flag that applies to
 * the byte value `c`. It is queried through the small predicate helpers
 * (`isoctal`, `ishex`, `isidchar`, ...) instead of the C runtime's ctype
 * functions.
 */
private static immutable cmtable = () {
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        ubyte flags;

        // Basic digit / identifier classes.
        if (c >= '0' && c <= '7')
            flags |= CMoctal;
        if (c_isxdigit(c))
            flags |= CMhex;
        if (c == '_' || c_isalnum(c))
            flags |= CMidchar;

        // Characters that let a numeric literal continue past its first digit.
        // Radix prefixes are only meaningful directly after a leading '0'.
        const bool radixPrefix = c == 'x' || c == 'X' || c == 'b' || c == 'B';
        const bool decimalDigit = c >= '0' && c <= '9';
        const bool numberTail =
            c == 'e' || c == 'E' ||
            c == 'f' || c == 'F' ||
            c == 'l' || c == 'L' ||
            c == 'p' || c == 'P' ||
            c == 'u' || c == 'U' ||
            c == 'i' ||
            c == '.' ||
            c == '_';

        if (radixPrefix)
            flags |= CMzerosecond;
        else if (decimalDigit || numberTail)
            flags |= CMzerosecond | CMdigitsecond;

        // Characters that can be taken verbatim as the body of a simple
        // character literal: 7-bit, and none of the ones listed here.
        const bool excludedFromSingleChar =
            c == '\\' ||
            c == '\n' ||
            c == '\r' ||
            c == 0    ||
            c == 0x1A ||
            c == '\'';

        if (!excludedFromSingleChar && (c & 0x80) == 0)
            flags |= CMsinglechar;

        table[c] = flags;
    }
    return table;
}();
98 
/// Bit flags stored in the entries of `cmtable`, one bit per character class.
private enum : int
{
    CMoctal       = 1 << 0, /// octal digit '0'..'7'
    CMhex         = 1 << 1, /// hexadecimal digit
    CMidchar      = 1 << 2, /// ASCII letter, digit or '_'
    CMzerosecond  = 1 << 3, /// continues a numeric literal that starts with '0'
    CMdigitsecond = 1 << 4, /// continues a numeric literal that starts with '1'..'9'
    CMsinglechar  = 1 << 5, /// may be the body of a simple one-character literal
}
108 
/// Returns: `true` if `c` carries the `CMoctal` flag in `cmtable`.
private bool isoctal(const char c) pure @nogc @safe
{
    immutable classBits = cmtable[c];
    return (classBits & CMoctal) != 0;
}
113 
/// Returns: `true` if `c` carries the `CMhex` flag in `cmtable`.
private bool ishex(const char c) pure @nogc @safe
{
    immutable classBits = cmtable[c];
    return (classBits & CMhex) != 0;
}
118 
/// Returns: `true` if `c` carries the `CMidchar` flag in `cmtable`
/// (ASCII letter, digit or underscore).
private bool isidchar(const char c) pure @nogc @safe
{
    immutable classBits = cmtable[c];
    return (classBits & CMidchar) != 0;
}
123 
/// Returns: `true` if `c` carries the `CMzerosecond` flag in `cmtable`,
/// i.e. a numeric literal beginning with '0' continues with `c`.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    immutable classBits = cmtable[c];
    return (classBits & CMzerosecond) != 0;
}
128 
/// Returns: `true` if `c` carries the `CMdigitsecond` flag in `cmtable`,
/// i.e. a numeric literal beginning with '1'..'9' continues with `c`.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    immutable classBits = cmtable[c];
    return (classBits & CMdigitsecond) != 0;
}
133 
/// Returns: `true` if `c` carries the `CMsinglechar` flag in `cmtable`,
/// i.e. it can be used as-is as the body of a simple character literal.
private bool issinglechar(const char c) pure @nogc @safe
{
    immutable classBits = cmtable[c];
    return (classBits & CMsinglechar) != 0;
}
138 
/**
 * Locale-independent test for an ASCII hexadecimal digit.
 *
 * Params:
 *  c = character code to classify
 * Returns: `true` for '0'..'9', 'a'..'f' and 'A'..'F'; `false` for anything else
 */
private bool c_isxdigit(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;

        default:
            return false;
    }
}
145 
/**
 * Locale-independent test for an ASCII letter or decimal digit.
 *
 * Params:
 *  c = character code to classify
 * Returns: `true` for '0'..'9', 'a'..'z' and 'A'..'Z'; `false` for anything else
 */
private bool c_isalnum(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;

        default:
            return false;
    }
}
152 
unittest
{
    /* Smoke test: lexing "int" yields TOK.int32 once, and every call after
     * that keeps yielding TOK.endOfFile.
     */
    string source = "int"; // We rely on the implicit null-terminator
    DefaultDiagnosticHandler diagnosticHandler;
    scope Lexer lexer = new Lexer(null, source.ptr, 0, source.length, 0, 0, diagnosticHandler.diagnosticHandler);

    const keyword = lexer.nextToken();
    diagnosticHandler.report();
    assert(keyword == TOK.int32);

    // The end-of-file token must be sticky across repeated calls.
    foreach (attempt; 0 .. 3)
    {
        const trailing = lexer.nextToken();
        diagnosticHandler.report();
        assert(trailing == TOK.endOfFile);
    }
}
176 
unittest
{
    // Silence Lexer error output for the duration of this test.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    // Malformed input must still terminate with TOK.endOfFile.
    static immutable char[][] testcases =
    [   // Testcase must end with 0 or 0x1A.
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0],
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        DefaultDiagnosticHandler diagnosticHandler;
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0, diagnosticHandler.diagnosticHandler);
        TOK tok = lex2.nextToken();
        diagnosticHandler.report();

        // Allow at most `testcase.length - 1` further tokens, so a lexer that
        // never reaches the end of input fails the assert instead of hanging.
        foreach (attempt; 1 .. testcase.length)
        {
            if (tok == TOK.endOfFile)
                break;
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);

        // Once reached, end-of-file must be returned again.
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
213 
// Library builds (DMDLIB) additionally define the `LocOffset` version identifier.
version (DMDLIB) version = LocOffset;
218 
219 /***********************************************************
220  */
221 class Lexer
222 {
223     private __gshared OutBuffer stringbuffer;
224 
225     Loc scanloc;            // for error messages
226     Loc prevloc;            // location of token before current
227 
228     const(char)* p;         // current character
229 
230     Token token;
231 
232     private
233     {
234         const(char)* base;      // pointer to start of buffer
235         const(char)* end;       // pointer to last element of buffer
236         const(char)* line;      // start of current line
237 
238         bool doDocComment;      // collect doc comment information
239         bool anyToken;          // seen at least one token
240         bool commentToken;      // comments are TOK.comment's
241         int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
242         int lastDocLine;        // last line of previous doc comment
243 
244         Token* tokenFreelist;
245         DiagnosticHandler handleDiagnostic;
246         DefaultDiagnosticReporter diagnosticReporter;
247     }
248 
249   nothrow:
250 
251     /*********************
252      * Creates a Lexer for the source code base[begoffset..endoffset+1].
253      * The last character, base[endoffset], must be null (0) or EOF (0x1A).
254      *
255      * Params:
256      *  filename = used for error messages
257      *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
258      *  begoffset = starting offset into base[]
259      *  endoffset = the last offset to read into base[]
260      *  doDocComment = handle documentation comments
261      *  commentToken = comments become TOK.comment's
262      *  diagnosticHandler = diagnostic handler
263      */
264     this(const(char)* filename, const(char)* base, size_t begoffset,
265         size_t endoffset, bool doDocComment, bool commentToken,
266         DiagnosticHandler handleDiagnostic) pure
267     {
268         scanloc = Loc(filename, 1, 1);
269         //printf("Lexer::Lexer(%p,%d)\n",base,length);
270         //printf("lexer.filename = %s\n", filename);
271         token = Token.init;
272         this.base = base;
273         this.end = base + endoffset;
274         p = base + begoffset;
275         line = p;
276         this.doDocComment = doDocComment;
277         this.commentToken = commentToken;
278         this.inTokenStringConstant = 0;
279         this.lastDocLine = 0;
280         this.handleDiagnostic = handleDiagnostic;
281 
282         //initKeywords();
283         /* If first line starts with '#!', ignore the line
284          */
285         if (p && p[0] == '#' && p[1] == '!')
286         {
287             p += 2;
288             while (1)
289             {
290                 char c = *p++;
291                 switch (c)
292                 {
293                 case 0:
294                 case 0x1A:
295                     p--;
296                     goto case;
297                 case '\n':
298                     break;
299                 default:
300                     continue;
301                 }
302                 break;
303             }
304             endOfLine();
305         }
306     }
307 
308     /// Returns: a newly allocated `Token`.
309     Token* allocateToken() pure nothrow @safe
310     {
311         if (tokenFreelist)
312         {
313             Token* t = tokenFreelist;
314             tokenFreelist = t.next;
315             t.next = null;
316             return t;
317         }
318         return new Token();
319     }
320 
321     /// Frees the given token by returning it to the freelist.
322     private void releaseToken(Token* token) pure nothrow @nogc @safe
323     {
324         if (mem.isGCEnabled)
325             *token = Token.init;
326         token.next = tokenFreelist;
327         tokenFreelist = token;
328     }
329 
330     TOK nextToken()
331     {
332         prevloc = token.loc;
333         if (token.next)
334         {
335             Token* t = token.next;
336             memcpy(&token, t, Token.sizeof);
337             releaseToken(t);
338         }
339         else
340         {
341             scan(&token);
342         }
343         //printf(token.toChars());
344         return token.value;
345     }
346 
347     /***********************
348      * Look ahead at next token's value.
349      */
350     final TOK peekNext()
351     {
352         return peek(&token).value;
353     }
354 
355     /***********************
356      * Look 2 tokens ahead at value.
357      */
358     final TOK peekNext2()
359     {
360         Token* t = peek(&token);
361         return peek(t).value;
362     }
363 
364     /****************************
365      * Turn next token in buffer into a token.
366      */
367     final void scan(Token* t)
368     {
369         const lastLine = scanloc.linnum;
370         Loc startLoc;
371         t.blockComment = null;
372         t.lineComment = null;
373 
374         while (1)
375         {
376             t.ptr = p;
377             //printf("p = %p, *p = '%c'\n",p,*p);
378             t.loc = loc();
379             switch (*p)
380             {
381             case 0:
382             case 0x1A:
383                 t.value = TOK.endOfFile; // end of file
384                 // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
385                 return;
386             case ' ':
387             case '\t':
388             case '\v':
389             case '\f':
390                 p++;
391                 continue; // skip white space
392             case '\r':
393                 p++;
394                 if (*p != '\n') // if CR stands by itself
395                 {
396                     endOfLine();
397                     goto skipFourSpaces;
398                 }
399                 continue; // skip white space
400             case '\n':
401                 p++;
402                 endOfLine();
403                 skipFourSpaces:
404                 while (*(cast(uint*)p) == 0x20202020) //' ' == 0x20
405                 {
406                     p+=4;
407                 }
408                 continue; // skip white space
409             case '0':
410                 if (!isZeroSecond(p[1]))        // if numeric literal does not continue
411                 {
412                     ++p;
413                     t.unsvalue = 0;
414                     t.value = TOK.int32Literal;
415                     return;
416                 }
417                 goto Lnumber;
418 
419             case '1': .. case '9':
420                 if (!isDigitSecond(p[1]))       // if numeric literal does not continue
421                 {
422                     t.unsvalue = *p - '0';
423                     ++p;
424                     t.value = TOK.int32Literal;
425                     return;
426                 }
427             Lnumber:
428                 t.value = number(t);
429                 return;
430 
431             case '\'':
432                 if (issinglechar(p[1]) && p[2] == '\'')
433                 {
434                     t.unsvalue = p[1];        // simple one character literal
435                     t.value = TOK.charLiteral;
436                     p += 3;
437                 }
438                 else
439                     t.value = charConstant(t);
440                 return;
441             case 'r':
442                 if (p[1] != '"')
443                     goto case_ident;
444                 p++;
445                 goto case '`';
446             case '`':
447                 wysiwygStringConstant(t);
448                 return;
449             case 'x':
450                 if (p[1] != '"')
451                     goto case_ident;
452                 p++;
453                 auto start = p;
454                 auto hexString = new OutBuffer();
455                 t.value = hexStringConstant(t);
456                 hexString.write(start[0 .. p - start]);
457                 error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
458                 return;
459             case 'q':
460                 if (p[1] == '"')
461                 {
462                     p++;
463                     delimitedStringConstant(t);
464                     return;
465                 }
466                 else if (p[1] == '{')
467                 {
468                     p++;
469                     tokenStringConstant(t);
470                     return;
471                 }
472                 else
473                     goto case_ident;
474             case '"':
475                 escapeStringConstant(t);
476                 return;
477             case 'a':
478             case 'b':
479             case 'c':
480             case 'd':
481             case 'e':
482             case 'f':
483             case 'g':
484             case 'h':
485             case 'i':
486             case 'j':
487             case 'k':
488             case 'l':
489             case 'm':
490             case 'n':
491             case 'o':
492             case 'p':
493                 /*case 'q': case 'r':*/
494             case 's':
495             case 't':
496             case 'u':
497             case 'v':
498             case 'w':
499                 /*case 'x':*/
500             case 'y':
501             case 'z':
502             case 'A':
503             case 'B':
504             case 'C':
505             case 'D':
506             case 'E':
507             case 'F':
508             case 'G':
509             case 'H':
510             case 'I':
511             case 'J':
512             case 'K':
513             case 'L':
514             case 'M':
515             case 'N':
516             case 'O':
517             case 'P':
518             case 'Q':
519             case 'R':
520             case 'S':
521             case 'T':
522             case 'U':
523             case 'V':
524             case 'W':
525             case 'X':
526             case 'Y':
527             case 'Z':
528             case '_':
529             case_ident:
530                 {
531                     while (1)
532                     {
533                         const c = *++p;
534                         if (isidchar(c))
535                             continue;
536                         else if (c & 0x80)
537                         {
538                             const s = p;
539                             const u = decodeUTF();
540                             if (isUniAlpha(u))
541                                 continue;
542                             error("char 0x%04x not allowed in identifier", u);
543                             p = s;
544                         }
545                         break;
546                     }
547                     Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
548                     t.ident = id;
549                     t.value = cast(TOK)id.getValue();
550                     anyToken = 1;
551                     if (*t.ptr == '_') // if special identifier token
552                     {
553                         // Lazy initialization
554                         TimeStampInfo.initialize(t.loc);
555 
556                         if (id == Id.DATE)
557                         {
558                             t.ustring = TimeStampInfo.date.ptr;
559                             goto Lstr;
560                         }
561                         else if (id == Id.TIME)
562                         {
563                             t.ustring = TimeStampInfo.time.ptr;
564                             goto Lstr;
565                         }
566                         else if (id == Id.VENDOR)
567                         {
568                             t.ustring = global.vendor.xarraydup.ptr;
569                             goto Lstr;
570                         }
571                         else if (id == Id.TIMESTAMP)
572                         {
573                             t.ustring = TimeStampInfo.timestamp.ptr;
574                         Lstr:
575                             t.value = TOK.string_;
576                             t.postfix = 0;
577                             t.len = cast(uint)strlen(t.ustring);
578                         }
579                         else if (id == Id.VERSIONX)
580                         {
581                             t.value = TOK.int64Literal;
582                             t.unsvalue = global.versionNumber();
583                         }
584                         else if (id == Id.EOFX)
585                         {
586                             t.value = TOK.endOfFile;
587                             // Advance scanner to end of file
588                             while (!(*p == 0 || *p == 0x1A))
589                                 p++;
590                         }
591                     }
592                     //printf("t.value = %d\n",t.value);
593                     return;
594                 }
595             case '/':
596                 p++;
597                 switch (*p)
598                 {
599                 case '=':
600                     p++;
601                     t.value = TOK.divAssign;
602                     return;
603                 case '*':
604                     p++;
605                     startLoc = loc();
606                     while (1)
607                     {
608                         while (1)
609                         {
610                             const c = *p;
611                             switch (c)
612                             {
613                             case '/':
614                                 break;
615                             case '\n':
616                                 endOfLine();
617                                 p++;
618                                 continue;
619                             case '\r':
620                                 p++;
621                                 if (*p != '\n')
622                                     endOfLine();
623                                 continue;
624                             case 0:
625                             case 0x1A:
626                                 error("unterminated /* */ comment");
627                                 p = end;
628                                 t.loc = loc();
629                                 t.value = TOK.endOfFile;
630                                 return;
631                             default:
632                                 if (c & 0x80)
633                                 {
634                                     const u = decodeUTF();
635                                     if (u == PS || u == LS)
636                                         endOfLine();
637                                 }
638                                 p++;
639                                 continue;
640                             }
641                             break;
642                         }
643                         p++;
644                         if (p[-2] == '*' && p - 3 != t.ptr)
645                             break;
646                     }
647                     if (commentToken)
648                     {
649                         t.loc = startLoc;
650                         t.value = TOK.comment;
651                         return;
652                     }
653                     else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
654                     {
655                         // if /** but not /**/
656                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
657                         lastDocLine = scanloc.linnum;
658                     }
659                     continue;
660                 case '/': // do // style comments
661                     startLoc = loc();
662                     while (1)
663                     {
664                         const c = *++p;
665                         switch (c)
666                         {
667                         case '\n':
668                             break;
669                         case '\r':
670                             if (p[1] == '\n')
671                                 p++;
672                             break;
673                         case 0:
674                         case 0x1A:
675                             if (commentToken)
676                             {
677                                 p = end;
678                                 t.loc = startLoc;
679                                 t.value = TOK.comment;
680                                 return;
681                             }
682                             if (doDocComment && t.ptr[2] == '/')
683                             {
684                                 getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
685                                 lastDocLine = scanloc.linnum;
686                             }
687                             p = end;
688                             t.loc = loc();
689                             t.value = TOK.endOfFile;
690                             return;
691                         default:
692                             if (c & 0x80)
693                             {
694                                 const u = decodeUTF();
695                                 if (u == PS || u == LS)
696                                     break;
697                             }
698                             continue;
699                         }
700                         break;
701                     }
702                     if (commentToken)
703                     {
704                         p++;
705                         endOfLine();
706                         t.loc = startLoc;
707                         t.value = TOK.comment;
708                         return;
709                     }
710                     if (doDocComment && t.ptr[2] == '/')
711                     {
712                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
713                         lastDocLine = scanloc.linnum;
714                     }
715                     p++;
716                     endOfLine();
717                     continue;
718                 case '+':
719                     {
720                         int nest;
721                         startLoc = loc();
722                         p++;
723                         nest = 1;
724                         while (1)
725                         {
726                             char c = *p;
727                             switch (c)
728                             {
729                             case '/':
730                                 p++;
731                                 if (*p == '+')
732                                 {
733                                     p++;
734                                     nest++;
735                                 }
736                                 continue;
737                             case '+':
738                                 p++;
739                                 if (*p == '/')
740                                 {
741                                     p++;
742                                     if (--nest == 0)
743                                         break;
744                                 }
745                                 continue;
746                             case '\r':
747                                 p++;
748                                 if (*p != '\n')
749                                     endOfLine();
750                                 continue;
751                             case '\n':
752                                 endOfLine();
753                                 p++;
754                                 continue;
755                             case 0:
756                             case 0x1A:
757                                 error("unterminated /+ +/ comment");
758                                 p = end;
759                                 t.loc = loc();
760                                 t.value = TOK.endOfFile;
761                                 return;
762                             default:
763                                 if (c & 0x80)
764                                 {
765                                     uint u = decodeUTF();
766                                     if (u == PS || u == LS)
767                                         endOfLine();
768                                 }
769                                 p++;
770                                 continue;
771                             }
772                             break;
773                         }
774                         if (commentToken)
775                         {
776                             t.loc = startLoc;
777                             t.value = TOK.comment;
778                             return;
779                         }
780                         if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
781                         {
782                             // if /++ but not /++/
783                             getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
784                             lastDocLine = scanloc.linnum;
785                         }
786                         continue;
787                     }
788                 default:
789                     break;
790                 }
791                 t.value = TOK.div;
792                 return;
793             case '.':
794                 p++;
795                 if (isdigit(*p))
796                 {
797                     /* Note that we don't allow ._1 and ._ as being
798                      * valid floating point numbers.
799                      */
800                     p--;
801                     t.value = inreal(t);
802                 }
803                 else if (p[0] == '.')
804                 {
805                     if (p[1] == '.')
806                     {
807                         p += 2;
808                         t.value = TOK.dotDotDot;
809                     }
810                     else
811                     {
812                         p++;
813                         t.value = TOK.slice;
814                     }
815                 }
816                 else
817                     t.value = TOK.dot;
818                 return;
819             case '&':
820                 p++;
821                 if (*p == '=')
822                 {
823                     p++;
824                     t.value = TOK.andAssign;
825                 }
826                 else if (*p == '&')
827                 {
828                     p++;
829                     t.value = TOK.andAnd;
830                 }
831                 else
832                     t.value = TOK.and;
833                 return;
834             case '|':
835                 p++;
836                 if (*p == '=')
837                 {
838                     p++;
839                     t.value = TOK.orAssign;
840                 }
841                 else if (*p == '|')
842                 {
843                     p++;
844                     t.value = TOK.orOr;
845                 }
846                 else
847                     t.value = TOK.or;
848                 return;
849             case '-':
850                 p++;
851                 if (*p == '=')
852                 {
853                     p++;
854                     t.value = TOK.minAssign;
855                 }
856                 else if (*p == '-')
857                 {
858                     p++;
859                     t.value = TOK.minusMinus;
860                 }
861                 else
862                     t.value = TOK.min;
863                 return;
864             case '+':
865                 p++;
866                 if (*p == '=')
867                 {
868                     p++;
869                     t.value = TOK.addAssign;
870                 }
871                 else if (*p == '+')
872                 {
873                     p++;
874                     t.value = TOK.plusPlus;
875                 }
876                 else
877                     t.value = TOK.add;
878                 return;
879             case '<':
880                 p++;
881                 if (*p == '=')
882                 {
883                     p++;
884                     t.value = TOK.lessOrEqual; // <=
885                 }
886                 else if (*p == '<')
887                 {
888                     p++;
889                     if (*p == '=')
890                     {
891                         p++;
892                         t.value = TOK.leftShiftAssign; // <<=
893                     }
894                     else
895                         t.value = TOK.leftShift; // <<
896                 }
897                 else
898                     t.value = TOK.lessThan; // <
899                 return;
900             case '>':
901                 p++;
902                 if (*p == '=')
903                 {
904                     p++;
905                     t.value = TOK.greaterOrEqual; // >=
906                 }
907                 else if (*p == '>')
908                 {
909                     p++;
910                     if (*p == '=')
911                     {
912                         p++;
913                         t.value = TOK.rightShiftAssign; // >>=
914                     }
915                     else if (*p == '>')
916                     {
917                         p++;
918                         if (*p == '=')
919                         {
920                             p++;
921                             t.value = TOK.unsignedRightShiftAssign; // >>>=
922                         }
923                         else
924                             t.value = TOK.unsignedRightShift; // >>>
925                     }
926                     else
927                         t.value = TOK.rightShift; // >>
928                 }
929                 else
930                     t.value = TOK.greaterThan; // >
931                 return;
932             case '!':
933                 p++;
934                 if (*p == '=')
935                 {
936                     p++;
937                     t.value = TOK.notEqual; // !=
938                 }
939                 else
940                     t.value = TOK.not; // !
941                 return;
942             case '=':
943                 p++;
944                 if (*p == '=')
945                 {
946                     p++;
947                     t.value = TOK.equal; // ==
948                 }
949                 else if (*p == '>')
950                 {
951                     p++;
952                     t.value = TOK.goesTo; // =>
953                 }
954                 else
955                     t.value = TOK.assign; // =
956                 return;
957             case '~':
958                 p++;
959                 if (*p == '=')
960                 {
961                     p++;
962                     t.value = TOK.concatenateAssign; // ~=
963                 }
964                 else
965                     t.value = TOK.tilde; // ~
966                 return;
967             case '^':
968                 p++;
969                 if (*p == '^')
970                 {
971                     p++;
972                     if (*p == '=')
973                     {
974                         p++;
975                         t.value = TOK.powAssign; // ^^=
976                     }
977                     else
978                         t.value = TOK.pow; // ^^
979                 }
980                 else if (*p == '=')
981                 {
982                     p++;
983                     t.value = TOK.xorAssign; // ^=
984                 }
985                 else
986                     t.value = TOK.xor; // ^
987                 return;
988             case '(':
989                 p++;
990                 t.value = TOK.leftParentheses;
991                 return;
992             case ')':
993                 p++;
994                 t.value = TOK.rightParentheses;
995                 return;
996             case '[':
997                 p++;
998                 t.value = TOK.leftBracket;
999                 return;
1000             case ']':
1001                 p++;
1002                 t.value = TOK.rightBracket;
1003                 return;
1004             case '{':
1005                 p++;
1006                 t.value = TOK.leftCurly;
1007                 return;
1008             case '}':
1009                 p++;
1010                 t.value = TOK.rightCurly;
1011                 return;
1012             case '?':
1013                 p++;
1014                 t.value = TOK.question;
1015                 return;
1016             case ',':
1017                 p++;
1018                 t.value = TOK.comma;
1019                 return;
1020             case ';':
1021                 p++;
1022                 t.value = TOK.semicolon;
1023                 return;
1024             case ':':
1025                 p++;
1026                 t.value = TOK.colon;
1027                 return;
1028             case '$':
1029                 p++;
1030                 t.value = TOK.dollar;
1031                 return;
1032             case '@':
1033                 p++;
1034                 t.value = TOK.at;
1035                 return;
1036             case '*':
1037                 p++;
1038                 if (*p == '=')
1039                 {
1040                     p++;
1041                     t.value = TOK.mulAssign;
1042                 }
1043                 else
1044                     t.value = TOK.mul;
1045                 return;
1046             case '%':
1047                 p++;
1048                 if (*p == '=')
1049                 {
1050                     p++;
1051                     t.value = TOK.modAssign;
1052                 }
1053                 else
1054                     t.value = TOK.mod;
1055                 return;
1056             case '#':
1057                 {
1058                     p++;
1059                     Token n;
1060                     scan(&n);
1061                     if (n.value == TOK.identifier)
1062                     {
1063                         if (n.ident == Id.line)
1064                         {
1065                             poundLine();
1066                             continue;
1067                         }
1068                         else
1069                         {
1070                             const locx = loc();
1071                             warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
1072                         }
1073                     }
1074                     else if (n.value == TOK.if_)
1075                     {
1076                         error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
1077                     }
1078                     t.value = TOK.pound;
1079                     return;
1080                 }
1081             default:
1082                 {
1083                     dchar c = *p;
1084                     if (c & 0x80)
1085                     {
1086                         c = decodeUTF();
1087                         // Check for start of unicode identifier
1088                         if (isUniAlpha(c))
1089                             goto case_ident;
1090                         if (c == PS || c == LS)
1091                         {
1092                             endOfLine();
1093                             p++;
1094                             continue;
1095                         }
1096                     }
1097                     if (c < 0x80 && isprint(c))
1098                         error("character '%c' is not a valid token", c);
1099                     else
1100                         error("character 0x%02x is not a valid token", c);
1101                     p++;
1102                     continue;
1103                 }
1104             }
1105         }
1106     }
1107 
1108     final Token* peek(Token* ct)
1109     {
1110         // Look-ahead tokens hang off `ct.next`; once linked there, the
1111         // same token is handed back without being lexed again.
1112         if (auto cached = ct.next)
1113             return cached;
1114 
1115         // Nothing cached yet: lex one more token and link it after `ct`.
1116         Token* fresh = allocateToken();
1117         scan(fresh);
1118         ct.next = fresh;
1119         return fresh;
1120     }
1121 
1122     /*********************************
1123      * Skip over a parenthesized token sequence using look-ahead only.
1124      *
1125      * `tk` is on the opening `(`. Tokens are fetched with `peek` until the
1126      * matching `)` is reached, and the token past it is returned.
1127      * The walk stops early, returning the token it stopped on, at an
1128      * unmatched `}`, at a `;` outside of any `{}` pair, or at end of file.
1129      */
1130     final Token* peekPastParen(Token* tk)
1131     {
1132         int openParens = 1; // accounts for the `(` that `tk` starts on
1133         int openCurlies = 0;
1134         for (;;)
1135         {
1136             tk = peek(tk);
1137             const kind = tk.value;
1138             if (kind == TOK.leftParentheses)
1139                 ++openParens;
1140             else if (kind == TOK.rightParentheses)
1141             {
1142                 // only the `)` that closes the initial `(` ends the walk
1143                 if (--openParens == 0)
1144                     return peek(tk);
1145             }
1146             else if (kind == TOK.leftCurly)
1147                 ++openCurlies;
1148             else if (kind == TOK.rightCurly)
1149             {
1150                 // a `}` without a `{` seen during this walk
1151                 if (--openCurlies < 0)
1152                     return tk;
1153             }
1154             else if (kind == TOK.semicolon)
1155             {
1156                 // semicolons nested in `{}` are skipped
1157                 if (openCurlies == 0)
1158                     return tk;
1159             }
1160             else if (kind == TOK.endOfFile)
1161                 return tk;
1162             // every other token is simply stepped over
1163         }
1164     }
1165 
1166     /// Decode the escape sequence at `p` via the static overload, which takes
1167     /// `p` by reference and advances it; errors are reported at `token.loc`.
1168     private uint escapeSequence()
1169     {
1170         const dchar decoded = Lexer.escapeSequence(token.loc, p);
1171         return decoded;
1172     }
1173 
1174     /**
1175     Parse the given string literal escape sequence into a single character. Handles single-letter escapes, `\x`, `\u`, `\U` hex escapes (2, 4 and 8 digits), octal escapes of up to 3 digits and named entities (`\&name;`).
1176     Params:
1177         loc = the location of the current token; used when reporting errors
1178         sequence = pointer to the character that follows the backslash. This is a reference
1179                    variable that is also used to return the position after the sequence
1180     Returns:
1181         the escaped sequence as a single character; `'?'` replaces an invalid `\u`/`\U` value or a bad entity, and `'\\'` is returned when 0 or 0x1A follows the backslash
1182     */
1183     private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence)
1184     {
1185         const(char)* p = sequence; // cache sequence reference on stack
1186         scope(exit) sequence = p; // hand the final position back on every exit path
1187 
1188         uint c = *p; // the character after the backslash selects the kind of escape
1189         int ndigits; // number of hex digits the escape calls for
1190         switch (c)
1191         {
1192         case '\'':
1193         case '"':
1194         case '?':
1195         case '\\':
1196         Lconsume: // shared exit of all one-letter escapes: step over the letter
1197             p++;
1198             break;
1199         case 'a':
1200             c = 7;
1201             goto Lconsume;
1202         case 'b':
1203             c = 8;
1204             goto Lconsume;
1205         case 'f':
1206             c = 12;
1207             goto Lconsume;
1208         case 'n':
1209             c = 10;
1210             goto Lconsume;
1211         case 'r':
1212             c = 13;
1213             goto Lconsume;
1214         case 't':
1215             c = 9;
1216             goto Lconsume;
1217         case 'v':
1218             c = 11;
1219             goto Lconsume;
1220         case 'u':
1221             ndigits = 4;
1222             goto Lhex;
1223         case 'U':
1224             ndigits = 8;
1225             goto Lhex;
1226         case 'x':
1227             ndigits = 2;
1228         Lhex: // common tail of \x, \u and \U
1229             p++; // step over the x, u or U
1230             c = *p;
1231             if (ishex(cast(char)c))
1232             {
1233                 uint v = 0; // value accumulated so far
1234                 int n = 0; // hex digits consumed so far
1235                 while (1)
1236                 {
1237                     if (isdigit(cast(char)c)) // turn the digit character into its numeric value
1238                         c -= '0';
1239                     else if (islower(c))
1240                         c -= 'a' - 10;
1241                     else
1242                         c -= 'A' - 10;
1243                     v = v * 16 + c;
1244                     c = *++p;
1245                     if (++n == ndigits) // all required digits have been read
1246                         break;
1247                     if (!ishex(cast(char)c))
1248                     {
1249                         .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
1250                         break;
1251                     }
1252                 }
1253                 if (ndigits != 2 && !utf_isValidDchar(v)) // only \u and \U values are checked with utf_isValidDchar
1254                 {
1255                     .error(loc, "invalid UTF character \\U%08x", v);
1256                     v = '?'; // recover with valid UTF character
1257                 }
1258                 c = v;
1259             }
1260             else
1261             {
1262                 .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c); // sequence[0] is still the x, u or U
1263                 p++; // step over the character that is not a hex digit
1264             }
1265             break;
1266         case '&':
1267             // named character entity
1268             for (const idstart = ++p; 1; p++) // idstart: first character of the entity name
1269             {
1270                 switch (*p)
1271                 {
1272                 case ';':
1273                     c = HtmlNamedEntity(idstart, p - idstart);
1274                     if (c == ~0) // ~0 is handled as "no entity of that name"
1275                     {
1276                         .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
1277                         c = '?';
1278                     }
1279                     p++; // step over the ';'
1280                     break;
1281                 default:
1282                     if (isalpha(*p) || (p != idstart && isdigit(*p))) // letters, and digits after the first character, extend the name
1283                         continue;
1284                     .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
1285                     c = '?';
1286                     break;
1287                 }
1288                 break; // leave the for loop; the `continue` above is the only way to iterate
1289             }
1290             break;
1291         case 0:
1292         case 0x1A:
1293             // end of file
1294             c = '\\'; // `p` is not advanced; the backslash itself is the result
1295             break;
1296         default:
1297             if (isoctal(cast(char)c))
1298             {
1299                 uint v = 0; // value accumulated so far
1300                 int n = 0; // octal digits consumed so far
1301                 do
1302                 {
1303                     v = v * 8 + (c - '0');
1304                     c = *++p;
1305                 }
1306                 while (++n < 3 && isoctal(cast(char)c)); // at most 3 octal digits
1307                 c = v;
1308                 if (c > 0xFF)
1309                     .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
1310             }
1311             else
1312             {
1313                 .error(loc, "undefined escape sequence \\%c", c);
1314                 p++; // step over the unrecognized character; it is also the value returned
1315             }
1316             break;
1317         }
1318         return c;
1319     }
1320 
1321     /**
1322     Lex a wysiwyg string, i.e. one whose characters are taken as written.
1323     `p` must be pointing to the first character before the contents of the
1324     string literal; that character (i.e. backtick or double-quote) is also
1325     the one that terminates the literal.
1326     A lone `\r` is stored as `\n`, and the `\r` of a `\r\n` pair is dropped.
1327     Meeting 0 or 0x1A before the terminator is reported as an error.
1328     Params:
1329         result = pointer to the token that accepts the result
1330     */
1331     private void wysiwygStringConstant(Token* result)
1332     {
1333         result.value = TOK.string_;
1334         Loc start = loc();
1335         const closer = *p++; // the opening character doubles as the terminator
1336         stringbuffer.setsize(0);
1337         for (;;)
1338         {
1339             dchar ch = *p++;
1340             if (ch == '\n')
1341                 endOfLine();
1342             else if (ch == '\r')
1343             {
1344                 if (*p == '\n')
1345                     continue; // the '\n' that follows records this line break
1346                 ch = '\n';
1347                 endOfLine();
1348             }
1349             else if (ch == 0 || ch == 0x1A)
1350             {
1351                 error("unterminated string constant starting at %s", start.toChars());
1352                 result.setString();
1353                 --p; // step back so that `p` rests on the EOF character
1354                 return;
1355             }
1356             else if (ch == closer)
1357             {
1358                 result.setString(stringbuffer);
1359                 stringPostfix(result);
1360                 return;
1361             }
1362             else if (ch & 0x80)
1363             {
1364                 // Lead byte of a multi-byte sequence: decode it starting from
1365                 // that byte, then store the code point as UTF-8.
1366                 --p;
1367                 const u = decodeUTF();
1368                 ++p;
1369                 if (u == PS || u == LS)
1370                     endOfLine();
1371                 stringbuffer.writeUTF8(u);
1372                 continue;
1373             }
1374 
1375             // single-byte character, possibly a normalized line break
1376             stringbuffer.writeByte(ch);
1377         }
1378     }
1379 
1380     /**************************************
1381      * Lex hex strings:
1382      *      x"0A ae 34FE BD"
1383      * Each pair of hex digits becomes one byte of the string stored in `t`; white space and end-of-line characters (including LS and PS) are skipped.
1384      * Returns: TOK.hexadecimalString, also after an error has been reported
1385      */
1386     private TOK hexStringConstant(Token* t)
1387     {
1388         Loc start = loc();
1389         uint n = 0;  // number of characters taken as hex digits so far
1390         uint v = ~0; // dead assignment, needed to suppress warning
1391         p++;
1392         stringbuffer.setsize(0);
1393         while (1)
1394         {
1395             dchar c = *p++;
1396             switch (c)
1397             {
1398             case ' ': case '\t': case '\v': case '\f':
1399                 continue; // skip white space
1400             case '\r':
1401                 if (*p == '\n')
1402                     continue; // ignore '\r' if followed by '\n'
1403                 goto case '\n'; // treat isolated '\r' as if it were a '\n'
1404             case '\n':
1405                 endOfLine();
1406                 continue;
1407             case 0: case 0x1A:
1408                 error("unterminated string constant starting at %s", start.toChars());
1409                 t.setString();
1410                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1411                 p--;
1412                 return TOK.hexadecimalString;
1413             case '"':
1414                 if (n & 1)
1415                 {
1416                     error("odd number (%d) of hex characters in hex string", n);
1417                     stringbuffer.writeByte(v);
1418                 }
1419                 t.setString(stringbuffer);
1420                 stringPostfix(t);
1421                 return TOK.hexadecimalString;
1422             default:
1423                 if (c >= '0' && c <= '9')
1424                     c -= '0';
1425                 else if (c >= 'a' && c <= 'f')
1426                     c -= 'a' - 10;
1427                 else if (c >= 'A' && c <= 'F')
1428                     c -= 'A' - 10;
1429                 else if (c & 0x80)
1430                 {
1431                     p--;
1432                     const u = decodeUTF();
1433                     p++;
1434                     if (u == PS || u == LS)
1435                     {
1436                         // a Unicode line break separates digits like '\n' does; it must not be counted as a hex digit
1437                         endOfLine();
1438                         continue;
1439                     }
1440                     error("non-hex character \\u%04x in hex string", u);
1441                 }
1442                 else
1443                     error("non-hex character '%c' in hex string", c);
1444                 if (n & 1) // second digit of a pair: combine with the saved one and emit the byte
1445                 {
1446                     v = (v << 4) | c;
1447                     stringbuffer.writeByte(v);
1448                 }
1449                 else
1450                     v = c; // first digit of a pair: keep it until its partner arrives
1451                 n++;
1452                 break;
1453             }
1454         }
1455         assert(0); // see bug 15731
1456     }
1457 
1458     /**
1459     Lex a delimited string. Some examples of delimited strings are:
1460     ---
1461     q"(foo(xxx))"      // "foo(xxx)"
1462     q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
1463     q"/foo]/"          // "foo]"
1464     q"HERE
1465     foo
1466     HERE"              // "foo\n"
1467     ---
1468     It is assumed that `p` points to the opening double-quote '"'. The first character after it selects the delimiter: `(`, `[`, `{` and `<` close with their counterpart and may nest, an identifier starts a heredoc, any other character closes itself.
1469     Params:
1470         result = pointer to the token that accepts the result
1471     */
1472     private void delimitedStringConstant(Token* result)
1473     {
1474         result.value = TOK.string_;
1475         Loc start = loc();
1476         dchar delimleft = 0;  // opening delimiter; 0 until the first character has been read
1477         dchar delimright = 0; // closing delimiter; stays 0 for a heredoc
1478         uint nest = 1;        // 1 if the delimiters nest (bracket pairs), else 0
1479         uint nestcount = ~0; // dead assignment, needed to suppress warning
1480         Identifier hereid = null; // heredoc identifier, null for the other forms
1481         uint blankrol = 0;   // set while the rest of the heredoc's opening line must be blank
1482         uint startline = 0;  // set when the previous character ended a line
1483         p++;
1484         stringbuffer.setsize(0);
1485         while (1)
1486         {
1487             const cstart = p; // first byte of `c`; used to re-scan `c` as the start of an identifier
1488             dchar c = *p++;
1489             switch (c)
1490             {
1491             case '\n':
1492             Lnextline: // also reached for a lone '\r' and for LS/PS
1493                 endOfLine();
1494                 startline = 1;
1495                 if (blankrol)
1496                 {
1497                     blankrol = 0; // this line break ends the heredoc's opening line and is not stored
1498                     continue;
1499                 }
1500                 if (hereid)
1501                 {
1502                     stringbuffer.writeUTF8(c); // heredoc body: store the break, keep `startline` set
1503                     continue;
1504                 }
1505                 break;
1506             case '\r':
1507                 if (*p == '\n')
1508                     continue; // ignore
1509                 c = '\n'; // treat EndOfLine as \n character
1510                 goto Lnextline;
1511             case 0:
1512             case 0x1A:
1513                 error("unterminated delimited string constant starting at %s", start.toChars());
1514                 result.setString();
1515                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1516                 p--;
1517                 return;
1518             default:
1519                 if (c & 0x80)
1520                 {
1521                     p--;
1522                     c = decodeUTF();
1523                     p++;
1524                     if (c == PS || c == LS)
1525                         goto Lnextline;
1526                 }
1527                 break;
1528             }
1529             if (delimleft == 0) // first character after the quote: work out the delimiter
1530             {
1531                 delimleft = c;
1532                 nest = 1;
1533                 nestcount = 1;
1534                 if (c == '(')
1535                     delimright = ')';
1536                 else if (c == '{')
1537                     delimright = '}';
1538                 else if (c == '[')
1539                     delimright = ']';
1540                 else if (c == '<')
1541                     delimright = '>';
1542                 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1543                 {
1544                     // Start of identifier; must be a heredoc
1545                     Token tok;
1546                     p = cstart; // not `p--`: a non-ASCII `c` occupies several bytes
1547                     scan(&tok); // read in heredoc identifier
1548                     if (tok.value != TOK.identifier)
1549                     {
1550                         error("identifier expected for heredoc, not %s", tok.toChars());
1551                         delimright = c;
1552                     }
1553                     else
1554                     {
1555                         hereid = tok.ident;
1556                         // nothing but the line break may follow the identifier
1557                         blankrol = 1;
1558                     }
1559                     nest = 0;
1560                 }
1561                 else
1562                 {
1563                     delimright = c;
1564                     nest = 0;
1565                     if (isspace(c))
1566                         error("delimiter cannot be whitespace");
1567                 }
1568             }
1569             else
1570             {
1571                 if (blankrol)
1572                 {
1573                     error("heredoc rest of line should be blank");
1574                     blankrol = 0;
1575                     continue;
1576                 }
1577                 if (nest == 1)
1578                 {
1579                     if (c == delimleft)
1580                         nestcount++;
1581                     else if (c == delimright)
1582                     {
1583                         nestcount--;
1584                         if (nestcount == 0)
1585                             goto Ldone;
1586                     }
1587                 }
1588                 else if (c == delimright)
1589                     goto Ldone;
1590                 if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
1591                 {
1592                     Token tok;
1593                     auto psave = p;
1594                     p = cstart; // restart at the first byte of `c`, which may be multi-byte
1595                     scan(&tok); // read in possible heredoc identifier
1596                     // only the heredoc identifier itself ends the string
1597                     if (tok.value == TOK.identifier && tok.ident is hereid)
1598                     {
1599                         /* should check that rest of line is blank
1600                          */
1601                         goto Ldone;
1602                     }
1603                     p = psave; // some other identifier: carry on after `c`, which is stored below
1604                 }
1605                 stringbuffer.writeUTF8(c);
1606                 startline = 0;
1607             }
1608         }
1609     Ldone:
1610         if (*p == '"')
1611             p++;
1612         else if (hereid)
1613             error("delimited string must end in %s\"", hereid.toChars());
1614         else
1615             error("delimited string must end in %c\"", delimright);
1616         result.setString(stringbuffer);
1617         stringPostfix(result);
1618     }
1619 
1620     /**
1621     Lex a token string. Some examples of token strings are:
1622     ---
1623     q{ foo(xxx) }    // " foo(xxx) "
1624     q{foo$(LPAREN)}  // "foo$(LPAREN)"
1625     q{{foo}"}"}      // "{foo}"}""
1626     ---
1627     On entry `p` points to the opening curly-brace '{'. The contents are run
1628     through `scan` purely to locate the matching '}'; the string value is
1629     the source text between the two braces, taken as-is.
1630     `inTokenStringConstant` is raised for the duration of the call.
1631     Params:
1632         result = pointer to the token that accepts the result
1633     */
1634     private void tokenStringConstant(Token* result)
1635     {
1636         result.value = TOK.string_;
1637 
1638         const start = loc();
1639         const bodyStart = ++p; // first character after the opening '{'
1640         uint depth = 1;        // curly braces currently open, outer one included
1641         ++inTokenStringConstant;
1642         scope(exit) --inTokenStringConstant;
1643         for (;;)
1644         {
1645             Token inner;
1646             scan(&inner);
1647             if (inner.value == TOK.leftCurly)
1648                 ++depth;
1649             else if (inner.value == TOK.rightCurly)
1650             {
1651                 if (--depth != 0)
1652                     continue;
1653                 // `p` is just past the closing '}', hence the `- 1`
1654                 result.setString(bodyStart, p - 1 - bodyStart);
1655                 stringPostfix(result);
1656                 return;
1657             }
1658             else if (inner.value == TOK.endOfFile)
1659             {
1660                 error("unterminated token string constant starting at %s", start.toChars());
1661                 result.setString();
1662                 return;
1663             }
1664         }
1665     }
1666 
1667     /**
1668     Scan a double-quoted string, resolving escape sequences while the
1669     processed value is collected in `stringbuffer`. The result is returned in
1670     the given `t` token. This function assumes that `p` currently points to
1671     the opening double-quote of the string.
1672     Line breaks written as `\n`, `\r`, `\r\n`, LS or PS are all stored as `\n`.
1673     Params:
1674         t = the token to set the resulting string to
1675     */
1676     private void escapeStringConstant(Token* t)
1677     {
1678         t.value = TOK.string_;
1679 
1680         const start = loc();
1681         ++p;
1682         stringbuffer.setsize(0);
1683         for (;;)
1684         {
1685             dchar ch = *p++;
1686             if (ch == '"')
1687             {
1688                 t.setString(stringbuffer);
1689                 stringPostfix(t);
1690                 return;
1691             }
1692             if (ch == 0 || ch == 0x1A)
1693             {
1694                 // `p` has to stay on the 0 or 0x1A character, which is the
1695                 // TOK.endOfFile token that gets scanned next.
1696                 --p;
1697                 error("unterminated string constant starting at %s", start.toChars());
1698                 t.setString();
1699                 return;
1700             }
1701 
1702             if (ch == '\\')
1703             {
1704                 // Look at the escape letter before `escapeSequence` moves `p`:
1705                 // \u, \U and \& results are stored UTF-8 encoded, the result
1706                 // of any other escape is stored as one byte.
1707                 const kind = *p;
1708                 ch = escapeSequence();
1709                 if (kind == 'u' || kind == 'U' || kind == '&')
1710                 {
1711                     stringbuffer.writeUTF8(ch);
1712                     continue;
1713                 }
1714             }
1715             else if (ch == '\n')
1716                 endOfLine();
1717             else if (ch == '\r')
1718             {
1719                 if (*p == '\n')
1720                     continue; // the '\n' that follows records this line break
1721                 ch = '\n';
1722                 endOfLine();
1723             }
1724             else if (ch & 0x80)
1725             {
1726                 --p;
1727                 ch = decodeUTF();
1728                 if (ch == LS || ch == PS)
1729                 {
1730                     ch = '\n';
1731                     endOfLine();
1732                 }
1733                 ++p;
1734                 stringbuffer.writeUTF8(ch);
1735                 continue;
1736             }
1737             stringbuffer.writeByte(ch);
1738         }
1739     }
1740 
1741     /************************************** Lex a character literal. The character `p` starts on (presumably the opening `'`) is skipped unread.
1742      * Sets `t.unsvalue` (to `'?'` after an error) and returns TOK.charLiteral, TOK.wcharLiteral or TOK.dcharLiteral. */
1743     private TOK charConstant(Token* t)
1744     {
1745         TOK tk = TOK.charLiteral;
1746         //printf("Lexer::charConstant\n");
1747         p++;
1748         dchar c = *p++; // first character of the literal's contents
1749         switch (c)
1750         {
1751         case '\\':
1752             switch (*p)
1753             {
1754             case 'u':
1755                 t.unsvalue = escapeSequence();
1756                 tk = TOK.wcharLiteral; // a \u escape makes this a wchar literal
1757                 break;
1758             case 'U':
1759             case '&':
1760                 t.unsvalue = escapeSequence();
1761                 tk = TOK.dcharLiteral; // \U and named entities make this a dchar literal
1762                 break;
1763             default:
1764                 t.unsvalue = escapeSequence(); // every other escape keeps TOK.charLiteral
1765                 break;
1766             }
1767             break;
1768         case '\n':
1769         L1: // also the target for LS and PS
1770             endOfLine();
1771             goto case; // continues with case '\r' below
1772         case '\r':
1773             goto case '\''; // a line break inside the literal is reported as unterminated
1774         case 0:
1775         case 0x1A:
1776             // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1777             p--;
1778             goto case;
1779         case '\'': // an immediate closing quote, and the target of the cases above
1780             error("unterminated character constant");
1781             t.unsvalue = '?';
1782             return tk;
1783         default:
1784             if (c & 0x80)
1785             {
1786                 p--; // back onto the first byte so that decodeUTF sees the whole sequence
1787                 c = decodeUTF();
1788                 p++;
1789                 if (c == LS || c == PS)
1790                     goto L1;
1791                 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) // below 0xFFFE and outside 0xD800..0xDFFF: wchar literal
1792                     tk = TOK.wcharLiteral;
1793                 else
1794                     tk = TOK.dcharLiteral;
1795             }
1796             t.unsvalue = c;
1797             break;
1798         }
1799         if (*p != '\'') // closing quote is not where it should be: skip ahead, then report
1800         {
1801             while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
1802                     *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
1803             {
1804                 if (*p & 0x80)
1805                 {
1806                     const s = p;
1807                     c = decodeUTF();
1808                     if (c == LS || c == PS)
1809                     {
1810                         p = s; // rewind so that the LS/PS is not consumed here
1811                         break;
1812                     }
1813                 }
1814                 p++;
1815             }
1816 
1817             if (*p == '\'')
1818             {
1819                 error("character constant has multiple characters");
1820                 p++; // step over the closing quote that was found
1821             }
1822             else
1823                 error("unterminated character constant");
1824             t.unsvalue = '?';
1825             return tk;
1826         }
1827         p++; // step over the closing quote
1828         return tk;
1829     }
1830 
1831     /***************************************
1832      * Get postfix of string literal.
1833      */
1834     private void stringPostfix(Token* t) pure @nogc
1835     {
1836         switch (*p)
1837         {
1838         case 'c':
1839         case 'w':
1840         case 'd':
1841             t.postfix = *p;
1842             p++;
1843             break;
1844         default:
1845             t.postfix = 0;
1846             break;
1847         }
1848     }
1849 
1850     /**************************************
1851      * Read in a number.
1852      * If it's an integer, store it in tok.TKutok.Vlong.
1853      *      integers can be decimal, octal or hex
1854      *      Handle the suffixes U, UL, LU, L, etc.
1855      * If it's double, store it in tok.TKutok.Vdouble.
1856      * Returns:
1857      *      TKnum
1858      *      TKdouble,...
1859      */
1860     private TOK number(Token* t)
1861     {
1862         int base = 10;
1863         const start = p;
1864         uinteger_t n = 0; // unsigned >=64 bit integer type
1865         int d;
1866         bool err = false;
1867         bool overflow = false;
1868         bool anyBinaryDigitsNoSingleUS = false;
1869         bool anyHexDigitsNoSingleUS = false;
1870         dchar c = *p;
1871         if (c == '0')
1872         {
1873             ++p;
1874             c = *p;
1875             switch (c)
1876             {
1877             case '0':
1878             case '1':
1879             case '2':
1880             case '3':
1881             case '4':
1882             case '5':
1883             case '6':
1884             case '7':
1885             case '8':
1886             case '9':
1887                 base = 8;
1888                 break;
1889             case 'x':
1890             case 'X':
1891                 ++p;
1892                 base = 16;
1893                 break;
1894             case 'b':
1895             case 'B':
1896                 ++p;
1897                 base = 2;
1898                 break;
1899             case '.':
1900                 if (p[1] == '.')
1901                     goto Ldone; // if ".."
1902                 if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
1903                     goto Ldone; // if ".identifier" or ".unicode"
1904                 goto Lreal; // '.' is part of current token
1905             case 'i':
1906             case 'f':
1907             case 'F':
1908                 goto Lreal;
1909             case '_':
1910                 ++p;
1911                 base = 8;
1912                 break;
1913             case 'L':
1914                 if (p[1] == 'i')
1915                     goto Lreal;
1916                 break;
1917             default:
1918                 break;
1919             }
1920         }
1921         while (1)
1922         {
1923             c = *p;
1924             switch (c)
1925             {
1926             case '0':
1927             case '1':
1928             case '2':
1929             case '3':
1930             case '4':
1931             case '5':
1932             case '6':
1933             case '7':
1934             case '8':
1935             case '9':
1936                 ++p;
1937                 d = c - '0';
1938                 break;
1939             case 'a':
1940             case 'b':
1941             case 'c':
1942             case 'd':
1943             case 'e':
1944             case 'f':
1945             case 'A':
1946             case 'B':
1947             case 'C':
1948             case 'D':
1949             case 'E':
1950             case 'F':
1951                 ++p;
1952                 if (base != 16)
1953                 {
1954                     if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
1955                         goto Lreal;
1956                 }
1957                 if (c >= 'a')
1958                     d = c + 10 - 'a';
1959                 else
1960                     d = c + 10 - 'A';
1961                 break;
1962             case 'L':
1963                 if (p[1] == 'i')
1964                     goto Lreal;
1965                 goto Ldone;
1966             case '.':
1967                 if (p[1] == '.')
1968                     goto Ldone; // if ".."
1969                 if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
1970                     goto Ldone; // if ".identifier" or ".unicode"
1971                 if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
1972                     goto Ldone; // if ".identifier" or ".unicode"
1973                 if (base == 2)
1974                     goto Ldone; // if ".identifier" or ".unicode"
1975                 goto Lreal; // otherwise as part of a floating point literal
1976             case 'p':
1977             case 'P':
1978             case 'i':
1979             Lreal:
1980                 p = start;
1981                 return inreal(t);
1982             case '_':
1983                 ++p;
1984                 continue;
1985             default:
1986                 goto Ldone;
1987             }
1988             // got a digit here, set any necessary flags, check for errors
1989             anyHexDigitsNoSingleUS = true;
1990             anyBinaryDigitsNoSingleUS = true;
1991             if (!err && d >= base)
1992             {
1993                 error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
1994                                                      base == 8 ? "octal".ptr :
1995                                                      "decimal".ptr, c);
1996                 err = true;
1997             }
1998             // Avoid expensive overflow check if we aren't at risk of overflow
1999             if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
2000                 n = n * base + d;
2001             else
2002             {
2003                 import core.checkedint : mulu, addu;
2004 
2005                 n = mulu(n, base, overflow);
2006                 n = addu(n, d, overflow);
2007             }
2008         }
2009     Ldone:
2010         if (overflow && !err)
2011         {
2012             error("integer overflow");
2013             err = true;
2014         }
2015         if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
2016             (base == 16 && !anyHexDigitsNoSingleUS))
2017             error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
2018         enum FLAGS : int
2019         {
2020             none = 0,
2021             decimal = 1, // decimal
2022             unsigned = 2, // u or U suffix
2023             long_ = 4, // L suffix
2024         }
2025 
2026         FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
2027         // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2028         const psuffix = p;
2029         while (1)
2030         {
2031             FLAGS f;
2032             switch (*p)
2033             {
2034             case 'U':
2035             case 'u':
2036                 f = FLAGS.unsigned;
2037                 goto L1;
2038             case 'l':
2039                 f = FLAGS.long_;
2040                 error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
2041                 goto L1;
2042             case 'L':
2043                 f = FLAGS.long_;
2044             L1:
2045                 p++;
2046                 if ((flags & f) && !err)
2047                 {
2048                     error("unrecognized token");
2049                     err = true;
2050                 }
2051                 flags = cast(FLAGS)(flags | f);
2052                 continue;
2053             default:
2054                 break;
2055             }
2056             break;
2057         }
2058         if (base == 8 && n >= 8)
2059         {
2060             if (err)
2061                 // can't translate invalid octal value, just show a generic message
2062                 error("octal literals larger than 7 are no longer supported");
2063             else
2064                 error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
2065                     n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
2066         }
2067         TOK result;
2068         switch (flags)
2069         {
2070         case FLAGS.none:
2071             /* Octal or Hexadecimal constant.
2072              * First that fits: int, uint, long, ulong
2073              */
2074             if (n & 0x8000000000000000L)
2075                 result = TOK.uns64Literal;
2076             else if (n & 0xFFFFFFFF00000000L)
2077                 result = TOK.int64Literal;
2078             else if (n & 0x80000000)
2079                 result = TOK.uns32Literal;
2080             else
2081                 result = TOK.int32Literal;
2082             break;
2083         case FLAGS.decimal:
2084             /* First that fits: int, long, long long
2085              */
2086             if (n & 0x8000000000000000L)
2087             {
2088                 result = TOK.uns64Literal;
2089             }
2090             else if (n & 0xFFFFFFFF80000000L)
2091                 result = TOK.int64Literal;
2092             else
2093                 result = TOK.int32Literal;
2094             break;
2095         case FLAGS.unsigned:
2096         case FLAGS.decimal | FLAGS.unsigned:
2097             /* First that fits: uint, ulong
2098              */
2099             if (n & 0xFFFFFFFF00000000L)
2100                 result = TOK.uns64Literal;
2101             else
2102                 result = TOK.uns32Literal;
2103             break;
2104         case FLAGS.decimal | FLAGS.long_:
2105             if (n & 0x8000000000000000L)
2106             {
2107                 if (!err)
2108                 {
2109                     error("signed integer overflow");
2110                     err = true;
2111                 }
2112                 result = TOK.uns64Literal;
2113             }
2114             else
2115                 result = TOK.int64Literal;
2116             break;
2117         case FLAGS.long_:
2118             if (n & 0x8000000000000000L)
2119                 result = TOK.uns64Literal;
2120             else
2121                 result = TOK.int64Literal;
2122             break;
2123         case FLAGS.unsigned | FLAGS.long_:
2124         case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
2125             result = TOK.uns64Literal;
2126             break;
2127         default:
2128             debug
2129             {
2130                 printf("%x\n", flags);
2131             }
2132             assert(0);
2133         }
2134         t.unsvalue = n;
2135         return result;
2136     }
2137 
2138     /**************************************
2139      * Read in characters, converting them to real.
2140      * Bugs:
2141      *      Exponent overflow not detected.
2142      *      Too much requested precision is not detected.
2143      */
2144     private TOK inreal(Token* t)
2145     {
2146         //printf("Lexer::inreal()\n");
2147         debug
2148         {
2149             assert(*p == '.' || isdigit(*p));
2150         }
2151         bool isWellformedString = true;
2152         stringbuffer.setsize(0);
2153         auto pstart = p;
2154         bool hex = false;
2155         dchar c = *p++;
2156         // Leading '0x'
2157         if (c == '0')
2158         {
2159             c = *p++;
2160             if (c == 'x' || c == 'X')
2161             {
2162                 hex = true;
2163                 c = *p++;
2164             }
2165         }
2166         // Digits to left of '.'
2167         while (1)
2168         {
2169             if (c == '.')
2170             {
2171                 c = *p++;
2172                 break;
2173             }
2174             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2175             {
2176                 c = *p++;
2177                 continue;
2178             }
2179             break;
2180         }
2181         // Digits to right of '.'
2182         while (1)
2183         {
2184             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2185             {
2186                 c = *p++;
2187                 continue;
2188             }
2189             break;
2190         }
2191         if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
2192         {
2193             c = *p++;
2194             if (c == '-' || c == '+')
2195             {
2196                 c = *p++;
2197             }
2198             bool anyexp = false;
2199             while (1)
2200             {
2201                 if (isdigit(c))
2202                 {
2203                     anyexp = true;
2204                     c = *p++;
2205                     continue;
2206                 }
2207                 if (c == '_')
2208                 {
2209                     c = *p++;
2210                     continue;
2211                 }
2212                 if (!anyexp)
2213                 {
2214                     error("missing exponent");
2215                     isWellformedString = false;
2216                 }
2217                 break;
2218             }
2219         }
2220         else if (hex)
2221         {
2222             error("exponent required for hex float");
2223             isWellformedString = false;
2224         }
2225         --p;
2226         while (pstart < p)
2227         {
2228             if (*pstart != '_')
2229                 stringbuffer.writeByte(*pstart);
2230             ++pstart;
2231         }
2232         stringbuffer.writeByte(0);
2233         auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
2234         TOK result;
2235         bool isOutOfRange = false;
2236         t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
2237         switch (*p)
2238         {
2239         case 'F':
2240         case 'f':
2241             if (isWellformedString && !isOutOfRange)
2242                 isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
2243             result = TOK.float32Literal;
2244             p++;
2245             break;
2246         default:
2247             if (isWellformedString && !isOutOfRange)
2248                 isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
2249             result = TOK.float64Literal;
2250             break;
2251         case 'l':
2252             error("use 'L' suffix instead of 'l'");
2253             goto case 'L';
2254         case 'L':
2255             result = TOK.float80Literal;
2256             p++;
2257             break;
2258         }
2259         if (*p == 'i' || *p == 'I')
2260         {
2261             if (*p == 'I')
2262                 error("use 'i' suffix instead of 'I'");
2263             p++;
2264             switch (result)
2265             {
2266             case TOK.float32Literal:
2267                 result = TOK.imaginary32Literal;
2268                 break;
2269             case TOK.float64Literal:
2270                 result = TOK.imaginary64Literal;
2271                 break;
2272             case TOK.float80Literal:
2273                 result = TOK.imaginary80Literal;
2274                 break;
2275             default:
2276                 break;
2277             }
2278         }
2279         const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
2280         if (isOutOfRange && !isLong)
2281         {
2282             const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
2283             error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
2284         }
2285         debug
2286         {
2287             switch (result)
2288             {
2289             case TOK.float32Literal:
2290             case TOK.float64Literal:
2291             case TOK.float80Literal:
2292             case TOK.imaginary32Literal:
2293             case TOK.imaginary64Literal:
2294             case TOK.imaginary80Literal:
2295                 break;
2296             default:
2297                 assert(0);
2298             }
2299         }
2300         return result;
2301     }
2302 
2303     final Loc loc() pure @nogc
2304     {
2305         scanloc.charnum = cast(uint)(1 + p - line);
2306         version (LocOffset)
2307             scanloc.fileOffset = cast(uint)(p - base);
2308         return scanloc;
2309     }
2310 
2311     final void error(const(char)* format, ...)
2312     {
2313         va_list args;
2314         va_start(args, format);
2315         handleDiagnostic(token.loc, Severity.error, format, args);
2316         va_end(args);
2317     }
2318 
2319     final void error(const ref Loc loc, const(char)* format, ...)
2320     {
2321         va_list args;
2322         va_start(args, format);
2323         handleDiagnostic(loc, Severity.error, format, args);
2324         va_end(args);
2325     }
2326 
2327     final void errorSupplemental(const ref Loc loc, const(char)* format, ...)
2328     {
2329         va_list args;
2330         va_start(args, format);
2331         handleDiagnostic(loc, Severity.error, format, args, true);
2332         va_end(args);
2333     }
2334 
2335     final void warning(const ref Loc loc, const(char)* format, ...)
2336     {
2337         va_list args;
2338         va_start(args, format);
2339         handleDiagnostic(loc, Severity.warning, format, args);
2340         va_end(args);
2341     }
2342 
2343     final void warningSupplemental(const ref Loc loc, const(char)* format, ...)
2344     {
2345         va_list args;
2346         va_start(args, format);
2347         handleDiagnostic(loc, Severity.warning, format, args, true);
2348         va_end(args);
2349     }
2350 
2351     final void deprecation(const(char)* format, ...)
2352     {
2353         va_list args;
2354         va_start(args, format);
2355         handleDiagnostic(token.loc, Severity.deprecation, format, args);
2356         va_end(args);
2357     }
2358 
2359     final void deprecationSupplemental(const(char)* format, ...)
2360     {
2361         va_list args;
2362         va_start(args, format);
2363         handleDiagnostic(token.loc, Severity.deprecation, format, args, true);
2364         va_end(args);
2365     }
2366 
2367     /*********************************************
2368      * parse:
2369      *      #line linnum [filespec]
2370      * also allow __LINE__ for linnum, and __FILE__ for filespec
2371      */
2372     private void poundLine()
2373     {
2374         auto linnum = this.scanloc.linnum;
2375         const(char)* filespec = null;
2376         const loc = this.loc();
2377         Token tok;
2378         scan(&tok);
2379         if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
2380         {
2381             const lin = cast(int)(tok.unsvalue - 1);
2382             if (lin != tok.unsvalue - 1)
2383                 error("line number `%lld` out of range", cast(ulong)tok.unsvalue);
2384             else
2385                 linnum = lin;
2386         }
2387         else if (tok.value == TOK.line)
2388         {
2389         }
2390         else
2391             goto Lerr;
2392         while (1)
2393         {
2394             switch (*p)
2395             {
2396             case 0:
2397             case 0x1A:
2398             case '\n':
2399             Lnewline:
2400                 if (!inTokenStringConstant)
2401                 {
2402                     this.scanloc.linnum = linnum;
2403                     if (filespec)
2404                         this.scanloc.filename = filespec;
2405                 }
2406                 return;
2407             case '\r':
2408                 p++;
2409                 if (*p != '\n')
2410                 {
2411                     p--;
2412                     goto Lnewline;
2413                 }
2414                 continue;
2415             case ' ':
2416             case '\t':
2417             case '\v':
2418             case '\f':
2419                 p++;
2420                 continue; // skip white space
2421             case '_':
2422                 if (memcmp(p, "__FILE__".ptr, 8) == 0)
2423                 {
2424                     p += 8;
2425                     filespec = mem.xstrdup(scanloc.filename);
2426                     continue;
2427                 }
2428                 goto Lerr;
2429             case '"':
2430                 if (filespec)
2431                     goto Lerr;
2432                 stringbuffer.setsize(0);
2433                 p++;
2434                 while (1)
2435                 {
2436                     uint c;
2437                     c = *p;
2438                     switch (c)
2439                     {
2440                     case '\n':
2441                     case '\r':
2442                     case 0:
2443                     case 0x1A:
2444                         goto Lerr;
2445                     case '"':
2446                         stringbuffer.writeByte(0);
2447                         filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
2448                         p++;
2449                         break;
2450                     default:
2451                         if (c & 0x80)
2452                         {
2453                             uint u = decodeUTF();
2454                             if (u == PS || u == LS)
2455                                 goto Lerr;
2456                         }
2457                         stringbuffer.writeByte(c);
2458                         p++;
2459                         continue;
2460                     }
2461                     break;
2462                 }
2463                 continue;
2464             default:
2465                 if (*p & 0x80)
2466                 {
2467                     uint u = decodeUTF();
2468                     if (u == PS || u == LS)
2469                         goto Lnewline;
2470                 }
2471                 goto Lerr;
2472             }
2473         }
2474     Lerr:
2475         error(loc, "#line integer [\"filespec\"]\\n expected");
2476     }
2477 
2478     /********************************************
2479      * Decode UTF character.
2480      * Issue error messages for invalid sequences.
2481      * Return decoded character, advance p to last character in UTF sequence.
2482      */
2483     private uint decodeUTF()
2484     {
2485         const s = p;
2486         assert(*s & 0x80);
2487         // Check length of remaining string up to 4 UTF-8 characters
2488         size_t len;
2489         for (len = 1; len < 4 && s[len]; len++)
2490         {
2491         }
2492         size_t idx = 0;
2493         dchar u;
2494         const msg = utf_decodeChar(s[0 .. len], idx, u);
2495         p += idx - 1;
2496         if (msg)
2497         {
2498             error("%.*s", cast(int)msg.length, msg.ptr);
2499         }
2500         return u;
2501     }
2502 
2503     /***************************************************
2504      * Parse doc comment embedded between t.ptr and p.
2505      * Remove trailing blanks and tabs from lines.
2506      * Replace all newlines with \n.
2507      * Remove leading comment character from each line.
2508      * Decide if it's a lineComment or a blockComment.
2509      * Append to previous one for this token.
2510      *
2511      * If newParagraph is true, an extra newline will be
2512      * added between adjoining doc comments.
2513      */
2514     private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
2515     {
2516         /* ct tells us which kind of comment it is: '/', '*', or '+'
2517          */
2518         const ct = t.ptr[2];
2519         /* Start of comment text skips over / * *, / + +, or / / /
2520          */
2521         const(char)* q = t.ptr + 3; // start of comment text
2522         const(char)* qend = p;
2523         if (ct == '*' || ct == '+')
2524             qend -= 2;
2525         /* Scan over initial row of ****'s or ++++'s or ////'s
2526          */
2527         for (; q < qend; q++)
2528         {
2529             if (*q != ct)
2530                 break;
2531         }
2532         /* Remove leading spaces until start of the comment
2533          */
2534         int linestart = 0;
2535         if (ct == '/')
2536         {
2537             while (q < qend && (*q == ' ' || *q == '\t'))
2538                 ++q;
2539         }
2540         else if (q < qend)
2541         {
2542             if (*q == '\r')
2543             {
2544                 ++q;
2545                 if (q < qend && *q == '\n')
2546                     ++q;
2547                 linestart = 1;
2548             }
2549             else if (*q == '\n')
2550             {
2551                 ++q;
2552                 linestart = 1;
2553             }
2554         }
2555         /* Remove trailing row of ****'s or ++++'s
2556          */
2557         if (ct != '/')
2558         {
2559             for (; q < qend; qend--)
2560             {
2561                 if (qend[-1] != ct)
2562                     break;
2563             }
2564         }
2565         /* Comment is now [q .. qend].
2566          * Canonicalize it into buf[].
2567          */
2568         OutBuffer buf;
2569 
2570         void trimTrailingWhitespace()
2571         {
2572             const s = buf[];
2573             auto len = s.length;
2574             while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
2575                 --len;
2576             buf.setsize(len);
2577         }
2578 
2579         for (; q < qend; q++)
2580         {
2581             char c = *q;
2582             switch (c)
2583             {
2584             case '*':
2585             case '+':
2586                 if (linestart && c == ct)
2587                 {
2588                     linestart = 0;
2589                     /* Trim preceding whitespace up to preceding \n
2590                      */
2591                     trimTrailingWhitespace();
2592                     continue;
2593                 }
2594                 break;
2595             case ' ':
2596             case '\t':
2597                 break;
2598             case '\r':
2599                 if (q[1] == '\n')
2600                     continue; // skip the \r
2601                 goto Lnewline;
2602             default:
2603                 if (c == 226)
2604                 {
2605                     // If LS or PS
2606                     if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
2607                     {
2608                         q += 2;
2609                         goto Lnewline;
2610                     }
2611                 }
2612                 linestart = 0;
2613                 break;
2614             Lnewline:
2615                 c = '\n'; // replace all newlines with \n
2616                 goto case;
2617             case '\n':
2618                 linestart = 1;
2619                 /* Trim trailing whitespace
2620                  */
2621                 trimTrailingWhitespace();
2622                 break;
2623             }
2624             buf.writeByte(c);
2625         }
2626         /* Trim trailing whitespace (if the last line does not have newline)
2627          */
2628         trimTrailingWhitespace();
2629 
2630         // Always end with a newline
2631         const s = buf[];
2632         if (s.length == 0 || s[$ - 1] != '\n')
2633             buf.writeByte('\n');
2634 
2635         // It's a line comment if the start of the doc comment comes
2636         // after other non-whitespace on the same line.
2637         auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
2638         // Combine with previous doc comment, if any
2639         if (*dc)
2640             *dc = combineComments(*dc, buf[], newParagraph).toDString();
2641         else
2642             *dc = buf.extractSlice(true);
2643     }
2644 
2645     /********************************************
2646      * Combine two document comments into one,
2647      * separated by an extra newline if newParagraph is true.
2648      */
2649     static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
2650     {
2651         //printf("Lexer::combineComments('%s', '%s', '%i')\n", c1, c2, newParagraph);
2652         const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n'
2653         if (!c1)
2654             return c2.ptr;
2655         if (!c2)
2656             return c1.ptr;
2657 
2658         int insertNewLine = 0;
2659         if (c1.length && c1[$ - 1] != '\n')
2660             insertNewLine = 1;
2661         const retSize = c1.length + insertNewLine + newParagraphSize + c2.length;
2662         auto p = cast(char*)mem.xmalloc_noscan(retSize + 1);
2663         p[0 .. c1.length] = c1[];
2664         if (insertNewLine)
2665             p[c1.length] = '\n';
2666         if (newParagraph)
2667             p[c1.length + insertNewLine] = '\n';
2668         p[retSize - c2.length .. retSize] = c2[];
2669         p[retSize] = 0;
2670         return p;
2671     }
2672 
2673 private:
2674     void endOfLine() pure @nogc @safe
2675     {
2676         scanloc.linnum++;
2677         line = p;
2678     }
2679 }
2680 
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    /// Set by the first call to `initialize`; later calls return immediately.
    private __gshared bool initdone = false;

    // Note: Those properties need to be guarded by a call to `init`
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;        /// "Mmm dd yyyy", 0-terminated
    __gshared char[8 + 1] time;         /// "hh:mm:ss", 0-terminated
    __gshared char[24 + 1] timestamp;   /// "Www Mmm dd hh:mm:ss yyyy", 0-terminated

    /**
     * Fill in `date`, `time` and `timestamp` once.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * if it is set, otherwise from the system clock, and is formatted with
     * `ctime`.
     *
     * Params:
     *      loc = location used for error messages
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;

        initdone = true;
        time_t ct;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        if (auto p = getenv("SOURCE_DATE_EPOCH"))
        {
            if (!ct.parseDigits(p.toDString()))
                error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", p);
        }
        else
            .time(&ct);

        const(char)* stamp = ctime(&ct);
        if (!stamp)
        {
            // ctime() can return null for a timestamp it cannot format (for
            // example a huge SOURCE_DATE_EPOCH). Diagnose it and fall back to
            // the epoch rather than aborting or dereferencing null below.
            error(loc, "timestamp `%lld` cannot be represented as a calendar date", cast(long)ct);
            ct = 0;
            stamp = ctime(&ct);
        }
        assert(stamp);

        // ctime() output has the fixed layout "Www Mmm dd hh:mm:ss yyyy\n"
        sprintf(&date[0], "%.6s %.4s", stamp + 4, stamp + 20);
        sprintf(&time[0], "%.8s", stamp + 11);
        sprintf(&timestamp[0], "%.24s", stamp);
    }
}
2716 
// Valid escape sequences: `Lexer.escapeSequence` must return the expected
// character, consume the whole input, and never emit a diagnostic.
unittest
{
    import dmd.console;

    // Any diagnostic is a test failure.
    nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                   const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &assertDiagnosticHandler;

    // `sequence` is the text that follows the backslash.
    static void test(T)(string sequence, T expected)
    {
        const(char)* cursor = sequence.ptr;
        const decoded = Lexer.escapeSequence(Loc.initial, cursor);
        assert(expected == decoded);
        // the whole sequence must have been consumed
        assert(cursor == sequence.ptr + sequence.length);
    }

    // single character escapes
    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    // two digit hex escapes
    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    // octal escapes
    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    // four digit unicode escapes
    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    // eight digit unicode escapes
    test(`U0001f603`, '\U0001f603');

    // named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    diagnosticHandler = null;
}
2769 unittest
2770 {
2771     import dmd.console;
2772     string expected;    // text the next diagnostic is required to carry
2773     bool gotError;      // raised by the handler once a diagnostic arrives
2774 
2775     // Accepts only an error-class diagnostic whose formatted message equals `expected`.
2776     nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
2777                                          const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
2778     {
2779         assert(cast(Classification)headerColor == Classification.error);
2780         gotError = true;
2781         char[100] buffer = void;
2782         // Bounded write: an over-long message fails the assert instead of overrunning `buffer`.
2783         const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
2784         assert(written >= 0 && written < buffer.length);
2785         assert(expected == buffer[0 .. written]);
2786         return true;
2787     }
2788     diagnosticHandler = &expectDiagnosticHandler;
2789 
2790     // Lexes `sequence` and requires a diagnostic reading `expectedError`, the given
2791     // return value, and `expectedScanLength` characters consumed.
2792     void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength)
2793     {
2794         uint errors = global.errors;
2795         gotError = false;
2796         expected = expectedError;
2797         auto p = cast(const(char)*)sequence.ptr;
2798         auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p);
2799         assert(gotError);
2800         assert(expectedReturnValue == actualReturnValue);
2801         auto actualScanLength = p - sequence.ptr;
2802         assert(expectedScanLength == actualScanLength);
2803         global.errors = errors; // put back the count saved on entry
2804     }
2805 
2806     test("c", `undefined escape sequence \c`, 'c', 1);
2807     test("!", `undefined escape sequence \!`, '!', 1);
2808 
2809     test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);
2810     test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
2811     test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
2812     test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);
2813     test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
2814     test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
2815     test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
2816     test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
2817     test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
2818     test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
2819     test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);
2820 
2821     test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
2822     test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
2823     test("U00110000", `invalid UTF character \U00110000`, '?', 9);
2824 
2825     test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
2826     test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
2827     test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);
2828 
2829     test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
2830     test("&quot", `unterminated named entity &quot;`, '?', 5);
2831 
2832     test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);
2833 
2834     diagnosticHandler = null;
2835 }