1 /**
2  * Implements the lexical analyzer, which converts source code into lexical tokens.
3  *
4  * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
5  *
6  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
10  * Documentation:  https://dlang.org/phobos/dmd_lexer.html
11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
12  */
13 
14 module dmd.lexer;
15 
16 import core.stdc.ctype;
17 import core.stdc.errno;
18 import core.stdc.stdarg;
19 import core.stdc.stdio;
20 import core.stdc.stdlib : getenv;
21 import core.stdc.string;
22 import core.stdc.time;
23 
24 import dmd.diagnostic : DiagnosticHandler, Severity, DefaultDiagnosticHandler, DefaultDiagnosticReporter;
25 import dmd.entity;
26 import dmd.errors;
27 import dmd.globals;
28 import dmd.id;
29 import dmd.identifier;
30 import dmd.root.ctfloat;
31 import dmd.root.outbuffer;
32 import dmd.root.port;
33 import dmd.root.rmem;
34 import dmd.root.string;
35 import dmd.tokens;
36 import dmd.utf;
37 import dmd.utils;
38 
39 nothrow:
40 
41 private enum LS = 0x2028;       // UTF line separator
42 private enum PS = 0x2029;       // UTF paragraph separator
43 
/********************************************
 * Character classification table, built at compile time.
 *
 * Each of the 256 entries is a bit set of `CM*` flags describing one
 * 8-bit character value:
 *  - `CMoctal`: octal digit `'0'` .. `'7'`
 *  - `CMhex`: hexadecimal digit
 *  - `CMidchar`: ASCII letter, digit or `'_'`
 *  - `CMzerosecond`: the set tested on the character following a leading `'0'`
 *  - `CMdigitsecond`: the set tested on the character following a leading `'1'` .. `'9'`
 *  - `CMsinglechar`: 7-bit character other than backslash, newline,
 *    carriage return, NUL, 0x1A and the single quote
 */
private static immutable cmtable = () {
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        ubyte flags;

        if (c >= '0' && c <= '7')
            flags |= CMoctal;
        if (c_isxdigit(c))
            flags |= CMhex;
        if (c == '_' || c_isalnum(c))
            flags |= CMidchar;

        // Characters accepted after a non-zero leading digit ...
        bool afterDigit;
        switch (c)
        {
            case '0': .. case '9':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
            case 'i':
            case '.':
            case '_':
                afterDigit = true;
                break;

            default:
                break;
        }
        // ... and after a leading zero, which additionally takes the
        // `x`/`X` and `b`/`B` prefixes.
        const afterZero = afterDigit ||
                          c == 'x' || c == 'X' ||
                          c == 'b' || c == 'B';

        if (afterZero)
            flags |= CMzerosecond;
        if (afterDigit)
            flags |= CMdigitsecond;

        // Characters that are excluded from the single-character class
        // even though they are 7-bit.
        const excluded = c == '\\' || c == '\n' || c == '\r' ||
                         c == 0 || c == 0x1A || c == '\'';
        if (!excluded && c < 0x80)
            flags |= CMsinglechar;

        table[c] = flags;
    }
    return table;
}();
98 
/// Bit flags stored in each `cmtable` entry.
private enum
{
    CMoctal       = 1 << 0, /// 0x01
    CMhex         = 1 << 1, /// 0x02
    CMidchar      = 1 << 2, /// 0x04
    CMzerosecond  = 1 << 3, /// 0x08
    CMdigitsecond = 1 << 4, /// 0x10
    CMsinglechar  = 1 << 5, /// 0x20
}
108 
/// Returns: `true` if the `CMoctal` flag is set for `c` in `cmtable`.
private bool isoctal(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMoctal) != 0;
}
113 
/// Returns: `true` if the `CMhex` flag is set for `c` in `cmtable`.
private bool ishex(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMhex) != 0;
}
118 
/// Returns: `true` if the `CMidchar` flag is set for `c` in `cmtable`.
private bool isidchar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMidchar) != 0;
}
123 
/// Returns: `true` if the `CMzerosecond` flag is set for `c` in `cmtable`.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMzerosecond) != 0;
}
128 
/// Returns: `true` if the `CMdigitsecond` flag is set for `c` in `cmtable`.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMdigitsecond) != 0;
}
133 
/// Returns: `true` if the `CMsinglechar` flag is set for `c` in `cmtable`.
private bool issinglechar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMsinglechar) != 0;
}
138 
/**
 * Tests whether `c` is an ASCII hexadecimal digit.
 *
 * Params:
 *  c = character code to classify
 * Returns: `true` for `'0'` .. `'9'`, `'a'` .. `'f'` and `'A'` .. `'F'`,
 *  `false` for every other value
 */
private bool c_isxdigit(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;

        default:
            return false;
    }
}
145 
/**
 * Tests whether `c` is an ASCII letter or decimal digit.
 *
 * Params:
 *  c = character code to classify
 * Returns: `true` for `'0'` .. `'9'`, `'a'` .. `'z'` and `'A'` .. `'Z'`,
 *  `false` for every other value
 */
private bool c_isalnum(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;

        default:
            return false;
    }
}
152 
unittest
{
    /* Smoke test: lexing "int" yields the `int` keyword token followed by
     * end of file, and end of file keeps being returned afterwards.
     */
    string text = "int"; // the literal's implicit trailing '\0' acts as the terminator
    DefaultDiagnosticHandler diagnosticHandler;
    scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0, diagnosticHandler.diagnosticHandler);

    TOK tok = lex1.nextToken();
    diagnosticHandler.report();
    assert(tok == TOK.int32);

    // Asking again after the end must not move past TOK.endOfFile.
    foreach (_; 0 .. 3)
    {
        tok = lex1.nextToken();
        diagnosticHandler.report();
        assert(tok == TOK.endOfFile);
    }
}
176 
unittest
{
    // Suppress Lexer error output for the duration of this test.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    /* Malformed input must still terminate in TOK.endOfFile.
     * Every test vector has to end with 0 or 0x1A.
     */
    static immutable char[][] testcases =
    [
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0], // NOTE(review): identical to the previous vector - confirm whether another byte sequence was intended
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        DefaultDiagnosticHandler diagnosticHandler;
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0, diagnosticHandler.diagnosticHandler);
        TOK tok = lex2.nextToken();
        diagnosticHandler.report();

        // Bound the number of requests by the input length so that a lexer
        // which never reaches the end fails the assert instead of hanging.
        for (size_t iterations = 1;
             tok != TOK.endOfFile && iterations < testcase.length;
             ++iterations)
        {
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);

        // A further request must stay at the end of file.
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
213 
214 /***********************************************************
215  */
216 class Lexer
217 {
218     private __gshared OutBuffer stringbuffer;
219 
220     Loc scanloc;            // for error messages
221     Loc prevloc;            // location of token before current
222 
223     const(char)* p;         // current character
224 
225     Token token;
226 
227     private
228     {
229         const(char)* base;      // pointer to start of buffer
230         const(char)* end;       // pointer to last element of buffer
231         const(char)* line;      // start of current line
232 
233         bool doDocComment;      // collect doc comment information
234         bool anyToken;          // seen at least one token
235         bool commentToken;      // comments are TOK.comment's
236         int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
237         int lastDocLine;        // last line of previous doc comment
238 
239         Token* tokenFreelist;
240         DiagnosticHandler handleDiagnostic;
241         DefaultDiagnosticReporter diagnosticReporter;
242     }
243 
244   nothrow:
245 
246     /*********************
247      * Creates a Lexer for the source code base[begoffset..endoffset+1].
248      * The last character, base[endoffset], must be null (0) or EOF (0x1A).
249      *
250      * Params:
251      *  filename = used for error messages
252      *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
253      *  begoffset = starting offset into base[]
254      *  endoffset = the last offset to read into base[]
255      *  doDocComment = handle documentation comments
256      *  commentToken = comments become TOK.comment's
257      *  diagnosticHandler = diagnostic handler
258      */
259     this(const(char)* filename, const(char)* base, size_t begoffset,
260         size_t endoffset, bool doDocComment, bool commentToken,
261         DiagnosticHandler handleDiagnostic) pure
262     {
263         scanloc = Loc(filename, 1, 1);
264         //printf("Lexer::Lexer(%p,%d)\n",base,length);
265         //printf("lexer.filename = %s\n", filename);
266         token = Token.init;
267         this.base = base;
268         this.end = base + endoffset;
269         p = base + begoffset;
270         line = p;
271         this.doDocComment = doDocComment;
272         this.commentToken = commentToken;
273         this.inTokenStringConstant = 0;
274         this.lastDocLine = 0;
275         this.handleDiagnostic = handleDiagnostic;
276 
277         //initKeywords();
278         /* If first line starts with '#!', ignore the line
279          */
280         if (p && p[0] == '#' && p[1] == '!')
281         {
282             p += 2;
283             while (1)
284             {
285                 char c = *p++;
286                 switch (c)
287                 {
288                 case 0:
289                 case 0x1A:
290                     p--;
291                     goto case;
292                 case '\n':
293                     break;
294                 default:
295                     continue;
296                 }
297                 break;
298             }
299             endOfLine();
300         }
301     }
302 
303     /// Returns: a newly allocated `Token`.
304     Token* allocateToken() pure nothrow @safe
305     {
306         if (tokenFreelist)
307         {
308             Token* t = tokenFreelist;
309             tokenFreelist = t.next;
310             t.next = null;
311             return t;
312         }
313         return new Token();
314     }
315 
316     /// Frees the given token by returning it to the freelist.
317     private void releaseToken(Token* token) pure nothrow @nogc @safe
318     {
319         if (mem.isGCEnabled)
320             *token = Token.init;
321         token.next = tokenFreelist;
322         tokenFreelist = token;
323     }
324 
325     TOK nextToken()
326     {
327         prevloc = token.loc;
328         if (token.next)
329         {
330             Token* t = token.next;
331             memcpy(&token, t, Token.sizeof);
332             releaseToken(t);
333         }
334         else
335         {
336             scan(&token);
337         }
338         //printf(token.toChars());
339         return token.value;
340     }
341 
342     /***********************
343      * Look ahead at next token's value.
344      */
345     final TOK peekNext()
346     {
347         return peek(&token).value;
348     }
349 
350     /***********************
351      * Look 2 tokens ahead at value.
352      */
353     final TOK peekNext2()
354     {
355         Token* t = peek(&token);
356         return peek(t).value;
357     }
358 
359     /****************************
360      * Turn next token in buffer into a token.
361      */
362     final void scan(Token* t)
363     {
364         const lastLine = scanloc.linnum;
365         Loc startLoc;
366         t.blockComment = null;
367         t.lineComment = null;
368 
369         while (1)
370         {
371             t.ptr = p;
372             //printf("p = %p, *p = '%c'\n",p,*p);
373             t.loc = loc();
374             switch (*p)
375             {
376             case 0:
377             case 0x1A:
378                 t.value = TOK.endOfFile; // end of file
379                 // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
380                 return;
381             case ' ':
382             case '\t':
383             case '\v':
384             case '\f':
385                 p++;
386                 continue; // skip white space
387             case '\r':
388                 p++;
389                 if (*p != '\n') // if CR stands by itself
390                     endOfLine();
391                 continue; // skip white space
392             case '\n':
393                 p++;
394                 endOfLine();
395                 continue; // skip white space
396             case '0':
397                 if (!isZeroSecond(p[1]))        // if numeric literal does not continue
398                 {
399                     ++p;
400                     t.unsvalue = 0;
401                     t.value = TOK.int32Literal;
402                     return;
403                 }
404                 goto Lnumber;
405 
406             case '1': .. case '9':
407                 if (!isDigitSecond(p[1]))       // if numeric literal does not continue
408                 {
409                     t.unsvalue = *p - '0';
410                     ++p;
411                     t.value = TOK.int32Literal;
412                     return;
413                 }
414             Lnumber:
415                 t.value = number(t);
416                 return;
417 
418             case '\'':
419                 if (issinglechar(p[1]) && p[2] == '\'')
420                 {
421                     t.unsvalue = p[1];        // simple one character literal
422                     t.value = TOK.charLiteral;
423                     p += 3;
424                 }
425                 else
426                     t.value = charConstant(t);
427                 return;
428             case 'r':
429                 if (p[1] != '"')
430                     goto case_ident;
431                 p++;
432                 goto case '`';
433             case '`':
434                 wysiwygStringConstant(t);
435                 return;
436             case 'x':
437                 if (p[1] != '"')
438                     goto case_ident;
439                 p++;
440                 auto start = p;
441                 auto hexString = new OutBuffer();
442                 t.value = hexStringConstant(t);
443                 hexString.write(start[0 .. p - start]);
444                 error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
445                 return;
446             case 'q':
447                 if (p[1] == '"')
448                 {
449                     p++;
450                     delimitedStringConstant(t);
451                     return;
452                 }
453                 else if (p[1] == '{')
454                 {
455                     p++;
456                     tokenStringConstant(t);
457                     return;
458                 }
459                 else
460                     goto case_ident;
461             case '"':
462                 escapeStringConstant(t);
463                 return;
464             case 'a':
465             case 'b':
466             case 'c':
467             case 'd':
468             case 'e':
469             case 'f':
470             case 'g':
471             case 'h':
472             case 'i':
473             case 'j':
474             case 'k':
475             case 'l':
476             case 'm':
477             case 'n':
478             case 'o':
479             case 'p':
480                 /*case 'q': case 'r':*/
481             case 's':
482             case 't':
483             case 'u':
484             case 'v':
485             case 'w':
486                 /*case 'x':*/
487             case 'y':
488             case 'z':
489             case 'A':
490             case 'B':
491             case 'C':
492             case 'D':
493             case 'E':
494             case 'F':
495             case 'G':
496             case 'H':
497             case 'I':
498             case 'J':
499             case 'K':
500             case 'L':
501             case 'M':
502             case 'N':
503             case 'O':
504             case 'P':
505             case 'Q':
506             case 'R':
507             case 'S':
508             case 'T':
509             case 'U':
510             case 'V':
511             case 'W':
512             case 'X':
513             case 'Y':
514             case 'Z':
515             case '_':
516             case_ident:
517                 {
518                     while (1)
519                     {
520                         const c = *++p;
521                         if (isidchar(c))
522                             continue;
523                         else if (c & 0x80)
524                         {
525                             const s = p;
526                             const u = decodeUTF();
527                             if (isUniAlpha(u))
528                                 continue;
529                             error("char 0x%04x not allowed in identifier", u);
530                             p = s;
531                         }
532                         break;
533                     }
534                     Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
535                     t.ident = id;
536                     t.value = cast(TOK)id.getValue();
537                     anyToken = 1;
538                     if (*t.ptr == '_') // if special identifier token
539                     {
540                         // Lazy initialization
541                         TimeStampInfo.initialize(t.loc);
542 
543                         if (id == Id.DATE)
544                         {
545                             t.ustring = TimeStampInfo.date.ptr;
546                             goto Lstr;
547                         }
548                         else if (id == Id.TIME)
549                         {
550                             t.ustring = TimeStampInfo.time.ptr;
551                             goto Lstr;
552                         }
553                         else if (id == Id.VENDOR)
554                         {
555                             t.ustring = global.vendor.xarraydup.ptr;
556                             goto Lstr;
557                         }
558                         else if (id == Id.TIMESTAMP)
559                         {
560                             t.ustring = TimeStampInfo.timestamp.ptr;
561                         Lstr:
562                             t.value = TOK.string_;
563                             t.postfix = 0;
564                             t.len = cast(uint)strlen(t.ustring);
565                         }
566                         else if (id == Id.VERSIONX)
567                         {
568                             t.value = TOK.int64Literal;
569                             t.unsvalue = global.versionNumber();
570                         }
571                         else if (id == Id.EOFX)
572                         {
573                             t.value = TOK.endOfFile;
574                             // Advance scanner to end of file
575                             while (!(*p == 0 || *p == 0x1A))
576                                 p++;
577                         }
578                     }
579                     //printf("t.value = %d\n",t.value);
580                     return;
581                 }
582             case '/':
583                 p++;
584                 switch (*p)
585                 {
586                 case '=':
587                     p++;
588                     t.value = TOK.divAssign;
589                     return;
590                 case '*':
591                     p++;
592                     startLoc = loc();
593                     while (1)
594                     {
595                         while (1)
596                         {
597                             const c = *p;
598                             switch (c)
599                             {
600                             case '/':
601                                 break;
602                             case '\n':
603                                 endOfLine();
604                                 p++;
605                                 continue;
606                             case '\r':
607                                 p++;
608                                 if (*p != '\n')
609                                     endOfLine();
610                                 continue;
611                             case 0:
612                             case 0x1A:
613                                 error("unterminated /* */ comment");
614                                 p = end;
615                                 t.loc = loc();
616                                 t.value = TOK.endOfFile;
617                                 return;
618                             default:
619                                 if (c & 0x80)
620                                 {
621                                     const u = decodeUTF();
622                                     if (u == PS || u == LS)
623                                         endOfLine();
624                                 }
625                                 p++;
626                                 continue;
627                             }
628                             break;
629                         }
630                         p++;
631                         if (p[-2] == '*' && p - 3 != t.ptr)
632                             break;
633                     }
634                     if (commentToken)
635                     {
636                         t.loc = startLoc;
637                         t.value = TOK.comment;
638                         return;
639                     }
640                     else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
641                     {
642                         // if /** but not /**/
643                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
644                         lastDocLine = scanloc.linnum;
645                     }
646                     continue;
647                 case '/': // do // style comments
648                     startLoc = loc();
649                     while (1)
650                     {
651                         const c = *++p;
652                         switch (c)
653                         {
654                         case '\n':
655                             break;
656                         case '\r':
657                             if (p[1] == '\n')
658                                 p++;
659                             break;
660                         case 0:
661                         case 0x1A:
662                             if (commentToken)
663                             {
664                                 p = end;
665                                 t.loc = startLoc;
666                                 t.value = TOK.comment;
667                                 return;
668                             }
669                             if (doDocComment && t.ptr[2] == '/')
670                             {
671                                 getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
672                                 lastDocLine = scanloc.linnum;
673                             }
674                             p = end;
675                             t.loc = loc();
676                             t.value = TOK.endOfFile;
677                             return;
678                         default:
679                             if (c & 0x80)
680                             {
681                                 const u = decodeUTF();
682                                 if (u == PS || u == LS)
683                                     break;
684                             }
685                             continue;
686                         }
687                         break;
688                     }
689                     if (commentToken)
690                     {
691                         p++;
692                         endOfLine();
693                         t.loc = startLoc;
694                         t.value = TOK.comment;
695                         return;
696                     }
697                     if (doDocComment && t.ptr[2] == '/')
698                     {
699                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
700                         lastDocLine = scanloc.linnum;
701                     }
702                     p++;
703                     endOfLine();
704                     continue;
705                 case '+':
706                     {
707                         int nest;
708                         startLoc = loc();
709                         p++;
710                         nest = 1;
711                         while (1)
712                         {
713                             char c = *p;
714                             switch (c)
715                             {
716                             case '/':
717                                 p++;
718                                 if (*p == '+')
719                                 {
720                                     p++;
721                                     nest++;
722                                 }
723                                 continue;
724                             case '+':
725                                 p++;
726                                 if (*p == '/')
727                                 {
728                                     p++;
729                                     if (--nest == 0)
730                                         break;
731                                 }
732                                 continue;
733                             case '\r':
734                                 p++;
735                                 if (*p != '\n')
736                                     endOfLine();
737                                 continue;
738                             case '\n':
739                                 endOfLine();
740                                 p++;
741                                 continue;
742                             case 0:
743                             case 0x1A:
744                                 error("unterminated /+ +/ comment");
745                                 p = end;
746                                 t.loc = loc();
747                                 t.value = TOK.endOfFile;
748                                 return;
749                             default:
750                                 if (c & 0x80)
751                                 {
752                                     uint u = decodeUTF();
753                                     if (u == PS || u == LS)
754                                         endOfLine();
755                                 }
756                                 p++;
757                                 continue;
758                             }
759                             break;
760                         }
761                         if (commentToken)
762                         {
763                             t.loc = startLoc;
764                             t.value = TOK.comment;
765                             return;
766                         }
767                         if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
768                         {
769                             // if /++ but not /++/
770                             getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
771                             lastDocLine = scanloc.linnum;
772                         }
773                         continue;
774                     }
775                 default:
776                     break;
777                 }
778                 t.value = TOK.div;
779                 return;
780             case '.':
781                 p++;
782                 if (isdigit(*p))
783                 {
784                     /* Note that we don't allow ._1 and ._ as being
785                      * valid floating point numbers.
786                      */
787                     p--;
788                     t.value = inreal(t);
789                 }
790                 else if (p[0] == '.')
791                 {
792                     if (p[1] == '.')
793                     {
794                         p += 2;
795                         t.value = TOK.dotDotDot;
796                     }
797                     else
798                     {
799                         p++;
800                         t.value = TOK.slice;
801                     }
802                 }
803                 else
804                     t.value = TOK.dot;
805                 return;
806             case '&':
807                 p++;
808                 if (*p == '=')
809                 {
810                     p++;
811                     t.value = TOK.andAssign;
812                 }
813                 else if (*p == '&')
814                 {
815                     p++;
816                     t.value = TOK.andAnd;
817                 }
818                 else
819                     t.value = TOK.and;
820                 return;
821             case '|':
822                 p++;
823                 if (*p == '=')
824                 {
825                     p++;
826                     t.value = TOK.orAssign;
827                 }
828                 else if (*p == '|')
829                 {
830                     p++;
831                     t.value = TOK.orOr;
832                 }
833                 else
834                     t.value = TOK.or;
835                 return;
836             case '-':
837                 p++;
838                 if (*p == '=')
839                 {
840                     p++;
841                     t.value = TOK.minAssign;
842                 }
843                 else if (*p == '-')
844                 {
845                     p++;
846                     t.value = TOK.minusMinus;
847                 }
848                 else
849                     t.value = TOK.min;
850                 return;
851             case '+':
852                 p++;
853                 if (*p == '=')
854                 {
855                     p++;
856                     t.value = TOK.addAssign;
857                 }
858                 else if (*p == '+')
859                 {
860                     p++;
861                     t.value = TOK.plusPlus;
862                 }
863                 else
864                     t.value = TOK.add;
865                 return;
866             case '<':
867                 p++;
868                 if (*p == '=')
869                 {
870                     p++;
871                     t.value = TOK.lessOrEqual; // <=
872                 }
873                 else if (*p == '<')
874                 {
875                     p++;
876                     if (*p == '=')
877                     {
878                         p++;
879                         t.value = TOK.leftShiftAssign; // <<=
880                     }
881                     else
882                         t.value = TOK.leftShift; // <<
883                 }
884                 else
885                     t.value = TOK.lessThan; // <
886                 return;
887             case '>':
888                 p++;
889                 if (*p == '=')
890                 {
891                     p++;
892                     t.value = TOK.greaterOrEqual; // >=
893                 }
894                 else if (*p == '>')
895                 {
896                     p++;
897                     if (*p == '=')
898                     {
899                         p++;
900                         t.value = TOK.rightShiftAssign; // >>=
901                     }
902                     else if (*p == '>')
903                     {
904                         p++;
905                         if (*p == '=')
906                         {
907                             p++;
908                             t.value = TOK.unsignedRightShiftAssign; // >>>=
909                         }
910                         else
911                             t.value = TOK.unsignedRightShift; // >>>
912                     }
913                     else
914                         t.value = TOK.rightShift; // >>
915                 }
916                 else
917                     t.value = TOK.greaterThan; // >
918                 return;
919             case '!':
920                 p++;
921                 if (*p == '=')
922                 {
923                     p++;
924                     t.value = TOK.notEqual; // !=
925                 }
926                 else
927                     t.value = TOK.not; // !
928                 return;
929             case '=':
930                 p++;
931                 if (*p == '=')
932                 {
933                     p++;
934                     t.value = TOK.equal; // ==
935                 }
936                 else if (*p == '>')
937                 {
938                     p++;
939                     t.value = TOK.goesTo; // =>
940                 }
941                 else
942                     t.value = TOK.assign; // =
943                 return;
944             case '~':
945                 p++;
946                 if (*p == '=')
947                 {
948                     p++;
949                     t.value = TOK.concatenateAssign; // ~=
950                 }
951                 else
952                     t.value = TOK.tilde; // ~
953                 return;
954             case '^':
955                 p++;
956                 if (*p == '^')
957                 {
958                     p++;
959                     if (*p == '=')
960                     {
961                         p++;
962                         t.value = TOK.powAssign; // ^^=
963                     }
964                     else
965                         t.value = TOK.pow; // ^^
966                 }
967                 else if (*p == '=')
968                 {
969                     p++;
970                     t.value = TOK.xorAssign; // ^=
971                 }
972                 else
973                     t.value = TOK.xor; // ^
974                 return;
975             case '(':
976                 p++;
977                 t.value = TOK.leftParentheses;
978                 return;
979             case ')':
980                 p++;
981                 t.value = TOK.rightParentheses;
982                 return;
983             case '[':
984                 p++;
985                 t.value = TOK.leftBracket;
986                 return;
987             case ']':
988                 p++;
989                 t.value = TOK.rightBracket;
990                 return;
991             case '{':
992                 p++;
993                 t.value = TOK.leftCurly;
994                 return;
995             case '}':
996                 p++;
997                 t.value = TOK.rightCurly;
998                 return;
999             case '?':
1000                 p++;
1001                 t.value = TOK.question;
1002                 return;
1003             case ',':
1004                 p++;
1005                 t.value = TOK.comma;
1006                 return;
1007             case ';':
1008                 p++;
1009                 t.value = TOK.semicolon;
1010                 return;
1011             case ':':
1012                 p++;
1013                 t.value = TOK.colon;
1014                 return;
1015             case '$':
1016                 p++;
1017                 t.value = TOK.dollar;
1018                 return;
1019             case '@':
1020                 p++;
1021                 t.value = TOK.at;
1022                 return;
1023             case '*':
1024                 p++;
1025                 if (*p == '=')
1026                 {
1027                     p++;
1028                     t.value = TOK.mulAssign;
1029                 }
1030                 else
1031                     t.value = TOK.mul;
1032                 return;
1033             case '%':
1034                 p++;
1035                 if (*p == '=')
1036                 {
1037                     p++;
1038                     t.value = TOK.modAssign;
1039                 }
1040                 else
1041                     t.value = TOK.mod;
1042                 return;
1043             case '#':
1044                 {
1045                     p++;
1046                     Token n;
1047                     scan(&n);
1048                     if (n.value == TOK.identifier)
1049                     {
1050                         if (n.ident == Id.line)
1051                         {
1052                             poundLine();
1053                             continue;
1054                         }
1055                         else
1056                         {
1057                             const locx = loc();
1058                             warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
1059                         }
1060                     }
1061                     else if (n.value == TOK.if_)
1062                     {
1063                         error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
1064                     }
1065                     t.value = TOK.pound;
1066                     return;
1067                 }
1068             default:
1069                 {
1070                     dchar c = *p;
1071                     if (c & 0x80)
1072                     {
1073                         c = decodeUTF();
1074                         // Check for start of unicode identifier
1075                         if (isUniAlpha(c))
1076                             goto case_ident;
1077                         if (c == PS || c == LS)
1078                         {
1079                             endOfLine();
1080                             p++;
1081                             continue;
1082                         }
1083                     }
1084                     if (c < 0x80 && isprint(c))
1085                         error("character '%c' is not a valid token", c);
1086                     else
1087                         error("character 0x%02x is not a valid token", c);
1088                     p++;
1089                     continue;
1090                 }
1091             }
1092         }
1093     }
1094 
1095     final Token* peek(Token* ct)
1096     {
1097         Token* t;
1098         if (ct.next)
1099             t = ct.next;
1100         else
1101         {
1102             t = allocateToken();
1103             scan(t);
1104             ct.next = t;
1105         }
1106         return t;
1107     }
1108 
1109     /*********************************
1110      * tk is on the opening (.
1111      * Look ahead and return token that is past the closing ).
1112      */
1113     final Token* peekPastParen(Token* tk)
1114     {
1115         //printf("peekPastParen()\n");
1116         int parens = 1;
1117         int curlynest = 0;
1118         while (1)
1119         {
1120             tk = peek(tk);
1121             //tk.print();
1122             switch (tk.value)
1123             {
1124             case TOK.leftParentheses:
1125                 parens++;
1126                 continue;
1127             case TOK.rightParentheses:
1128                 --parens;
1129                 if (parens)
1130                     continue;
1131                 tk = peek(tk);
1132                 break;
1133             case TOK.leftCurly:
1134                 curlynest++;
1135                 continue;
1136             case TOK.rightCurly:
1137                 if (--curlynest >= 0)
1138                     continue;
1139                 break;
1140             case TOK.semicolon:
1141                 if (curlynest)
1142                     continue;
1143                 break;
1144             case TOK.endOfFile:
1145                 break;
1146             default:
1147                 continue;
1148             }
1149             return tk;
1150         }
1151     }
1152 
1153     /*******************************************
1154      * Parse escape sequence.
1155      */
1156     private uint escapeSequence()
1157     {
1158         return Lexer.escapeSequence(token.loc, p);
1159     }
1160 
1161     /**
1162     Parse the given string literal escape sequence into a single character.
1163     Params:
1164         loc = the location of the current token
1165         sequence = pointer to string with escape sequence to parse. this is a reference
1166                    variable that is also used to return the position after the sequence
1167     Returns:
1168         the escaped sequence as a single character
1169     */
1170     private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence)
1171     {
1172         const(char)* p = sequence; // cache sequence reference on stack
1173         scope(exit) sequence = p;
1174 
1175         uint c = *p;
1176         int ndigits;
1177         switch (c)
1178         {
1179         case '\'':
1180         case '"':
1181         case '?':
1182         case '\\':
1183         Lconsume:
1184             p++;
1185             break;
1186         case 'a':
1187             c = 7;
1188             goto Lconsume;
1189         case 'b':
1190             c = 8;
1191             goto Lconsume;
1192         case 'f':
1193             c = 12;
1194             goto Lconsume;
1195         case 'n':
1196             c = 10;
1197             goto Lconsume;
1198         case 'r':
1199             c = 13;
1200             goto Lconsume;
1201         case 't':
1202             c = 9;
1203             goto Lconsume;
1204         case 'v':
1205             c = 11;
1206             goto Lconsume;
1207         case 'u':
1208             ndigits = 4;
1209             goto Lhex;
1210         case 'U':
1211             ndigits = 8;
1212             goto Lhex;
1213         case 'x':
1214             ndigits = 2;
1215         Lhex:
1216             p++;
1217             c = *p;
1218             if (ishex(cast(char)c))
1219             {
1220                 uint v = 0;
1221                 int n = 0;
1222                 while (1)
1223                 {
1224                     if (isdigit(cast(char)c))
1225                         c -= '0';
1226                     else if (islower(c))
1227                         c -= 'a' - 10;
1228                     else
1229                         c -= 'A' - 10;
1230                     v = v * 16 + c;
1231                     c = *++p;
1232                     if (++n == ndigits)
1233                         break;
1234                     if (!ishex(cast(char)c))
1235                     {
1236                         .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
1237                         break;
1238                     }
1239                 }
1240                 if (ndigits != 2 && !utf_isValidDchar(v))
1241                 {
1242                     .error(loc, "invalid UTF character \\U%08x", v);
1243                     v = '?'; // recover with valid UTF character
1244                 }
1245                 c = v;
1246             }
1247             else
1248             {
1249                 .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
1250                 p++;
1251             }
1252             break;
1253         case '&':
1254             // named character entity
1255             for (const idstart = ++p; 1; p++)
1256             {
1257                 switch (*p)
1258                 {
1259                 case ';':
1260                     c = HtmlNamedEntity(idstart, p - idstart);
1261                     if (c == ~0)
1262                     {
1263                         .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
1264                         c = '?';
1265                     }
1266                     p++;
1267                     break;
1268                 default:
1269                     if (isalpha(*p) || (p != idstart && isdigit(*p)))
1270                         continue;
1271                     .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
1272                     c = '?';
1273                     break;
1274                 }
1275                 break;
1276             }
1277             break;
1278         case 0:
1279         case 0x1A:
1280             // end of file
1281             c = '\\';
1282             break;
1283         default:
1284             if (isoctal(cast(char)c))
1285             {
1286                 uint v = 0;
1287                 int n = 0;
1288                 do
1289                 {
1290                     v = v * 8 + (c - '0');
1291                     c = *++p;
1292                 }
1293                 while (++n < 3 && isoctal(cast(char)c));
1294                 c = v;
1295                 if (c > 0xFF)
1296                     .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
1297             }
1298             else
1299             {
1300                 .error(loc, "undefined escape sequence \\%c", c);
1301                 p++;
1302             }
1303             break;
1304         }
1305         return c;
1306     }
1307 
1308     /**
1309     Lex a wysiwyg string. `p` must be pointing to the first character before the
1310     contents of the string literal. The character pointed to by `p` will be used as
1311     the terminating character (i.e. backtick or double-quote).
1312     Params:
1313         result = pointer to the token that accepts the result
1314     */
1315     private void wysiwygStringConstant(Token* result)
1316     {
1317         result.value = TOK.string_;
1318         Loc start = loc();
1319         auto terminator = p[0];
1320         p++;
1321         stringbuffer.setsize(0);
1322         while (1)
1323         {
1324             dchar c = p[0];
1325             p++;
1326             switch (c)
1327             {
1328             case '\n':
1329                 endOfLine();
1330                 break;
1331             case '\r':
1332                 if (p[0] == '\n')
1333                     continue; // ignore
1334                 c = '\n'; // treat EndOfLine as \n character
1335                 endOfLine();
1336                 break;
1337             case 0:
1338             case 0x1A:
1339                 error("unterminated string constant starting at %s", start.toChars());
1340                 result.setString();
1341                 // rewind `p` so it points to the EOF character
1342                 p--;
1343                 return;
1344             default:
1345                 if (c == terminator)
1346                 {
1347                     result.setString(stringbuffer);
1348                     stringPostfix(result);
1349                     return;
1350                 }
1351                 else if (c & 0x80)
1352                 {
1353                     p--;
1354                     const u = decodeUTF();
1355                     p++;
1356                     if (u == PS || u == LS)
1357                         endOfLine();
1358                     stringbuffer.writeUTF8(u);
1359                     continue;
1360                 }
1361                 break;
1362             }
1363             stringbuffer.writeByte(c);
1364         }
1365     }
1366 
1367     /**************************************
1368      * Lex hex strings:
1369      *      x"0A ae 34FE BD"
1370      */
1371     private TOK hexStringConstant(Token* t)
1372     {
1373         Loc start = loc();
1374         uint n = 0;
1375         uint v = ~0; // dead assignment, needed to suppress warning
1376         p++;
1377         stringbuffer.setsize(0);
1378         while (1)
1379         {
1380             dchar c = *p++;
1381             switch (c)
1382             {
1383             case ' ':
1384             case '\t':
1385             case '\v':
1386             case '\f':
1387                 continue; // skip white space
1388             case '\r':
1389                 if (*p == '\n')
1390                     continue; // ignore '\r' if followed by '\n'
1391                 // Treat isolated '\r' as if it were a '\n'
1392                 goto case '\n';
1393             case '\n':
1394                 endOfLine();
1395                 continue;
1396             case 0:
1397             case 0x1A:
1398                 error("unterminated string constant starting at %s", start.toChars());
1399                 t.setString();
1400                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1401                 p--;
1402                 return TOK.hexadecimalString;
1403             case '"':
1404                 if (n & 1)
1405                 {
1406                     error("odd number (%d) of hex characters in hex string", n);
1407                     stringbuffer.writeByte(v);
1408                 }
1409                 t.setString(stringbuffer);
1410                 stringPostfix(t);
1411                 return TOK.hexadecimalString;
1412             default:
1413                 if (c >= '0' && c <= '9')
1414                     c -= '0';
1415                 else if (c >= 'a' && c <= 'f')
1416                     c -= 'a' - 10;
1417                 else if (c >= 'A' && c <= 'F')
1418                     c -= 'A' - 10;
1419                 else if (c & 0x80)
1420                 {
1421                     p--;
1422                     const u = decodeUTF();
1423                     p++;
1424                     if (u == PS || u == LS)
1425                         endOfLine();
1426                     else
1427                         error("non-hex character \\u%04x in hex string", u);
1428                 }
1429                 else
1430                     error("non-hex character '%c' in hex string", c);
1431                 if (n & 1)
1432                 {
1433                     v = (v << 4) | c;
1434                     stringbuffer.writeByte(v);
1435                 }
1436                 else
1437                     v = c;
1438                 n++;
1439                 break;
1440             }
1441         }
1442         assert(0); // see bug 15731
1443     }
1444 
1445     /**
1446     Lex a delimited string. Some examples of delimited strings are:
1447     ---
1448     q"(foo(xxx))"      // "foo(xxx)"
1449     q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
1450     q"/foo]/"          // "foo]"
1451     q"HERE
1452     foo
1453     HERE"              // "foo\n"
1454     ---
1455     It is assumed that `p` points to the opening double-quote '"'.
1456     Params:
1457         result = pointer to the token that accepts the result
1458     */
1459     private void delimitedStringConstant(Token* result)
1460     {
1461         result.value = TOK.string_;
1462         Loc start = loc();
1463         dchar delimleft = 0;
1464         dchar delimright = 0;
1465         uint nest = 1;
1466         uint nestcount = ~0; // dead assignment, needed to suppress warning
1467         Identifier hereid = null;
1468         uint blankrol = 0;
1469         uint startline = 0;
1470         p++;
1471         stringbuffer.setsize(0);
1472         while (1)
1473         {
1474             dchar c = *p++;
1475             //printf("c = '%c'\n", c);
1476             switch (c)
1477             {
1478             case '\n':
1479             Lnextline:
1480                 endOfLine();
1481                 startline = 1;
1482                 if (blankrol)
1483                 {
1484                     blankrol = 0;
1485                     continue;
1486                 }
1487                 if (hereid)
1488                 {
1489                     stringbuffer.writeUTF8(c);
1490                     continue;
1491                 }
1492                 break;
1493             case '\r':
1494                 if (*p == '\n')
1495                     continue; // ignore
1496                 c = '\n'; // treat EndOfLine as \n character
1497                 goto Lnextline;
1498             case 0:
1499             case 0x1A:
1500                 error("unterminated delimited string constant starting at %s", start.toChars());
1501                 result.setString();
1502                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1503                 p--;
1504                 return;
1505             default:
1506                 if (c & 0x80)
1507                 {
1508                     p--;
1509                     c = decodeUTF();
1510                     p++;
1511                     if (c == PS || c == LS)
1512                         goto Lnextline;
1513                 }
1514                 break;
1515             }
1516             if (delimleft == 0)
1517             {
1518                 delimleft = c;
1519                 nest = 1;
1520                 nestcount = 1;
1521                 if (c == '(')
1522                     delimright = ')';
1523                 else if (c == '{')
1524                     delimright = '}';
1525                 else if (c == '[')
1526                     delimright = ']';
1527                 else if (c == '<')
1528                     delimright = '>';
1529                 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1530                 {
1531                     // Start of identifier; must be a heredoc
1532                     Token tok;
1533                     p--;
1534                     scan(&tok); // read in heredoc identifier
1535                     if (tok.value != TOK.identifier)
1536                     {
1537                         error("identifier expected for heredoc, not %s", tok.toChars());
1538                         delimright = c;
1539                     }
1540                     else
1541                     {
1542                         hereid = tok.ident;
1543                         //printf("hereid = '%s'\n", hereid.toChars());
1544                         blankrol = 1;
1545                     }
1546                     nest = 0;
1547                 }
1548                 else
1549                 {
1550                     delimright = c;
1551                     nest = 0;
1552                     if (isspace(c))
1553                         error("delimiter cannot be whitespace");
1554                 }
1555             }
1556             else
1557             {
1558                 if (blankrol)
1559                 {
1560                     error("heredoc rest of line should be blank");
1561                     blankrol = 0;
1562                     continue;
1563                 }
1564                 if (nest == 1)
1565                 {
1566                     if (c == delimleft)
1567                         nestcount++;
1568                     else if (c == delimright)
1569                     {
1570                         nestcount--;
1571                         if (nestcount == 0)
1572                             goto Ldone;
1573                     }
1574                 }
1575                 else if (c == delimright)
1576                     goto Ldone;
1577                 if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
1578                 {
1579                     Token tok;
1580                     auto psave = p;
1581                     p--;
1582                     scan(&tok); // read in possible heredoc identifier
1583                     //printf("endid = '%s'\n", tok.ident.toChars());
1584                     if (tok.value == TOK.identifier && tok.ident is hereid)
1585                     {
1586                         /* should check that rest of line is blank
1587                          */
1588                         goto Ldone;
1589                     }
1590                     p = psave;
1591                 }
1592                 stringbuffer.writeUTF8(c);
1593                 startline = 0;
1594             }
1595         }
1596     Ldone:
1597         if (*p == '"')
1598             p++;
1599         else if (hereid)
1600             error("delimited string must end in %s\"", hereid.toChars());
1601         else
1602             error("delimited string must end in %c\"", delimright);
1603         result.setString(stringbuffer);
1604         stringPostfix(result);
1605     }
1606 
1607     /**
1608     Lex a token string. Some examples of token strings are:
1609     ---
1610     q{ foo(xxx) }    // " foo(xxx) "
1611     q{foo$(LPAREN)}  // "foo$(LPAREN)"
1612     q{{foo}"}"}      // "{foo}"}""
1613     ---
1614     It is assumed that `p` points to the opening curly-brace '{'.
1615     Params:
1616         result = pointer to the token that accepts the result
1617     */
1618     private void tokenStringConstant(Token* result)
1619     {
1620         result.value = TOK.string_;
1621 
1622         uint nest = 1;
1623         const start = loc();
1624         const pstart = ++p;
1625         inTokenStringConstant++;
1626         scope(exit) inTokenStringConstant--;
1627         while (1)
1628         {
1629             Token tok;
1630             scan(&tok);
1631             switch (tok.value)
1632             {
1633             case TOK.leftCurly:
1634                 nest++;
1635                 continue;
1636             case TOK.rightCurly:
1637                 if (--nest == 0)
1638                 {
1639                     result.setString(pstart, p - 1 - pstart);
1640                     stringPostfix(result);
1641                     return;
1642                 }
1643                 continue;
1644             case TOK.endOfFile:
1645                 error("unterminated token string constant starting at %s", start.toChars());
1646                 result.setString();
1647                 return;
1648             default:
1649                 continue;
1650             }
1651         }
1652     }
1653 
1654     /**
1655     Scan a double-quoted string while building the processed string value by
1656     handling escape sequences. The result is returned in the given `t` token.
1657     This function assumes that `p` currently points to the opening double-quote
1658     of the string.
1659     Params:
1660         t = the token to set the resulting string to
1661     */
1662     private void escapeStringConstant(Token* t)
1663     {
1664         t.value = TOK.string_;
1665 
1666         const start = loc();
1667         p++;
1668         stringbuffer.setsize(0);
1669         while (1)
1670         {
1671             dchar c = *p++;
1672             switch (c)
1673             {
1674             case '\\':
1675                 switch (*p)
1676                 {
1677                 case 'u':
1678                 case 'U':
1679                 case '&':
1680                     c = escapeSequence();
1681                     stringbuffer.writeUTF8(c);
1682                     continue;
1683                 default:
1684                     c = escapeSequence();
1685                     break;
1686                 }
1687                 break;
1688             case '\n':
1689                 endOfLine();
1690                 break;
1691             case '\r':
1692                 if (*p == '\n')
1693                     continue; // ignore
1694                 c = '\n'; // treat EndOfLine as \n character
1695                 endOfLine();
1696                 break;
1697             case '"':
1698                 t.setString(stringbuffer);
1699                 stringPostfix(t);
1700                 return;
1701             case 0:
1702             case 0x1A:
1703                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1704                 p--;
1705                 error("unterminated string constant starting at %s", start.toChars());
1706                 t.setString();
1707                 return;
1708             default:
1709                 if (c & 0x80)
1710                 {
1711                     p--;
1712                     c = decodeUTF();
1713                     if (c == LS || c == PS)
1714                     {
1715                         c = '\n';
1716                         endOfLine();
1717                     }
1718                     p++;
1719                     stringbuffer.writeUTF8(c);
1720                     continue;
1721                 }
1722                 break;
1723             }
1724             stringbuffer.writeByte(c);
1725         }
1726     }
1727 
1728     /**************************************
1729      */
1730     private TOK charConstant(Token* t)
1731     {
1732         TOK tk = TOK.charLiteral;
1733         //printf("Lexer::charConstant\n");
1734         p++;
1735         dchar c = *p++;
1736         switch (c)
1737         {
1738         case '\\':
1739             switch (*p)
1740             {
1741             case 'u':
1742                 t.unsvalue = escapeSequence();
1743                 tk = TOK.wcharLiteral;
1744                 break;
1745             case 'U':
1746             case '&':
1747                 t.unsvalue = escapeSequence();
1748                 tk = TOK.dcharLiteral;
1749                 break;
1750             default:
1751                 t.unsvalue = escapeSequence();
1752                 break;
1753             }
1754             break;
1755         case '\n':
1756         L1:
1757             endOfLine();
1758             goto case;
1759         case '\r':
1760             goto case '\'';
1761         case 0:
1762         case 0x1A:
1763             // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1764             p--;
1765             goto case;
1766         case '\'':
1767             error("unterminated character constant");
1768             t.unsvalue = '?';
1769             return tk;
1770         default:
1771             if (c & 0x80)
1772             {
1773                 p--;
1774                 c = decodeUTF();
1775                 p++;
1776                 if (c == LS || c == PS)
1777                     goto L1;
1778                 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1779                     tk = TOK.wcharLiteral;
1780                 else
1781                     tk = TOK.dcharLiteral;
1782             }
1783             t.unsvalue = c;
1784             break;
1785         }
1786         if (*p != '\'')
1787         {
1788             while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
1789                     *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
1790             {
1791                 if (*p & 0x80)
1792                 {
1793                     const s = p;
1794                     c = decodeUTF();
1795                     if (c == LS || c == PS)
1796                     {
1797                         p = s;
1798                         break;
1799                     }
1800                 }
1801                 p++;
1802             }
1803 
1804             if (*p == '\'')
1805             {
1806                 error("character constant has multiple characters");
1807                 p++;
1808             }
1809             else
1810                 error("unterminated character constant");
1811             t.unsvalue = '?';
1812             return tk;
1813         }
1814         p++;
1815         return tk;
1816     }
1817 
1818     /***************************************
1819      * Get postfix of string literal.
1820      */
1821     private void stringPostfix(Token* t) pure @nogc
1822     {
1823         switch (*p)
1824         {
1825         case 'c':
1826         case 'w':
1827         case 'd':
1828             t.postfix = *p;
1829             p++;
1830             break;
1831         default:
1832             t.postfix = 0;
1833             break;
1834         }
1835     }
1836 
1837     /**************************************
1838      * Read in a number.
1839      * If it's an integer, store it in tok.TKutok.Vlong.
1840      *      integers can be decimal, octal or hex
1841      *      Handle the suffixes U, UL, LU, L, etc.
1842      * If it's double, store it in tok.TKutok.Vdouble.
1843      * Returns:
1844      *      TKnum
1845      *      TKdouble,...
1846      */
    private TOK number(Token* t)
    {
        int base = 10;
        const start = p; // remembered so that Lreal can rescan the literal as a real
        uinteger_t n = 0; // unsigned >=64 bit integer type
        int d; // numeric value of the digit just consumed
        bool err = false; // set once an error was reported, to suppress follow-up errors
        bool overflow = false;
        bool anyBinaryDigitsNoSingleUS = false;
        bool anyHexDigitsNoSingleUS = false;
        dchar c = *p;
        // A leading '0' selects the base from the character that follows it
        if (c == '0')
        {
            ++p;
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                // old-style octal; digits 8 and 9 are diagnosed in the main loop
                base = 8;
                break;
            case 'x':
            case 'X':
                ++p;
                base = 16;
                break;
            case 'b':
            case 'B':
                ++p;
                base = 2;
                break;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
                    goto Ldone; // if ".identifier" or ".unicode"
                goto Lreal; // '.' is part of current token
            case 'i':
            case 'f':
            case 'F':
                goto Lreal;
            case '_':
                ++p;
                base = 8;
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                break;
            default:
                break;
            }
        }
        // Main loop: accumulate digits into n until a non-digit ends the integer
        // (Ldone) or shows that this is really a floating point literal (Lreal)
        while (1)
        {
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                ++p;
                d = c - '0';
                break;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
                ++p;
                if (base != 16)
                {
                    // outside of hex, these letters are an exponent or a float suffix
                    if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
                        goto Lreal;
                }
                if (c >= 'a')
                    d = c + 10 - 'a';
                else
                    d = c + 10 - 'A';
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                goto Ldone;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 2)
                    goto Ldone; // a binary integer never continues into a floating point literal
                goto Lreal; // otherwise as part of a floating point literal
            case 'p':
            case 'P':
            case 'i':
            Lreal:
                // Not an integer after all: rewind and let inreal() lex the whole literal
                p = start;
                return inreal(t);
            case '_':
                // digit separator, carries no value
                ++p;
                continue;
            default:
                goto Ldone;
            }
            // got a digit here, set any necessary flags, check for errors
            anyHexDigitsNoSingleUS = true;
            anyBinaryDigitsNoSingleUS = true;
            if (!err && d >= base)
            {
                error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
                                                     base == 8 ? "octal".ptr :
                                                     "decimal".ptr, c);
                err = true;
            }
            // Avoid expensive overflow check if we aren't at risk of overflow
            if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
                n = n * base + d;
            else
            {
                import core.checkedint : mulu, addu;

                n = mulu(n, base, overflow);
                n = addu(n, d, overflow);
            }
        }
    Ldone:
        if (overflow && !err)
        {
            error("integer overflow");
            err = true;
        }
        // A bare "0x" or "0b" prefix without any digit is not a literal
        if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
            (base == 16 && !anyHexDigitsNoSingleUS))
            error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
        enum FLAGS : int
        {
            none = 0,
            decimal = 1, // decimal
            unsigned = 2, // u or U suffix
            long_ = 4, // L suffix
        }

        FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
        // Parse trailing 'u', 'U', 'l' or 'L' in any combination
        const psuffix = p;
        while (1)
        {
            FLAGS f;
            switch (*p)
            {
            case 'U':
            case 'u':
                f = FLAGS.unsigned;
                goto L1;
            case 'l':
                f = FLAGS.long_;
                error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
                goto L1;
            case 'L':
                f = FLAGS.long_;
            L1:
                p++;
                // the same kind of suffix given twice is an error
                if ((flags & f) && !err)
                {
                    error("unrecognized token");
                    err = true;
                }
                flags = cast(FLAGS)(flags | f);
                continue;
            default:
                break;
            }
            break;
        }
        if (base == 8 && n >= 8)
        {
            if (err)
                // can't translate invalid octal value, just show a generic message
                error("octal literals larger than 7 are no longer supported");
            else
                error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
                    n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
        }
        // Choose the token kind from the suffix flags and the magnitude of n
        TOK result;
        switch (flags)
        {
        case FLAGS.none:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else if (n & 0xFFFFFFFF00000000L)
                result = TOK.int64Literal;
            else if (n & 0x80000000)
                result = TOK.uns32Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.decimal:
            /* First that fits: int, long; a value with the top bit set
             * becomes ulong
             */
            if (n & 0x8000000000000000L)
            {
                result = TOK.uns64Literal;
            }
            else if (n & 0xFFFFFFFF80000000L)
                result = TOK.int64Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.unsigned:
        case FLAGS.decimal | FLAGS.unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.uns32Literal;
            break;
        case FLAGS.decimal | FLAGS.long_:
            // a decimal literal with an L suffix must fit into a signed 64 bit value
            if (n & 0x8000000000000000L)
            {
                if (!err)
                {
                    error("signed integer overflow");
                    err = true;
                }
                result = TOK.uns64Literal;
            }
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.long_:
            // non-decimal literal with an L suffix: ulong if the top bit is set
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.unsigned | FLAGS.long_:
        case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
            result = TOK.uns64Literal;
            break;
        default:
            debug
            {
                printf("%x\n", flags);
            }
            assert(0);
        }
        t.unsvalue = n;
        return result;
    }
2124 
2125     /**************************************
2126      * Read in characters, converting them to real.
2127      * Bugs:
2128      *      Exponent overflow not detected.
2129      *      Too much requested precision is not detected.
2130      */
    private TOK inreal(Token* t)
    {
        //printf("Lexer::inreal()\n");
        debug
        {
            assert(*p == '.' || isdigit(*p));
        }
        // Cleared when the literal is malformed; the value is then set to zero
        // instead of being parsed
        bool isWellformedString = true;
        stringbuffer.setsize(0);
        auto pstart = p;
        bool hex = false;
        // Throughout the scan, c holds the character just consumed and p points
        // one past it
        dchar c = *p++;
        // Leading '0x'
        if (c == '0')
        {
            c = *p++;
            if (c == 'x' || c == 'X')
            {
                hex = true;
                c = *p++;
            }
        }
        // Digits to left of '.'
        while (1)
        {
            if (c == '.')
            {
                c = *p++;
                break;
            }
            if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
            {
                c = *p++;
                continue;
            }
            break;
        }
        // Digits to right of '.'
        while (1)
        {
            if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
            {
                c = *p++;
                continue;
            }
            break;
        }
        // Exponent: 'e'/'E', or additionally 'p'/'P' for hex floats
        if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
        {
            c = *p++;
            if (c == '-' || c == '+')
            {
                c = *p++;
            }
            bool anyexp = false; // true once at least one exponent digit was seen
            while (1)
            {
                if (isdigit(c))
                {
                    anyexp = true;
                    c = *p++;
                    continue;
                }
                if (c == '_')
                {
                    c = *p++;
                    continue;
                }
                if (!anyexp)
                {
                    error("missing exponent");
                    isWellformedString = false;
                }
                break;
            }
        }
        else if (hex)
        {
            error("exponent required for hex float");
            isWellformedString = false;
        }
        // Step back so that p points at c, the first character that is not
        // part of the digits/exponent
        --p;
        // Copy the scanned text into stringbuffer, dropping the '_' separators
        while (pstart < p)
        {
            if (*pstart != '_')
                stringbuffer.writeByte(*pstart);
            ++pstart;
        }
        stringbuffer.writeByte(0);
        auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
        TOK result;
        bool isOutOfRange = false;
        t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
        // Optional size suffix selects the literal kind; no suffix means a
        // 64 bit float literal
        switch (*p)
        {
        case 'F':
        case 'f':
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
            result = TOK.float32Literal;
            p++;
            break;
        default:
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
            result = TOK.float64Literal;
            break;
        case 'l':
            error("use 'L' suffix instead of 'l'");
            goto case 'L';
        case 'L':
            result = TOK.float80Literal;
            p++;
            break;
        }
        // Optional imaginary suffix turns the float kind into the matching
        // imaginary kind
        if (*p == 'i' || *p == 'I')
        {
            if (*p == 'I')
                error("use 'i' suffix instead of 'I'");
            p++;
            switch (result)
            {
            case TOK.float32Literal:
                result = TOK.imaginary32Literal;
                break;
            case TOK.float64Literal:
                result = TOK.imaginary64Literal;
                break;
            case TOK.float80Literal:
                result = TOK.imaginary80Literal;
                break;
            default:
                break;
            }
        }
        // Out-of-range values are only reported for the 32 and 64 bit kinds
        const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
        if (isOutOfRange && !isLong)
        {
            const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
            error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
        }
        debug
        {
            // sanity check: only float and imaginary literal kinds may be returned
            switch (result)
            {
            case TOK.float32Literal:
            case TOK.float64Literal:
            case TOK.float80Literal:
            case TOK.imaginary32Literal:
            case TOK.imaginary64Literal:
            case TOK.imaginary80Literal:
                break;
            default:
                assert(0);
            }
        }
        return result;
    }
2289 
2290     final Loc loc() pure @nogc
2291     {
2292         scanloc.charnum = cast(uint)(1 + p - line);
2293         scanloc.offset = cast(uint)(p - base);
2294         return scanloc;
2295     }
2296 
2297     final void error(const(char)* format, ...)
2298     {
2299         va_list args;
2300         va_start(args, format);
2301         handleDiagnostic(token.loc, Severity.error, format, args);
2302         va_end(args);
2303     }
2304 
2305     final void error(const ref Loc loc, const(char)* format, ...)
2306     {
2307         va_list args;
2308         va_start(args, format);
2309         handleDiagnostic(loc, Severity.error, format, args);
2310         va_end(args);
2311     }
2312 
2313     final void errorSupplemental(const ref Loc loc, const(char)* format, ...)
2314     {
2315         va_list args;
2316         va_start(args, format);
2317         handleDiagnostic(loc, Severity.error, format, args, true);
2318         va_end(args);
2319     }
2320 
2321     final void warning(const ref Loc loc, const(char)* format, ...)
2322     {
2323         va_list args;
2324         va_start(args, format);
2325         handleDiagnostic(loc, Severity.warning, format, args);
2326         va_end(args);
2327     }
2328 
2329     final void warningSupplemental(const ref Loc loc, const(char)* format, ...)
2330     {
2331         va_list args;
2332         va_start(args, format);
2333         handleDiagnostic(loc, Severity.warning, format, args, true);
2334         va_end(args);
2335     }
2336 
2337     final void deprecation(const(char)* format, ...)
2338     {
2339         va_list args;
2340         va_start(args, format);
2341         handleDiagnostic(token.loc, Severity.deprecation, format, args);
2342         va_end(args);
2343     }
2344 
2345     final void deprecationSupplemental(const(char)* format, ...)
2346     {
2347         va_list args;
2348         va_start(args, format);
2349         handleDiagnostic(token.loc, Severity.deprecation, format, args, true);
2350         va_end(args);
2351     }
2352 
2353     /*********************************************
2354      * parse:
2355      *      #line linnum [filespec]
2356      * also allow __LINE__ for linnum, and __FILE__ for filespec
2357      */
2358     private void poundLine()
2359     {
2360         auto linnum = this.scanloc.linnum;
2361         const(char)* filespec = null;
2362         const loc = this.loc();
2363         Token tok;
2364         scan(&tok);
2365         if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
2366         {
2367             const lin = cast(int)(tok.unsvalue - 1);
2368             if (lin != tok.unsvalue - 1)
2369                 error("line number `%lld` out of range", cast(ulong)tok.unsvalue);
2370             else
2371                 linnum = lin;
2372         }
2373         else if (tok.value == TOK.line)
2374         {
2375         }
2376         else
2377             goto Lerr;
2378         while (1)
2379         {
2380             switch (*p)
2381             {
2382             case 0:
2383             case 0x1A:
2384             case '\n':
2385             Lnewline:
2386                 if (!inTokenStringConstant)
2387                 {
2388                     this.scanloc.linnum = linnum;
2389                     if (filespec)
2390                         this.scanloc.filename = filespec;
2391                 }
2392                 return;
2393             case '\r':
2394                 p++;
2395                 if (*p != '\n')
2396                 {
2397                     p--;
2398                     goto Lnewline;
2399                 }
2400                 continue;
2401             case ' ':
2402             case '\t':
2403             case '\v':
2404             case '\f':
2405                 p++;
2406                 continue; // skip white space
2407             case '_':
2408                 if (memcmp(p, "__FILE__".ptr, 8) == 0)
2409                 {
2410                     p += 8;
2411                     filespec = mem.xstrdup(scanloc.filename);
2412                     continue;
2413                 }
2414                 goto Lerr;
2415             case '"':
2416                 if (filespec)
2417                     goto Lerr;
2418                 stringbuffer.setsize(0);
2419                 p++;
2420                 while (1)
2421                 {
2422                     uint c;
2423                     c = *p;
2424                     switch (c)
2425                     {
2426                     case '\n':
2427                     case '\r':
2428                     case 0:
2429                     case 0x1A:
2430                         goto Lerr;
2431                     case '"':
2432                         stringbuffer.writeByte(0);
2433                         filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
2434                         p++;
2435                         break;
2436                     default:
2437                         if (c & 0x80)
2438                         {
2439                             uint u = decodeUTF();
2440                             if (u == PS || u == LS)
2441                                 goto Lerr;
2442                         }
2443                         stringbuffer.writeByte(c);
2444                         p++;
2445                         continue;
2446                     }
2447                     break;
2448                 }
2449                 continue;
2450             default:
2451                 if (*p & 0x80)
2452                 {
2453                     uint u = decodeUTF();
2454                     if (u == PS || u == LS)
2455                         goto Lnewline;
2456                 }
2457                 goto Lerr;
2458             }
2459         }
2460     Lerr:
2461         error(loc, "#line integer [\"filespec\"]\\n expected");
2462     }
2463 
2464     /********************************************
2465      * Decode UTF character.
2466      * Issue error messages for invalid sequences.
2467      * Return decoded character, advance p to last character in UTF sequence.
2468      */
2469     private uint decodeUTF()
2470     {
2471         const s = p;
2472         assert(*s & 0x80);
2473         // Check length of remaining string up to 4 UTF-8 characters
2474         size_t len;
2475         for (len = 1; len < 4 && s[len]; len++)
2476         {
2477         }
2478         size_t idx = 0;
2479         dchar u;
2480         const msg = utf_decodeChar(s[0 .. len], idx, u);
2481         p += idx - 1;
2482         if (msg)
2483         {
2484             error("%.*s", cast(int)msg.length, msg.ptr);
2485         }
2486         return u;
2487     }
2488 
2489     /***************************************************
2490      * Parse doc comment embedded between t.ptr and p.
2491      * Remove trailing blanks and tabs from lines.
2492      * Replace all newlines with \n.
2493      * Remove leading comment character from each line.
2494      * Decide if it's a lineComment or a blockComment.
2495      * Append to previous one for this token.
2496      *
2497      * If newParagraph is true, an extra newline will be
2498      * added between adjoining doc comments.
2499      */
    private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
    {
        /* ct tells us which kind of comment it is: '/', '*', or '+'
         */
        const ct = t.ptr[2];
        /* Start of comment text skips over / * *, / + +, or / / /
         */
        const(char)* q = t.ptr + 3; // start of comment text
        const(char)* qend = p; // one past the end of the comment text
        // For block comments, step back over the two closing characters
        if (ct == '*' || ct == '+')
            qend -= 2;
        /* Scan over initial row of ****'s or ++++'s or ////'s
         */
        for (; q < qend; q++)
        {
            if (*q != ct)
                break;
        }
        /* Remove leading spaces until start of the comment
         */
        // linestart is nonzero while only whitespace has been seen on the
        // current line
        int linestart = 0;
        if (ct == '/')
        {
            while (q < qend && (*q == ' ' || *q == '\t'))
                ++q;
        }
        else if (q < qend)
        {
            // Block comment: skip a line break that directly follows the opening row
            if (*q == '\r')
            {
                ++q;
                if (q < qend && *q == '\n')
                    ++q;
                linestart = 1;
            }
            else if (*q == '\n')
            {
                ++q;
                linestart = 1;
            }
        }
        /* Remove trailing row of ****'s or ++++'s
         */
        if (ct != '/')
        {
            for (; q < qend; qend--)
            {
                if (qend[-1] != ct)
                    break;
            }
        }
        /* Comment is now [q .. qend].
         * Canonicalize it into buf[].
         */
        OutBuffer buf;

        // Drop any spaces and tabs at the end of what has been written to buf
        void trimTrailingWhitespace()
        {
            const s = buf[];
            auto len = s.length;
            while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
                --len;
            buf.setsize(len);
        }

        for (; q < qend; q++)
        {
            char c = *q;
            switch (c)
            {
            case '*':
            case '+':
                // The first comment character on a line is decoration: drop it
                // together with the whitespace in front of it
                if (linestart && c == ct)
                {
                    linestart = 0;
                    /* Trim preceding whitespace up to preceding \n
                     */
                    trimTrailingWhitespace();
                    continue;
                }
                break;
            case ' ':
            case '\t':
                // whitespace is copied and does not end the "start of line" state
                break;
            case '\r':
                if (q[1] == '\n')
                    continue; // skip the \r
                goto Lnewline;
            default:
                // 226 128 168 and 226 128 169 are the UTF-8 encodings of LS and PS
                if (c == 226)
                {
                    // If LS or PS
                    if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
                    {
                        q += 2;
                        goto Lnewline;
                    }
                }
                linestart = 0;
                break;
            Lnewline:
                c = '\n'; // replace all newlines with \n
                goto case;
            case '\n':
                linestart = 1;
                /* Trim trailing whitespace
                 */
                trimTrailingWhitespace();
                break;
            }
            buf.writeByte(c);
        }
        /* Trim trailing whitespace (if the last line does not have newline)
         */
        trimTrailingWhitespace();

        // Always end with a newline
        const s = buf[];
        if (s.length == 0 || s[$ - 1] != '\n')
            buf.writeByte('\n');

        // It's a line comment if the start of the doc comment comes
        // after other non-whitespace on the same line.
        auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
        // Combine with previous doc comment, if any
        if (*dc)
            *dc = combineComments(*dc, buf[], newParagraph).toDString();
        else
            *dc = buf.extractSlice(true);
    }
2630 
2631     /********************************************
2632      * Combine two document comments into one,
2633      * separated by an extra newline if newParagraph is true.
2634      */
2635     static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
2636     {
2637         //printf("Lexer::combineComments('%s', '%s', '%i')\n", c1, c2, newParagraph);
2638         const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n'
2639         if (!c1)
2640             return c2.ptr;
2641         if (!c2)
2642             return c1.ptr;
2643 
2644         int insertNewLine = 0;
2645         if (c1.length && c1[$ - 1] != '\n')
2646             insertNewLine = 1;
2647         const retSize = c1.length + insertNewLine + newParagraphSize + c2.length;
2648         auto p = cast(char*)mem.xmalloc_noscan(retSize + 1);
2649         p[0 .. c1.length] = c1[];
2650         if (insertNewLine)
2651             p[c1.length] = '\n';
2652         if (newParagraph)
2653             p[c1.length + insertNewLine] = '\n';
2654         p[retSize - c2.length .. retSize] = c2[];
2655         p[retSize] = 0;
2656         return p;
2657     }
2658 
2659 private:
2660     void endOfLine() pure @nogc @safe
2661     {
2662         scanloc.linnum++;
2663         line = p;
2664     }
2665 }
2666 
2667 /// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    private __gshared bool initdone = false;

    // Note: the buffers below are only filled in by `initialize`, so every
    // read of them must be preceded by a call to it.
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;
    __gshared char[8 + 1] time;
    __gshared char[24 + 1] timestamp;

    /// Fill `date`, `time` and `timestamp` on the first call; later calls
    /// return immediately. `loc` is only used to report a malformed
    /// `SOURCE_DATE_EPOCH` value.
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;
        initdone = true;

        time_t now;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        auto epochEnv = getenv("SOURCE_DATE_EPOCH");
        if (epochEnv)
        {
            // Take the time from the environment instead of the clock
            if (!now.parseDigits(epochEnv.toDString()))
                error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epochEnv);
        }
        else
        {
            // `.time` is the C function; plain `time` would be the member buffer
            .time(&now);
        }

        const stamp = ctime(&now);
        assert(stamp);
        // Cut the pieces out of the ctime() text at fixed offsets:
        // the first 24 characters, 8 characters at offset 11, and
        // 6 characters at offset 4 followed by 4 characters at offset 20.
        sprintf(&timestamp[0], "%.24s", stamp);
        sprintf(&time[0], "%.8s", stamp + 11);
        sprintf(&date[0], "%.6s %.4s", stamp + 4, stamp + 20);
    }
}
2702 
// Valid escape sequences: `Lexer.escapeSequence` must return the expected
// character, consume the whole input and issue no diagnostic.
unittest
{
    import dmd.console;
    // Any diagnostic issued while this handler is installed fails the test
    nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                   const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &assertDiagnosticHandler;

    // `sequence` is the text handed to escapeSequence; the call must return
    // `expected` and leave the pointer at the end of `sequence`
    static void test(T)(string sequence, T expected)
    {
        auto p = cast(const(char)*)sequence.ptr;
        assert(expected == Lexer.escapeSequence(Loc.initial, p));
        assert(p == sequence.ptr + sequence.length);
    }

    // single character escapes
    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    // two digit hex escapes
    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    // octal escapes of one to three digits
    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    // four digit unicode escapes
    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    // eight digit unicode escapes
    test(`U0001f603`, '\U0001f603');

    // named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    // remove the handler again so it does not affect other code
    diagnosticHandler = null;
}
// Malformed escape sequences: each one must report exactly the expected error
// message, return the expected value and consume the expected number of
// characters.
unittest
{
    import dmd.console;
    string expected;    // error message the next diagnostic must match
    bool gotError;      // set once the handler has been invoked

    // Formats the incoming diagnostic and compares it with `expected`.
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        // Bounded formatting: an unexpectedly long (or failed) format must
        // trip the assertion below instead of overrunning the stack buffer
        // or producing an out-of-range slice.
        const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
        assert(written >= 0 && cast(size_t) written < buffer.length);
        auto actual = buffer[0 .. written];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;
    // Uninstall the handler when the test scope is left.
    scope (exit) diagnosticHandler = null;

    // Lexes `sequence` (the text following the backslash) and verifies the
    // reported error, the returned value and how far the scan pointer moved.
    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength)
    {
        // Save the global error count and put it back afterwards so this
        // check leaves `global.errors` as it found it.
        uint errors = global.errors;
        scope (exit) global.errors = errors;

        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
    }

    // Unknown single-character escapes
    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);

    // Too few hex digits
    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    // Code points rejected as invalid UTF characters
    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    // Non-hex character right after the x/u/U introducer
    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    // Named entities: unknown name, and missing terminating ';'
    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    // Octal value above \377
    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);
}