1 /**
2  * Check the arguments to `printf` and `scanf` against the `format` string.
3  *
4  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8  * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10  */
11 module dmd.chkformat;
12 
13 //import core.stdc.stdio : printf, scanf;
14 import core.stdc.ctype : isdigit;
15 
16 import dmd.errors;
17 import dmd.expression;
18 import dmd.globals;
19 import dmd.mtype;
20 import dmd.target;
21 
22 
/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue a diagnostic (a deprecation message) for
 * each incompatibility found.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string needs more arguments than `args`
 *      supplies (checking stops at that point); `false` otherwise.
 *      Type mismatches and invalid specifiers are diagnosed but do not
 *      affect the return value.
 * References:
 * C99 7.19.6.1
 * http://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n = 0;       // index into `args` of the next argument to consume
    for (size_t i = 0; i < format.length;)
    {
        // Skip ordinary characters; only `%` starts a specifier
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, quoted in diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return `null` when none are left
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` (of type `tactual`) does not match what the current
        // specifier (`slice`) expects; `prefix` may be null
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // A `*` width consumes one `int` argument ahead of the value
        if (widthStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        // A `*` precision likewise consumes one `int` argument
        if (precisionStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // only dereferenced after `t.ty == Tpointer` is established
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `%hhn` stores through a `signed char*`, i.e. a pointer to an
                // 8-bit integer (D `byte`), not a 16-bit one
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.isWindows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);      // handled before any argument is fetched
        }
    }
    return false;
}
269 
/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue a diagnostic (a deprecation message) for
 * each incompatibility found.
 *
 * Follows the C99 specification for scanf.
 *
 * Every scanf conversion stores through its argument, so every
 * argument is required to be a pointer to the type named by the
 * specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string needs more arguments than `args`
 *      supplies (checking stops at that point); `false` otherwise.
 *      Type mismatches and invalid specifiers are diagnosed but do not
 *      affect the return value.
 * References:
 * C99 7.19.6.2
 * http://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;       // index into `args` of the next argument to consume
    for (size_t i = 0; i < format.length;)
    {
        // Skip ordinary characters; only `%` starts a specifier
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, quoted in diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return `null` when none are left.
        // Assignment-suppressed (`*`) specifiers never get here, see the `continue` above.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` (of type `tactual`) does not match what the current
        // specifier (`slice`) expects; `prefix` may be null
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // only dereferenced after `t.ty == Tpointer` is established
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // `hh` stores through a `signed char*`, i.e. a pointer to an
                // 8-bit integer (D `byte`), not a 16-bit one
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                // The width of C `unsigned long` follows the target's C `long`
                // size, not the pointer width, so select the pointee type with
                // the same `c_longsize` the message text is chosen by
                if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64)))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                // Mirrors the `jd` case, which requires a 64-bit pointee
                // irrespective of the target's pointer width
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;
            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;
            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.isWindows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);      // handled before any argument is fetched
        }
    }
    return false;
}
479 
480 private:
481 
/**************************************
 * Parse a scanf *format specifier*, which has the form:
 *
 * `%[*][width][length]specifier`
 *
 * A `[...]` scanset may appear after the width; it is only checked for
 * a closing `]` and must itself be followed by a specifier.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *          on exit, index just past the parsed text, also when
 *          `Format.error` is returned
 *      asterisk = set if the `*` (assignment suppression) sub-specifier is present
 * Returns:
 *      the recognized `Format`, `Format.percent` for `%%`,
 *      or `Format.error` if the specifier is malformed
 */
pure nothrow @safe
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk)
{
    size_t pos = idx;
    assert(format[pos] == '%');
    const end = format.length;

    // Give up, recording how far parsing got
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // A lone `%` at the very end of the string
    if (++pos == end)
        return fail();

    // `%%` is a literal percent sign
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // Optional `*`; something must follow it
    if (format[pos] == '*')
    {
        if (++pos == end)
            return fail();
        asterisk = true;
    }

    // Optional field width: a run of decimal digits
    while (isdigit(format[pos]))
    {
        if (++pos == end)
            return fail();
    }

    // Optional scanset. Its contents are arbitrary, so only look for the
    // terminating `]`.
    if (format[pos] == '[')
    {
        while (pos < end && format[pos] != ']')
            ++pos;

        // ran off the end without seeing `]`
        if (pos == end)
            return fail();

        // step past `]`; a specifier has to come next
        if (++pos == end)
            return fail();
    }

    // Length modifier and conversion character. Whether or not this
    // succeeds, `idx` reports the position the generic parser reached and
    // its result (possibly `Format.error`) is passed through unchanged.
    char conversion;
    const parsed = parseGenericFormatSpecifier(format, pos, conversion);
    idx = pos;
    return parsed;
}
570 
/**************************************
 * Parse a printf *format specifier*, which has the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *          on exit, index just past the parsed text, also when
 *          `Format.error` is returned
 *      widthStar = set if the field width is given as `*`
 *      precisionStar = set if the precision is given as `*`
 * Returns:
 *      the recognized `Format`, `Format.percent` for `%%`,
 *      or `Format.error` if the specifier is malformed or combines
 *      flags/width/precision with a conversion that rejects them
 */
pure nothrow @safe
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar)
{
    size_t pos = idx;
    assert(format[pos] == '%');
    const end = format.length;

    bool sawHash;           // `#` flag
    bool sawZero;           // `0` flag
    bool sawOtherFlag;      // any of `-`, `+`, ` `
    bool sawWidth;          // a field width, numeric or `*`
    bool sawPrecision;      // a `.`, with or without digits after it

    // Give up, recording how far parsing got
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Step to the next character; `true` means the string is exhausted
    bool stepHitsEnd()
    {
        return ++pos == end;
    }

    // A lone `%` at the very end of the string
    if (stepHitsEnd())
        return fail();

    // `%%` is a literal percent sign
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // Flags: any number of them, in any order
    flagScan:
    for (;;)
    {
        switch (format[pos])
        {
            case '-', '+', ' ':
                sawOtherFlag = true;
                break;

            case '#':
                sawHash = true;
                break;

            case '0':
                sawZero = true;
                break;

            default:
                break flagScan;
        }
        if (stepHitsEnd())
            return fail();
    }

    // Field width: either `*` or a decimal number not starting with `0`
    // (a leading `0` was already taken as a flag above)
    if (format[pos] == '*')
    {
        sawWidth = true;
        widthStar = true;
        if (stepHitsEnd())
            return fail();
    }
    else if (format[pos] >= '1' && format[pos] <= '9')
    {
        sawWidth = true;
        do
        {
            if (stepHitsEnd())
                return fail();
        }
        while (format[pos] >= '0' && format[pos] <= '9');
    }

    // Precision: `.` followed by `*`, by digits, or by nothing
    if (format[pos] == '.')
    {
        sawPrecision = true;
        if (stepHitsEnd())
            return fail();

        if (format[pos] == '*')
        {
            precisionStar = true;
            if (stepHitsEnd())
                return fail();
        }
        else
        {
            while (format[pos] >= '0' && format[pos] <= '9')
            {
                if (stepHitsEnd())
                    return fail();
            }
        }
    }

    // Length modifier and conversion character
    char conversion;
    const Format parsed = parseGenericFormatSpecifier(format, pos, conversion);
    if (parsed == Format.error)
        return fail();

    // Reject flag/width/precision combinations the conversion does not allow
    const bool rejected =
        (conversion == 'c' || conversion == 's') ? (sawHash || sawZero) :
        (conversion == 'd' || conversion == 'i') ? sawHash :
        (conversion == 'n') ? (sawHash || sawZero || sawPrecision || sawWidth || sawOtherFlag) :
        false;
    if (rejected)
        return fail();

    idx = pos;
    return parsed;
}
731 
/* The kinds of format specification the checkers distinguish.
   Variations that make no difference to argument checking are merged
   into one member (e.g. `f`, `e`, `g`, `a` are all represented by `g`).

   The comments name the printf argument type; for `scanf` the argument
   is a pointer to that type.
 */
enum Format
{
    // signed integer conversions
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t
    td,         // ptrdiff_t

    // unsigned integer conversions
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t

    // floating point conversions
    g,          // double for printf, float for scanf
    lg,         // double
    Lg,         // long double

    // strings and characters
    s,          // char string
    ls,         // wchar_t string
    c,          // char
    lc,         // wint_t

    p,          // pointer

    // `n` conversions
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t

    // results that do not describe an argument
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}
773 
/**************************************
 * Parse the *length modifier* and the *specifier*, which together have
 * the form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the start of the length modifier (or of
 *          the specifier when there is no modifier); on exit, index just
 *          past the parsed text, also when `Format.error` is returned
 *      genSpecifier = the conversion character itself, without the
 *          length modifier. For instance, it is set to `d` if the
 *          format is `hhd`.
 * Returns:
 *      the recognized `Format`, or `Format.error` if the string ends
 *      early, the conversion character is unknown, or the length
 *      modifier is not accepted for that conversion
 */
pure @safe nothrow
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier)
{
    const end = format.length;

    // Length modifier. `j`, `z`, `t` and `L` stand alone; `h` and `l` may
    // be doubled to `hh` / `ll`.
    const char modifier = format[idx];
    const bool standalone = modifier == 'j' || modifier == 'z' ||
                            modifier == 't' || modifier == 'L';
    const bool repeatable = modifier == 'h' || modifier == 'l';
    bool doubled;           // `hh` or `ll`

    if (standalone || repeatable)
    {
        // a modifier must be followed by something
        if (++idx == end)
            return Format.error;

        if (repeatable && format[idx] == modifier)
        {
            doubled = true;
            if (++idx == end)
                return Format.error;
        }
    }

    // Select, from one family of integer-like formats, the member that
    // corresponds to the length modifier. `L` is not valid for any of them.
    Format byLength(Format hh, Format h, Format ll, Format l,
                    Format j, Format z, Format t, Format plain)
    {
        switch (modifier)
        {
            case 'h':   return doubled ? hh : h;
            case 'l':   return doubled ? ll : l;
            case 'j':   return j;
            case 'z':   return z;
            case 't':   return t;
            case 'L':   return Format.error;
            default:    return plain;
        }
    }

    // True for every modifier except none and a single `l`
    const bool notPlainOrL = standalone || doubled || modifier == 'h';

    // Conversion character
    const char conversion = format[idx];
    genSpecifier = conversion;

    Format result;
    switch (conversion)
    {
        case 'd', 'i':
            result = byLength(Format.hhd, Format.hd, Format.lld, Format.ld,
                              Format.jd, Format.zd, Format.td, Format.d);
            break;

        case 'u', 'o', 'x', 'X':
            // `z` and `t` share the signed members
            result = byLength(Format.hhu, Format.hu, Format.llu, Format.lu,
                              Format.ju, Format.zd, Format.td, Format.u);
            break;

        case 'f', 'F', 'e', 'E', 'g', 'G', 'a', 'A':
            if (modifier == 'L')
                result = Format.Lg;
            else if (notPlainOrL)
                result = Format.error;
            else if (modifier == 'l')
                result = Format.lg;
            else
                result = Format.g;
            break;

        case 'c':
            if (notPlainOrL)
                result = Format.error;
            else if (modifier == 'l')
                result = Format.lc;
            else
                result = Format.c;
            break;

        case 's':
            if (notPlainOrL)
                result = Format.error;
            else if (modifier == 'l')
                result = Format.ls;
            else
                result = Format.s;
            break;

        case 'p':
            // no length modifier at all is accepted here
            result = (notPlainOrL || modifier == 'l') ? Format.error : Format.p;
            break;

        case 'n':
            result = byLength(Format.hhn, Format.hn, Format.lln, Format.ln,
                              Format.jn, Format.zn, Format.tn, Format.n);
            break;

        default:
            result = Format.error;
            break;
    }

    // The conversion character is consumed even when it was rejected
    ++idx;
    return result;
}
921 
/* Unit tests for the three format-specifier parsers in this module.
 *
 * Every case resets `idx` to 0, parses one specifier, and checks the returned
 * `Format` value. Most cases also check the position `idx` was advanced to, and
 * some check the flags the parser reports (`genSpecifier`, `widthStar`,
 * `precisionStar`, `asterisk`).
 */
unittest
{
    /* parseGenericFormatSpecifier
     *
     * The input here is only the length modifier and conversion character;
     * there is no leading '%'.
     */

    char genSpecifier;
    size_t idx;     // default-initialized to 0 for the first case

    assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd);
    assert(genSpecifier == 'd');

    idx = 0;
    assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn);
    assert(genSpecifier == 'n');

    // 'i' is reported as the conversion character but maps to the 'd' Format
    idx = 0;
    assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd);
    assert(genSpecifier == 'i');

    idx = 0;
    assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu);
    assert(genSpecifier == 'u');

    // unknown conversion character
    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);

    /* parsePrintfFormatSpecifier
     */

    bool widthStar;
    bool precisionStar;

    // one for each Format
    idx = 0;
    assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 2);
    assert(!widthStar && !precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent);
    assert(idx == 2);

    // Synonyms: conversion characters that share a Format with another one
    idx = 0;
    assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.lg);
    assert(idx == 3);

    // width, precision: a '*' in either position must be reported via the flags
    idx = 0;
    assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 3);
    assert(widthStar && !precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 4);
    assert(!widthStar && precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 5);
    assert(widthStar && precisionStar);

    // Too short formats: the string ends before a conversion character.
    // Expect Format.error with idx advanced to the end of the string.
    {
        foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                     "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
        {
            idx = 0;
            assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
            assert(idx == s.length);
        }
    }

    // Undefined format combinations: complete specifiers that must be rejected.
    // Expect Format.error with idx advanced past the whole specifier.
    {
        foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                     "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                     "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                     "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                     "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
        {
            idx = 0;
            assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
            assert(idx == s.length);
        }
    }

    /* parseScanfFormatSpecifier
     */

    bool asterisk;

    // one for each Format
    idx = 0;
    assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d);
    assert(idx == 2);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%jd", idx, asterisk) == Format.jd);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%td", idx, asterisk) == Format.td);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hhu", idx, asterisk) == Format.hhu);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%ju", idx, asterisk) == Format.ju);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%ls", idx, asterisk) == Format.ls);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent);
    assert(idx == 2);

    // Synonyms and the remaining single-character conversions
    idx = 0;
    assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%n", idx, asterisk) == Format.n);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%a", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c);
    assert(idx == 2);

    // asterisk: a leading '*' sets the flag; a numeric width alone does not
    idx = 0;
    assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d);
    assert(idx == 3);
    assert(asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld);
    assert(idx == 4);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd);
    assert(idx == 10);
    assert(asterisk);

    // scansets: a bracketed set followed by a generic specifier
    idx = 0;
    assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s);
    assert(idx == 10);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd);
    assert(idx == 12);
    assert(asterisk);

    // Too short formats: expect Format.error with idx at the end of the string
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }

    // Undefined format combinations: expect Format.error with idx at the end
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }

    // Invalid scansets: empty, unterminated, or not followed by a specifier
    foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }
}