1 /**
2  * Check the arguments to `printf` and `scanf` against the `format` string.
3  *
4  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8  * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10  */
11 module dmd.chkformat;
12 
13 //import core.stdc.stdio : printf, scanf;
14 import core.stdc.ctype : isdigit;
15 
16 import dmd.cond;
17 import dmd.errors;
18 import dmd.expression;
19 import dmd.globals;
20 import dmd.identifier;
21 import dmd.mtype;
22 import dmd.target;
23 
24 
/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue deprecation messages for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if checking was abandoned because the argument list ran out
 *      before the format specifiers did; `false` otherwise (including when
 *      only type-mismatch or invalid-specifier messages were issued)
 * References:
 * C99 7.19.6.1
 * http://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n, gnu_m_count;    // index in args / number of Format.GNU_m
    for (size_t i = 0; i < format.length;)
    {
        // Ordinary characters are of no interest; only `%` starts a specifier.
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in messages
        i = j;                          // resume scanning after the specifier

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        if (fmt == Format.GNU_m)
            ++gnu_m_count;

        /* Fetch the next unconsumed argument.
         * Returns `null` once `args` is exhausted; in that case either a
         * "more format specifiers" message is issued, or `skip` is set so the
         * caller silently moves on (this happens once a `%m` has been seen).
         */
        Expression getNextArg(ref bool skip)
        {
            if (n == args.length)
            {
                if (args.length < (n + 1) - gnu_m_count)
                    deprecation(loc, "more format specifiers than %zd arguments", n);
                else
                    skip = true;
                return null;
            }
            return args[n++];
        }

        /* Report that `arg` has type `tactual` where `specifier` requires `texpect`.
         * `prefix` (may be null) qualifies the argument, e.g. "width ".
         */
        void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)specifier.length, specifier.ptr, texpect, tactual.toChars());
        }

        // A `*` field width consumes an `int` argument.
        if (widthStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", slice, e, "int", t);
        }

        // A `.*` precision consumes an `int` argument.
        if (precisionStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", slice, e, "int", t);
        }

        // The argument being converted by the specifier itself.
        bool skip;
        auto e = getNextArg(skip);
        if (skip)
            continue;
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // pointee type; only read after `t.ty == Tpointer` is established
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, slice, e, "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, slice, e, "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int" : "long"), t);
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, slice, e, "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t", t);
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, slice, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, slice, e, "ptrdiff_t", t);
                break;

            case Format.GNU_a:  // Format.GNU_a is only for scanf
            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, slice, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, slice, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, slice, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, slice, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, slice, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // The pointee must be a `byte` (Tint8), matching the `byte*` in the message.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, slice, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, slice, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, slice, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, slice, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                // wchar_t is 16 bits on Windows targets and 32 bits elsewhere
                const twchar_t = global.params.targetOS == TargetOS.Windows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, slice, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
                break;  // not assert(0) because it may go through it if there are extra arguments

            case Format.percent:
                assert(0);  // "%%" was filtered out before any argument was fetched
        }
    }
    return false;
}
290 
/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue deprecation messages for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if checking was abandoned because the argument list ran out
 *      before the format specifiers did; `false` otherwise (including when
 *      only type-mismatch or invalid-specifier messages were issued)
 * References:
 * C99 7.19.6.2
 * http://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;   // index of the next unconsumed entry in args
    for (size_t i = 0; i < format.length;)
    {
        // Ordinary characters are of no interest; only `%` starts a specifier.
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in messages
        i = j;                          // resume scanning after the specifier

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        /* Fetch the next unconsumed argument, or issue a message and
         * return `null` if `args` is exhausted.
         */
        Expression getNextArg()
        {
            if (n == args.length)
            {
                if (!asterisk)
                    deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        /* Report that `arg` has type `tactual` where `specifier` requires `texpect`.
         * `prefix` (may be null) is prepended to the message.
         */
        void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)specifier.length, specifier.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // pointee type; only read after `t.ty == Tpointer` is established
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        // Every scanf conversion stores through a pointer, so each case
        // checks both that `t` is a pointer and what it points to.
        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, slice, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // The pointee must be a `byte` (Tint8), matching the `byte*` in the message.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, slice, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, slice, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, slice, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, slice, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, slice, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, slice, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, slice, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                // Keyed on the C `long` size (like `ld` and the message below),
                // not on the pointer width: the two differ on some 64-bit targets.
                if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64)))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, slice, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                // 64 bits regardless of target, mirroring the signed `jd` case
                // and the `ulong*` named in the message.
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, slice, e, "ulong*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, slice, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, slice, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, slice, e, "real*", t);
                break;

            case Format.GNU_a:
            case Format.GNU_m:
            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, slice, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                // wchar_t is 16 bits on Windows targets and 32 bits elsewhere
                const twchar_t = global.params.targetOS == TargetOS.Windows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, slice, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, slice, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);  // "%%" was filtered out before any argument was fetched
        }
    }
    return false;
}
504 
505 private:
506 
/**************************************
 * Parse one scanf *format specifier*, which has the shape:
 *
 * `%[*][width][length]specifier`
 *
 * A bracketed scanset (`[...]`) may appear after the width; it is only
 * checked for a closing `]` and must still be followed by a specifier.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *          on exit, index just past what was parsed
 *          (this holds for `Format.error` as well)
 *      asterisk = set to `true` when the `*` sub-specifier is present
 * Returns:
 *      the `Format` that was recognised, or `Format.error`
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    size_t pos = idx;
    assert(format[pos] == '%');
    const end = format.length;

    // Give up, telling the caller how far parsing got.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Step over the '%'
    if (++pos == end)
        return fail();

    // "%%" stands for a literal percent sign
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // Optional `*` sub-specifier
    if (format[pos] == '*')
    {
        if (++pos == end)
            return fail();
        asterisk = true;
    }

    // Optional field width: a run of decimal digits
    for (; isdigit(format[pos]);)
    {
        if (++pos == end)
            return fail();
    }

    /* Optional scanset.
     * Its contents are unconstrained, so only the closing bracket is looked for.
     */
    if (format[pos] == '[')
    {
        while (pos < end && format[pos] != ']')
            ++pos;

        // ran off the end without meeting `]`
        if (pos == end)
            return fail();

        // step over `]`; something must follow it
        // (kept as a separate test so that idx ends up at the right index)
        if (++pos == end)
            return fail();
    }

    /* Length modifier and conversion character.
     * Whatever the outcome, `pos` now marks where parsing stopped, which is
     * exactly what `idx` has to report for both success and failure.
     */
    char genSpec;
    const result = parseGenericFormatSpecifier(format, pos, genSpec);
    idx = pos;
    return result;
}
594 
/**************************************
 * Parse one printf *format specifier*, which has the shape:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Besides recognising the pieces, this rejects flag/width/precision
 * combinations that are not accepted for the `c`, `s`, `d`, `i` and `n`
 * conversions.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *          on exit, index just past what was parsed
 *          (this holds for `Format.error` as well)
 *      widthStar = set to `true` when the field width is `*`
 *      precisionStar = set to `true` when the precision is `*`
 * Returns:
 *      the `Format` that was recognised, or `Format.error`
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar) nothrow pure @safe
{
    size_t pos = idx;
    assert(format[pos] == '%');
    const end = format.length;

    bool sawHash;           // `#` flag
    bool sawZero;           // `0` flag
    bool sawOtherFlag;      // any of `-`, `+`, ` `
    bool sawWidth;          // a field width (number or `*`)
    bool sawPrecision;      // a `.` introducing a precision

    // Give up, telling the caller how far parsing got.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Step over one character; `false` means the string is used up.
    bool advance()
    {
        return ++pos != end;
    }

    // Step over a run of decimal digits; `false` means the string is used up.
    bool skipDigits()
    {
        while ('0' <= format[pos] && format[pos] <= '9')
        {
            if (!advance())
                return false;
        }
        return true;
    }

    // Step over the '%'
    if (!advance())
        return fail();

    // "%%" stands for a literal percent sign
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    /* Flags: any number of them, in any order
     */
    flagScan:
    for (;;)
    {
        switch (format[pos])
        {
            case '-', '+', ' ':
                sawOtherFlag = true;
                break;

            case '#':
                sawHash = true;
                break;

            case '0':
                sawZero = true;
                break;

            default:
                break flagScan;
        }
        if (!advance())
            return fail();
    }

    /* Field width: `*`, or a number that does not begin with 0
     * (a leading 0 has already been taken as a flag)
     */
    if (format[pos] == '*')
    {
        sawWidth = true;
        widthStar = true;
        if (!advance())
            return fail();
    }
    else if ('1' <= format[pos] && format[pos] <= '9')
    {
        sawWidth = true;
        if (!skipDigits())
            return fail();
    }

    /* Precision: `.` followed by `*`, by digits, or by nothing at all
     */
    if (format[pos] == '.')
    {
        sawPrecision = true;
        if (!advance())
            return fail();

        if (format[pos] == '*')
        {
            precisionStar = true;
            if (!advance())
                return fail();
        }
        else if (!skipDigits())     // a no-op when no digit follows the `.`
            return fail();
    }

    /* Length modifier and conversion character
     */
    char genSpec;
    const result = parseGenericFormatSpecifier(format, pos, genSpec);
    if (result == Format.error)
        return fail();

    /* Reject modifiers that the conversion does not accept
     */
    const bool rejected =
        (genSpec == 'c' || genSpec == 's') ? (sawHash || sawZero) :
        (genSpec == 'd' || genSpec == 'i') ? sawHash :
        (genSpec == 'n') ? (sawHash || sawZero || sawPrecision || sawWidth || sawOtherFlag) :
        false;
    if (rejected)
        return fail();

    idx = pos;
    return result;
}
754 
/* Different kinds of formatting specifications. Variations we don't
   care about are merged (for example, no distinction is made between
   f, e, g, a, etc., which all map to the `g` family).

   The comments below name the argument type for `printf`.
   For `scanf`, every format takes a pointer to that type instead.
 */
enum Format
{
    d,          // int (`d`, `i`)
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t (the parser also produces this for `z` with u/o/x/X)
    td,         // ptrdiff_t (the parser also produces this for `t` with u/o/x/X)
    u,          // unsigned int (`u`, `o`, `x`, `X`)
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf); checked like `g` for printf
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf); checked as a char string for scanf
    lc,         // wint_t (printf); checked as a wchar_t string for scanf
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_a,      // GNU ext. : address to a string with no maximum size (scanf)
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf) / length modifier (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}
798 
/**************************************
 * Parse the *length modifier* and the *conversion character* that close
 * a printf or scanf format specifier:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the first character of the
 *          `[length]specifier` part; on exit, index just past what was
 *          parsed (this holds for `Format.error` as well)
 *      genSpecifier = receives the conversion character with the length
 *           modifier stripped, e.g. `d` for the format `hhd`. It is not
 *           assigned when the string ends inside the length modifier.
 *      useGNUExts = accept the GNU `a` and `m` extensions; by default this
 *           is decided by looking up `CRuntime_Glibc` in `global.versionids`
 * Returns:
 *      the `Format` that was recognised, or `Format.error`
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier, bool useGNUExts =
    findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @trusted
{
    const end = format.length;

    /* Length modifier
     */
    const lm = format[idx];
    bool oneCharMod;    // one of `j`, `z`, `t`, `L`
    bool doubled;       // `hh` or `ll`
    switch (lm)
    {
        case 'j', 'z', 't', 'L':
            if (++idx == end)
                return Format.error;
            oneCharMod = true;
            break;

        case 'h', 'l':
            if (++idx == end)
                return Format.error;
            if (format[idx] == lm)      // same letter twice: `hh` / `ll`
            {
                doubled = true;
                if (++idx == end)
                    return Format.error;
            }
            break;

        default:
            break;                      // no length modifier present
    }

    const bool noModifier = !oneCharMod && lm != 'h' && lm != 'l';
    const bool singleL = lm == 'l' && !doubled;

    // Select the member of an integer-like family that matches the length
    // modifier. `L` is never valid for these conversions.
    Format integerKind(Format hh, Format h, Format ll, Format l,
                       Format j, Format z, Format t, Format plain)
    {
        switch (lm)
        {
            case 'h':   return doubled ? hh : h;
            case 'l':   return doubled ? ll : l;
            case 'j':   return j;
            case 'z':   return z;
            case 't':   return t;
            case 'L':   return Format.error;
            default:    return plain;
        }
    }

    // Conversions that accept nothing but an optional single `l`.
    Format narrowOrWide(Format narrow, Format wide)
    {
        if (singleL)
            return wide;
        return noModifier ? narrow : Format.error;
    }

    /* Conversion character
     */
    const sc = format[idx];
    genSpecifier = sc;
    Format result;
    switch (sc)
    {
        case 'd', 'i':
            result = integerKind(Format.hhd, Format.hd, Format.lld, Format.ld,
                                 Format.jd, Format.zd, Format.td, Format.d);
            break;

        case 'u', 'o', 'x', 'X':
            result = integerKind(Format.hhu, Format.hu, Format.llu, Format.lu,
                                 Format.ju, Format.zd, Format.td, Format.u);
            break;

        case 'n':
            result = integerKind(Format.hhn, Format.hn, Format.lln, Format.ln,
                                 Format.jn, Format.zn, Format.tn, Format.n);
            break;

        case 'a', 'A', 'e', 'E', 'f', 'F', 'g', 'G':
            if (sc == 'a' && useGNUExts)
            {
                // https://www.gnu.org/software/libc/manual/html_node/Dynamic-String-Input.html
                result = Format.GNU_a;
            }
            else if (lm == 'L')
                result = Format.Lg;
            else
                result = narrowOrWide(Format.g, Format.lg);
            break;

        case 'c':
            result = narrowOrWide(Format.c, Format.lc);
            break;

        case 's':
            result = narrowOrWide(Format.s, Format.ls);
            break;

        case 'p':
            // takes no length modifier at all
            result = noModifier ? Format.p : Format.error;
            break;

        case 'm':
            // http://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
            result = useGNUExts ? Format.GNU_m : Format.error;
            break;

        default:
            result = Format.error;
            break;
    }

    ++idx;          // step over the conversion character, valid or not
    return result;
}
963 
unittest
{
    /* Exercises parseGenericFormatSpecifier, parsePrintfFormatSpecifier and
     * parseScanfFormatSpecifier.
     *
     * Every case follows the same pattern: reset `idx` to 0, run the parser,
     * then check the returned Format and that `idx` ends up at the end of the
     * format string. The nested helpers below factor out that pattern and
     * attach the format string as the assert message, so a failure names the
     * offending input.
     */

    // State passed by reference to the parsers under test.
    char genSpecifier;
    size_t idx;
    bool widthStar;
    bool precisionStar;
    bool asterisk;

    // Generic parser: check the returned Format and the reported specifier character.
    void expectGeneric(string fmt, Format expected, char expectedSpec)
    {
        idx = 0;
        const got = parseGenericFormatSpecifier(fmt, idx, genSpecifier);
        assert(got == expected, fmt);
        assert(genSpecifier == expectedSpec, fmt);
    }

    // printf parser: check the returned Format and that all of `fmt` was consumed.
    // Leaves `widthStar` / `precisionStar` as set by the parser for the caller to inspect.
    void expectPrintf(string fmt, Format expected)
    {
        idx = 0;
        const got = parsePrintfFormatSpecifier(fmt, idx, widthStar, precisionStar);
        assert(got == expected, fmt);
        assert(idx == fmt.length, fmt);
    }

    // scanf parser: check the returned Format and that all of `fmt` was consumed.
    // Leaves `asterisk` as set by the parser for the caller to inspect.
    void expectScanf(string fmt, Format expected)
    {
        idx = 0;
        const got = parseScanfFormatSpecifier(fmt, idx, asterisk);
        assert(got == expected, fmt);
        assert(idx == fmt.length, fmt);
    }

    /* parseGenericFormatSpecifier
     */

    expectGeneric("hhd", Format.hhd, 'd');
    expectGeneric("hn",  Format.hn,  'n');
    expectGeneric("ji",  Format.jd,  'i');
    expectGeneric("lu",  Format.lu,  'u');

    // Unknown specifier: only the returned Format is checked.
    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);

    /* parsePrintfFormatSpecifier
     */

    // one for each Format
    expectPrintf("%d", Format.d);
    assert(!widthStar && !precisionStar);

    expectPrintf("%ld",  Format.ld);
    expectPrintf("%lld", Format.lld);
    expectPrintf("%jd",  Format.jd);
    expectPrintf("%zd",  Format.zd);
    expectPrintf("%td",  Format.td);
    expectPrintf("%g",   Format.g);
    expectPrintf("%Lg",  Format.Lg);
    expectPrintf("%p",   Format.p);
    expectPrintf("%n",   Format.n);
    expectPrintf("%ln",  Format.ln);
    expectPrintf("%lln", Format.lln);
    expectPrintf("%hn",  Format.hn);
    expectPrintf("%hhn", Format.hhn);
    expectPrintf("%jn",  Format.jn);
    expectPrintf("%zn",  Format.zn);
    expectPrintf("%tn",  Format.tn);
    expectPrintf("%c",   Format.c);
    expectPrintf("%lc",  Format.lc);
    expectPrintf("%s",   Format.s);
    expectPrintf("%ls",  Format.ls);
    expectPrintf("%%",   Format.percent);

    // Synonyms
    expectPrintf("%i", Format.d);
    expectPrintf("%u", Format.u);
    expectPrintf("%o", Format.u);
    expectPrintf("%x", Format.u);
    expectPrintf("%X", Format.u);
    expectPrintf("%f", Format.g);
    expectPrintf("%F", Format.g);
    expectPrintf("%G", Format.g);

    // `%a` may come back as either Format.g or Format.GNU_a, so accept both.
    idx = 0;
    Format g = parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar);
    assert(g == Format.g || g == Format.GNU_a);
    assert(idx == 2);

    expectPrintf("%A",  Format.g);
    expectPrintf("%lg", Format.lg);

    // width, precision
    expectPrintf("%*d", Format.d);
    assert(widthStar && !precisionStar);

    expectPrintf("%.*d", Format.d);
    assert(!widthStar && precisionStar);

    expectPrintf("%*.*d", Format.d);
    assert(widthStar && precisionStar);

    // Too short formats
    foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
    {
        expectPrintf(s, Format.error);
    }

    // Undefined format combinations
    foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
    {
        expectPrintf(s, Format.error);
    }

    /* parseScanfFormatSpecifier
     */

    // one for each Format
    expectScanf("%d", Format.d);
    assert(!asterisk);

    expectScanf("%hhd", Format.hhd);
    expectScanf("%hd",  Format.hd);
    expectScanf("%ld",  Format.ld);
    expectScanf("%lld", Format.lld);
    expectScanf("%jd",  Format.jd);
    expectScanf("%zd",  Format.zd);
    expectScanf("%td",  Format.td);
    expectScanf("%u",   Format.u);
    expectScanf("%hhu", Format.hhu);
    expectScanf("%hu",  Format.hu);
    expectScanf("%lu",  Format.lu);
    expectScanf("%llu", Format.llu);
    expectScanf("%ju",  Format.ju);
    expectScanf("%g",   Format.g);
    expectScanf("%lg",  Format.lg);
    expectScanf("%Lg",  Format.Lg);
    expectScanf("%p",   Format.p);
    expectScanf("%s",   Format.s);
    expectScanf("%ls",  Format.ls);
    expectScanf("%%",   Format.percent);

    // Synonyms
    expectScanf("%i", Format.d);
    expectScanf("%n", Format.n);
    expectScanf("%o", Format.u);
    expectScanf("%x", Format.u);
    expectScanf("%f", Format.g);
    expectScanf("%e", Format.g);

    // `%a` may come back as either Format.g or Format.GNU_a, so accept both.
    idx = 0;
    g = parseScanfFormatSpecifier("%a", idx, asterisk);
    assert(g == Format.g || g == Format.GNU_a);
    assert(idx == 2);

    expectScanf("%c", Format.c);

    // asterisk
    expectScanf("%*d", Format.d);
    assert(asterisk);

    expectScanf("%9ld", Format.ld);
    assert(!asterisk);

    expectScanf("%*25984hhd", Format.hhd);
    assert(asterisk);

    // scansets
    expectScanf("%[a-zA-Z]s", Format.s);
    assert(!asterisk);

    expectScanf("%*25[a-z]hhd", Format.hhd);
    assert(asterisk);

    // Too short formats
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {
        expectScanf(s, Format.error);
    }

    // Undefined format combinations
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {
        expectScanf(s, Format.error);
    }

    // Invalid scansets
    foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"])
    {
        expectScanf(s, Format.error);
    }
}