1 /**
2 * Check the arguments to `printf` and `scanf` against the `format` string.
3 *
4 * Copyright: Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
5 * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright)
6 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8 * Documentation: https://dlang.org/phobos/dmd_chkformat.html
9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10 */
11 module dmd.chkformat;
12
13 //import core.stdc.stdio : printf, scanf;
14 import core.stdc.ctype : isdigit;
15
16 import dmd.cond;
17 import dmd.errors;
18 import dmd.expression;
19 import dmd.globals;
20 import dmd.identifier;
21 import dmd.mtype;
22 import dmd.target;
23
24
/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue diagnostics (currently deprecations) for
 * incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * The GNU `%m` conversion prints the `errno` message and therefore consumes
 * no argument for the conversion itself (a `*` width or precision attached
 * to it still consumes an `int`).
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string needs more arguments than were supplied;
 *      `false` otherwise. Type mismatches and invalid specifiers are reported
 *      but do not change the return value.
 * References:
 * C99 7.19.6.1
 * http://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n;    // index of the next unconsumed entry of args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in messages
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return null when none is left.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                // Same wording and portable `%d` as used by checkScanfFormat
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // Check the `int` argument consumed by a `*` width or precision.
        // Returns false when the arguments ran out.
        bool checkStarArg(const char* prefix)
        {
            auto e = getNextArg();
            if (!e)
                return false;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg(prefix, e, "int", t);
            return true;
        }

        if (widthStar && !checkStarArg("width "))
            return true;

        if (precisionStar && !checkStarArg("precision "))
            return true;

        if (fmt == Format.GNU_m)
            continue;                   // "%m" reads errno, it has no argument of its own

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // only dereferenced after t.ty == Tpointer is established
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.GNU_a:  // Format.GNU_a is only for scanf
            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // must be an 8-bit pointee, matching the `byte*` named in the message
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.targetOS == TargetOS.Windows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:      // handled above, before an argument is fetched
            case Format.percent:    // handled right after parsing
                assert(0);
        }
    }
    return false;
}
290
/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue diagnostics (currently deprecations) for
 * incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string needs more arguments than were supplied;
 *      `false` otherwise. Type mismatches and invalid specifiers are reported
 *      but do not change the return value.
 * References:
 * C99 7.19.6.2
 * http://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;   // index of the next unconsumed entry of args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in messages
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return null when none is left.
        // (Assignment-suppressed specifiers never get here, see the `asterisk` test above.)
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // only dereferenced after t.ty == Tpointer is established
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // must be an 8-bit pointee, matching the `byte*` named in the message
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                // The width of C `long` is a target property (c_longsize), not a
                // function of the pointer size; keep the check in step with the message.
                if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64)))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                // 64 bits regardless of pointer size, mirroring the Format.jd check
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.GNU_a:
            case Format.GNU_m:
            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.targetOS == TargetOS.Windows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:    // handled right after parsing
                assert(0);
        }
    }
    return false;
}
504
505 private:
506
/**************************************
 * Parse a scanf *format specifier*, which has the shape:
 *
 * `%[*][width][length]specifier`
 *
 * An optional `[...]` scanset between the width and the length/specifier is
 * skipped; it is only checked for a closing `]`.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *            on exit, index just past the text that was examined,
 *            also when `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      the recognized `Format`, `Format.percent` for `%%`,
 *      or `Format.error` if the specifier is malformed or truncated
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    assert(format[idx] == '%');
    const end = format.length;
    auto pos = idx + 1;     // first character after the '%'

    // Publish how far parsing got and signal failure
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    if (pos == end)
        return fail();      // lone '%' at the end of the string

    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // assignment suppression
    if (format[pos] == '*')
    {
        if (++pos == end)
            return fail();
        asterisk = true;
    }

    // maximum field width: a run of decimal digits
    for (; isdigit(format[pos]);)
    {
        if (++pos == end)
            return fail();
    }

    /* Scanset: its content is arbitrary, so only look for the terminator
     */
    if (format[pos] == '[')
    {
        while (pos < end && format[pos] != ']')
            ++pos;

        if (pos == end)
            return fail();  // unterminated scanset

        // step over the ']'; something must still follow it
        if (++pos == end)
            return fail();
    }

    /* Length modifier and conversion character
     */
    char conversion;
    const result = parseGenericFormatSpecifier(format, pos, conversion);

    // idx is advanced identically for success and for Format.error
    idx = pos;
    return result;
}
594
/**************************************
 * Parse a printf *format specifier*, which has the shape:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Besides syntax, a few flag/conversion combinations are rejected:
 * `#` or `0` with `c`/`s`, `#` with `d`/`i`, and any flag, width or
 * precision with `n`.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *            on exit, index just past the text that was examined,
 *            also when `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 * Returns:
 *      the recognized `Format`, `Format.percent` for `%%`,
 *      or `Format.error` if the specifier is malformed, truncated or rejected
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar) nothrow pure @safe
{
    assert(format[idx] == '%');
    const end = format.length;
    auto pos = idx + 1;     // first character after the '%'

    bool sawHash;           // '#'
    bool sawZero;           // '0'
    bool sawOtherFlag;      // '-', '+' or ' '
    bool sawWidth;
    bool sawPrecision;

    // Publish how far parsing got and signal failure
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Step over one character; true when that exhausts the format string
    bool stepHitsEnd()
    {
        return ++pos == end;
    }

    // Consume a (possibly empty) run of decimal digits;
    // true when the format string ends inside the run
    bool digitsHitEnd()
    {
        while ('0' <= format[pos] && format[pos] <= '9')
        {
            if (stepHitsEnd())
                return true;
        }
        return false;
    }

    if (pos == end)
        return fail();      // lone '%' at the end of the string

    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    /* flags
     */
    flagScan: for (;;)
    {
        switch (format[pos])
        {
            case '-':
            case '+':
            case ' ':
                sawOtherFlag = true;
                break;

            case '#':
                sawHash = true;
                break;

            case '0':
                sawZero = true;
                break;

            default:
                break flagScan;
        }
        if (stepHitsEnd())
            return fail();
    }

    /* field width: `*`, or digits that do not start with 0
     */
    if (format[pos] == '*')
    {
        sawWidth = true;
        widthStar = true;
        if (stepHitsEnd())
            return fail();
    }
    else if ('1' <= format[pos] && format[pos] <= '9')
    {
        sawWidth = true;
        if (stepHitsEnd() || digitsHitEnd())
            return fail();
    }

    /* precision: `.` followed by `*`, by digits, or by nothing
     */
    if (format[pos] == '.')
    {
        sawPrecision = true;
        if (stepHitsEnd())
            return fail();

        if (format[pos] == '*')
        {
            precisionStar = true;
            if (stepHitsEnd())
                return fail();
        }
        else if (digitsHitEnd())
            return fail();
    }

    /* length modifier and conversion character
     */
    char conversion;
    const result = parseGenericFormatSpecifier(format, pos, conversion);
    if (result == Format.error)
        return fail();

    // combinations that are rejected for the conversion that was found
    bool rejected;
    if (conversion == 'c' || conversion == 's')
        rejected = sawHash || sawZero;
    else if (conversion == 'd' || conversion == 'i')
        rejected = sawHash;
    else if (conversion == 'n')
        rejected = sawHash || sawZero || sawPrecision || sawWidth || sawOtherFlag;

    if (rejected)
        return fail();

    idx = pos;
    return result;  // success
}
754
/* Different kinds of formatting specifications, variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   The trailing comments name the C type each kind stands for in `printf`.
   For `scanf`, every format is a pointer (to that type).

   `checkPrintfFormat` and `checkScanfFormat` use `final switch` over this
   enum, so a new member needs a case in both.
 */
enum Format
{
    d,          // int (conversions `d`, `i`)
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t (the parser also yields this for `z` with `u`, `o`, `x`, `X`)
    td,         // ptrdiff_t (the parser also yields this for `t` with `u`, `o`, `x`, `X`)
    u,          // unsigned int (conversions `u`, `o`, `x`, `X`)
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf); checked like `g` by printf
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf); checked like `s` by scanf
    lc,         // wint_t (printf); checked like `ls` by scanf
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_a,      // GNU ext. : address to a string with no maximum size (scanf); checked like `g` by printf
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf) / length modifier (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}
798
/**************************************
 * Parse the *length modifier* and the *conversion specifier*, the part that
 * printf and scanf format specifiers have in common:
 *
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the start of the length modifier (or of the
 *            specifier when there is no modifier); on exit, index past the
 *            text that was examined, also when `Format.error` is returned
 *      genSpecifier = Generic specifier, i.e. the conversion character without
 *                     its length modifier. For instance, it will be set to `d`
 *                     if the format is `hhd`.
 *      useGNUExts = accept the GNU extensions `a` (as `Format.GNU_a`) and `m`
 *                   (as `Format.GNU_m`); defaults to whether the
 *                   `CRuntime_Glibc` version identifier is set
 * Returns:
 *      the recognized `Format`, or `Format.error` if the text is truncated,
 *      the conversion character is unknown, or the length modifier is not
 *      accepted for that conversion
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier, bool useGNUExts =
    findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @trusted
{
    const end = format.length;

    /* Length modifier. `lm` keeps the first character even when it turns out
     * not to be a modifier; it is only ever compared against modifier letters.
     */
    const lm = format[idx];
    bool oneLetter;     // one of `j`, `z`, `t`, `L`
    bool doubled;       // `hh` or `ll`
    switch (lm)
    {
        case 'j':
        case 'z':
        case 't':
        case 'L':
            if (++idx == end)
                return Format.error;
            oneLetter = true;
            break;

        case 'h':
        case 'l':
            if (++idx == end)
                return Format.error;
            doubled = format[idx] == lm;
            if (doubled && ++idx == end)
                return Format.error;
            break;

        default:
            break;
    }

    // Modifiers never accepted by the non-integer conversions below:
    // `j`, `z`, `t`, `L`, `hh`, `ll` and `h` (leaving only none or a single `l`)
    const onlyForIntegers = oneLetter || doubled || lm == 'h';

    // Pick the Format of an integer-like conversion family from the length
    // modifier; `L` is not valid for any of these families.
    Format byLength(Format hh, Format h, Format ll, Format l,
                    Format j, Format z, Format t, Format plain)
    {
        switch (lm)
        {
            case 'L': return Format.error;
            case 'h': return doubled ? hh : h;
            case 'l': return doubled ? ll : l;
            case 'j': return j;
            case 'z': return z;
            case 't': return t;
            default:  return plain;
        }
    }

    /* Conversion specifier
     */
    const conv = format[idx];
    genSpecifier = conv;

    Format result;
    switch (conv)
    {
        case 'd':
        case 'i':
            result = byLength(Format.hhd, Format.hd, Format.lld, Format.ld,
                              Format.jd, Format.zd, Format.td, Format.d);
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            // `z` and `t` deliberately share the signed kinds
            result = byLength(Format.hhu, Format.hu, Format.llu, Format.lu,
                              Format.ju, Format.zd, Format.td, Format.u);
            break;

        case 'n':
            result = byLength(Format.hhn, Format.hn, Format.lln, Format.ln,
                              Format.jn, Format.zn, Format.tn, Format.n);
            break;

        case 'a':
            if (useGNUExts)
            {
                // https://www.gnu.org/software/libc/manual/html_node/Dynamic-String-Input.html
                result = Format.GNU_a;
                break;
            }
            goto case 'f';  // otherwise an ordinary floating point conversion

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'A':
            if (lm == 'L')
                result = Format.Lg;
            else if (onlyForIntegers)
                result = Format.error;
            else if (lm == 'l')
                result = Format.lg;
            else
                result = Format.g;
            break;

        case 'c':
            if (onlyForIntegers)
                result = Format.error;
            else if (lm == 'l')
                result = Format.lc;
            else
                result = Format.c;
            break;

        case 's':
            if (onlyForIntegers)
                result = Format.error;
            else if (lm == 'l')
                result = Format.ls;
            else
                result = Format.s;
            break;

        case 'p':
            // no length modifier at all is accepted
            result = (onlyForIntegers || lm == 'l') ? Format.error : Format.p;
            break;

        case 'm':
            // http://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
            result = useGNUExts ? Format.GNU_m : Format.error;
            break;

        default:
            result = Format.error;
            break;
    }

    ++idx;  // step over the conversion character, valid or not
    return result;
}
963
964 unittest
965 {
966 /* parseGenericFormatSpecifier
967 */
968
969 char genSpecifier;
970 size_t idx;
971
972 assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd);
973 assert(genSpecifier == 'd');
974
975 idx = 0;
976 assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn);
977 assert(genSpecifier == 'n');
978
979 idx = 0;
980 assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd);
981 assert(genSpecifier == 'i');
982
983 idx = 0;
984 assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu);
985 assert(genSpecifier == 'u');
986
987 idx = 0;
988 assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);
989
990 /* parsePrintfFormatSpecifier
991 */
992
993 bool widthStar;
994 bool precisionStar;
995
996 // one for each Format
997 idx = 0;
998 assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d);
999 assert(idx == 2);
1000 assert(!widthStar && !precisionStar);
1001
1002 idx = 0;
1003 assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld);
1004 assert(idx == 3);
1005
1006 idx = 0;
1007 assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld);
1008 assert(idx == 4);
1009
1010 idx = 0;
1011 assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd);
1012 assert(idx == 3);
1013
1014 idx = 0;
1015 assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd);
1016 assert(idx == 3);
1017
1018 idx = 0;
1019 assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td);
1020 assert(idx == 3);
1021
1022 idx = 0;
1023 assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g);
1024 assert(idx == 2);
1025
1026 idx = 0;
1027 assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg);
1028 assert(idx == 3);
1029
1030 idx = 0;
1031 assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p);
1032 assert(idx == 2);
1033
1034 idx = 0;
1035 assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n);
1036 assert(idx == 2);
1037
1038 idx = 0;
1039 assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln);
1040 assert(idx == 3);
1041
1042 idx = 0;
1043 assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln);
1044 assert(idx == 4);
1045
1046 idx = 0;
1047 assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn);
1048 assert(idx == 3);
1049
1050 idx = 0;
1051 assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn);
1052 assert(idx == 4);
1053
1054 idx = 0;
1055 assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn);
1056 assert(idx == 3);
1057
1058 idx = 0;
1059 assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn);
1060 assert(idx == 3);
1061
1062 idx = 0;
1063 assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn);
1064 assert(idx == 3);
1065
1066 idx = 0;
1067 assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c);
1068 assert(idx == 2);
1069
1070 idx = 0;
1071 assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc);
1072 assert(idx == 3);
1073
1074 idx = 0;
1075 assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s);
1076 assert(idx == 2);
1077
1078 idx = 0;
1079 assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls);
1080 assert(idx == 3);
1081
1082 idx = 0;
1083 assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent);
1084 assert(idx == 2);
1085
1086 // Synonyms
1087 idx = 0;
1088 assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d);
1089 assert(idx == 2);
1090
1091 idx = 0;
1092 assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u);
1093 assert(idx == 2);
1094
1095 idx = 0;
1096 assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u);
1097 assert(idx == 2);
1098
1099 idx = 0;
1100 assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u);
1101 assert(idx == 2);
1102
1103 idx = 0;
1104 assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u);
1105 assert(idx == 2);
1106
1107 idx = 0;
1108 assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g);
1109 assert(idx == 2);
1110
1111 idx = 0;
1112 assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g);
1113 assert(idx == 2);
1114
1115 idx = 0;
1116 assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g);
1117 assert(idx == 2);
1118
1119 idx = 0;
1120 Format g = parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar);
1121 assert(g == Format.g || g == Format.GNU_a);
1122 assert(idx == 2);
1123
1124 idx = 0;
1125 assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g);
1126 assert(idx == 2);
1127
1128 idx = 0;
1129 assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.lg);
1130 assert(idx == 3);
1131
1132 // width, precision
1133 idx = 0;
1134 assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d);
1135 assert(idx == 3);
1136 assert(widthStar && !precisionStar);
1137
1138 idx = 0;
1139 assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d);
1140 assert(idx == 4);
1141 assert(!widthStar && precisionStar);
1142
1143 idx = 0;
1144 assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d);
1145 assert(idx == 5);
1146 assert(widthStar && precisionStar);
1147
1148 // Too short formats
1149 {
1150 foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
1151 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
1152 {
1153 idx = 0;
1154 assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
1155 assert(idx == s.length);
1156 }
1157 }
1158
1159 // Undefined format combinations
1160 {
1161 foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
1162 "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
1163 "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
1164 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
1165 "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
1166 {
1167 idx = 0;
1168 assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
1169 import std.stdio;
1170 assert(idx == s.length);
1171 }
1172 }
1173
1174 /* parseScanfFormatSpecifier
1175 */
1176
1177 bool asterisk;
1178
1179 // one for each Format
1180 idx = 0;
1181 assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d);
1182 assert(idx == 2);
1183 assert(!asterisk);
1184
1185 idx = 0;
1186 assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd);
1187 assert(idx == 4);
1188
1189 idx = 0;
1190 assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd);
1191 assert(idx == 3);
1192
1193 idx = 0;
1194 assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld);
1195 assert(idx == 3);
1196
1197 idx = 0;
1198 assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld);
1199 assert(idx == 4);
1200
1201 idx = 0;
1202 assert(parseScanfFormatSpecifier("%jd", idx, asterisk) == Format.jd);
1203 assert(idx == 3);
1204
1205 idx = 0;
1206 assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd);
1207 assert(idx == 3);
1208
1209 idx = 0;
1210 assert(parseScanfFormatSpecifier("%td", idx, asterisk,) == Format.td);
1211 assert(idx == 3);
1212
1213 idx = 0;
1214 assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u);
1215 assert(idx == 2);
1216
1217 idx = 0;
1218 assert(parseScanfFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu);
1219 assert(idx == 4);
1220
1221 idx = 0;
1222 assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu);
1223 assert(idx == 3);
1224
1225 idx = 0;
1226 assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu);
1227 assert(idx == 3);
1228
1229 idx = 0;
1230 assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu);
1231 assert(idx == 4);
1232
1233 idx = 0;
1234 assert(parseScanfFormatSpecifier("%ju", idx, asterisk) == Format.ju);
1235 assert(idx == 3);
1236
1237 idx = 0;
1238 assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g);
1239 assert(idx == 2);
1240
1241 idx = 0;
1242 assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg);
1243 assert(idx == 3);
1244
1245 idx = 0;
1246 assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg);
1247 assert(idx == 3);
1248
1249 idx = 0;
1250 assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p);
1251 assert(idx == 2);
1252
1253 idx = 0;
1254 assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s);
1255 assert(idx == 2);
1256
1257 idx = 0;
1258 assert(parseScanfFormatSpecifier("%ls", idx, asterisk,) == Format.ls);
1259 assert(idx == 3);
1260
1261 idx = 0;
1262 assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent);
1263 assert(idx == 2);
1264
1265 // Synonyms
1266 idx = 0;
1267 assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d);
1268 assert(idx == 2);
1269
1270 idx = 0;
1271 assert(parseScanfFormatSpecifier("%n", idx, asterisk) == Format.n);
1272 assert(idx == 2);
1273
1274 idx = 0;
1275 assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u);
1276 assert(idx == 2);
1277
1278 idx = 0;
1279 assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u);
1280 assert(idx == 2);
1281
1282 idx = 0;
1283 assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g);
1284 assert(idx == 2);
1285
1286 idx = 0;
1287 assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g);
1288 assert(idx == 2);
1289
1290 idx = 0;
1291 g = parseScanfFormatSpecifier("%a", idx, asterisk);
1292 assert(g == Format.g || g == Format.GNU_a);
1293 assert(idx == 2);
1294
1295 idx = 0;
1296 assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c);
1297 assert(idx == 2);
1298
1299 // asterisk
1300 idx = 0;
1301 assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d);
1302 assert(idx == 3);
1303 assert(asterisk);
1304
1305 idx = 0;
1306 assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld);
1307 assert(idx == 4);
1308 assert(!asterisk);
1309
1310 idx = 0;
1311 assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd);
1312 assert(idx == 10);
1313 assert(asterisk);
1314
1315 // scansets
1316 idx = 0;
1317 assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s);
1318 assert(idx == 10);
1319 assert(!asterisk);
1320
1321 idx = 0;
1322 assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd);
1323 assert(idx == 12);
1324 assert(asterisk);
1325
// Truncated scanf specifiers: the string ends before a conversion
// character is reached. "%K" rides along in the same table; it is not
// truncated, but it is likewise expected to yield Format.error.
static immutable string[] truncatedScanfSpecs = [
    "%", "% ", "%#", "%0", "%*", "%1", "%19",
    "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K",
];

foreach (spec; truncatedScanfSpecs)
{
    idx = 0;
    const parsed = parseScanfFormatSpecifier(spec, idx, asterisk);
    assert(parsed == Format.error);
    // even on error, idx is expected to end up past the whole input
    assert(idx == spec.length);
}
1334
1335
// scanf specifiers that must be rejected: length modifiers paired with a
// conversion they are not accepted for, and printf-style flags / precision
// ("%-", "%+", "%#", "%0", "%.").
static immutable string[] rejectedScanfSpecs = [
    "%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
    "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
    "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
    "%-", "%+", "%#", "%0", "%.", "%Ln",
];

foreach (spec; rejectedScanfSpecs)
{
    idx = 0;
    const parsed = parseScanfFormatSpecifier(spec, idx, asterisk);
    assert(parsed == Format.error);
    // even on error, idx is expected to end up past the whole specifier
    assert(idx == spec.length);
}
1347
// Scanset specifiers ("%[...]") that must be rejected: empty or
// unterminated sets, and a terminated set with nothing following it.
static immutable string[] malformedScansets = [
    "%[]", "%[s", "%[0-9lld", "%[", "%[a-z]",
];

foreach (spec; malformedScansets)
{
    idx = 0;
    const parsed = parseScanfFormatSpecifier(spec, idx, asterisk);
    assert(parsed == Format.error);
    // even on error, idx is expected to end up past the whole input
    assert(idx == spec.length);
}
1355
1356 }