/**
 * Check the arguments to `printf` and `scanf` against the `format` string.
 *
 * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
 * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
 */
module dmd.chkformat;

//import core.stdc.stdio : printf, scanf;
import core.stdc.ctype : isdigit;

import dmd.cond;
import dmd.errors;
import dmd.expression;
import dmd.globals;
import dmd.identifier;
import dmd.mtype;
import dmd.target;


/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue errors for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * All diagnostics are reported through `deprecation`. Checking stops at the
 * first format specifier for which no argument is left.
 *
 * The GNU `%m` conversion consumes no argument; only a `*` width or
 * precision attached to it does.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if errors occurred (i.e. the arguments ran out before the
 *      format specifiers did)
 * References:
 * C99 7.19.6.1
 * http://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    const c_longsize = target.c.longsize;
    const is64bit = global.params.is64bit;

    size_t n;   // index of the next unconsumed entry of `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, for diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument; diagnose and return `null` when none is left.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` (of type `tactual`) does not match the expected type `texpect`.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        if (widthStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        if (precisionStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        // `%m` prints the message for `errno` and takes no argument of its own,
        // so it must not shift the arguments of the specifiers that follow it.
        if (fmt == Format.GNU_m)
            continue;

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // only dereferenced after `t.ty == Tpointer` has been checked

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.GNU_a:  // Format.GNU_a is only for scanf
            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char, i.e. `byte*`
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.targetOS == TargetOS.Windows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:      // handled before an argument is fetched
            case Format.percent:    // handled right after parsing
                assert(0);
        }
    }
    return false;
}

/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue errors for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
*
 * All diagnostics are reported through `deprecation`. Checking stops at the
 * first format specifier for which no argument is left.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if errors occurred (i.e. the arguments ran out before the
 *      format specifiers did)
 * References:
 * C99 7.19.6.2
 * http://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;   // index of the next unconsumed entry of `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, for diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument; diagnose and return `null` when none is left.
        // Suppressed (`*`) conversions never get here, they were skipped above.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` (of type `tactual`) does not match the expected type `texpect`.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // only dereferenced after `t.ty == Tpointer` has been checked
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char, i.e. `byte*`
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                // sized by the C `long` of the target, not by the pointer size
                if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64)))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t, 64 bits wide like intmax_t above
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.GNU_a:
            case Format.GNU_m:
            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.targetOS == TargetOS.Windows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:    // handled right after parsing
                assert(0);
        }
    }
    return false;
}

private:

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[*][width][scanset][length]specifier`
 *
 * A scanset (`[...]`) is only checked for its closing `]`, and a
 * specifier is still required after it.
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      Format
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;

    // Publish how far parsing got, then report failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    // * sub-specifier
    if (format[i] == '*')
    {
        ++i;
        if (i == length)
            return error();
        asterisk = true;
    }

    // fieldWidth
    while (isdigit(format[i]))
    {
        i++;
        if (i == length)
            return error();
    }

    /* Read the scanset
     * A scanset can be anything, so we just check that it is paired
     */
    if (format[i] == '[')
    {
        while (i < length)
        {
            if (format[i] == ']')
                break;
            ++i;
        }

        // no `]` found
        if (i == length)
            return error();

        ++i;
        // no specifier after `]`
        // it could be mixed with the one above, but then idx won't have the right index
        if (i == length)
            return error();
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier = parseGenericFormatSpecifier(format, i, genSpec);
    if (specifier == Format.error)
        return error();

    idx = i;
    return specifier;  // success
}

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Besides the syntax, flag/width/precision combinations are rejected for:
 * `#` or `0` with `c`/`s`, `#` with `d`/`i`, and any flag, width or
 * precision with `n`.
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 * Returns:
 *      Format
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;
    bool hash;          // `#` flag seen
    bool zero;          // `0` flag seen
    bool flags;         // `-`, `+` or ` ` flag seen
    bool width;         // a field width (number or `*`) was given
    bool precision;     // a `.` was seen

    // Publish how far parsing got, then report failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    /* Read the `flags`
     */
    while (1)
    {
        const c = format[i];
        if (c == '-' ||
            c == '+' ||
            c == ' ')
        {
            flags = true;
        }
        else if (c == '#')
        {
            hash = true;
        }
        else if (c == '0')
        {
            zero = true;
        }
        else
            break;
        ++i;
        if (i == length)
            return error();
    }

    /* Read the `field width`
     */
    {
        const c = format[i];
        if (c == '*')
        {
            width = true;
            widthStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('1' <= c && c <= '9')
        {
            width = true;
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the `precision`
     */
    if (format[i] == '.')
    {
        precision = true;
        ++i;
        if (i == length)
            return error();
        const c = format[i];
        if (c == '*')
        {
            precisionStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('0' <= c && c <= '9')
        {
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier = parseGenericFormatSpecifier(format, i, genSpec);
    if (specifier == Format.error)
        return error();

    switch (genSpec)
    {
        case 'c':
        case 's':
            if (hash || zero)
                return error();
            break;

        case 'd':
        case 'i':
            if (hash)
                return error();
            break;

        case 'n':
            if (hash || zero || precision || width || flags)
                return error();
            break;

        default:
            break;
    }

    idx = i;
    return specifier;  // success
}

/* Different kinds of formatting specifications, variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   For `scanf`, every format is a pointer.
*/
enum Format
{
    d,          // int; also produced for the `i` conversion
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t; the `z` modifier yields this for signed and unsigned conversions alike
    td,         // ptrdiff_t; the `t` modifier yields this for signed and unsigned conversions alike
    u,          // unsigned int; also produced for the `o`, `x` and `X` conversions
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf); covers a, A, e, E, f, F, g, G
    lg,         // double (scanf)
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf)
    lc,         // wint_t (printf)
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_a,      // GNU ext. : address to a string with no maximum size (scanf)
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf) / length modifier (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}

/**************************************
 * Parse the *length specifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *           format is `hhd`.
* Returns:
 *      Format
 * Note:
 *      `useGNUExts` enables the GNU conversions `a` (reported as
 *      `Format.GNU_a`, only when no length modifier is present) and `m`
 *      (`Format.GNU_m`). Its default is looked up under the
 *      `CRuntime_Glibc` identifier in `global.versionids`.
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier, bool useGNUExts =
    findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @trusted
{
    const length = format.length;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    bool lm1;        // if jztL
    bool lm2;        // if `hh` or `ll`
    if (lm == 'j' ||
        lm == 'z' ||
        lm == 't' ||
        lm == 'L')
    {
        ++idx;
        if (idx == length)
            return Format.error;
        lm1 = true;
    }
    else if (lm == 'h' || lm == 'l')
    {
        ++idx;
        if (idx == length)
            return Format.error;
        lm2 = lm == format[idx];    // doubled modifier: `hh` or `ll`
        if (lm2)
        {
            ++idx;
            if (idx == length)
                return Format.error;
        }
    }
    // true if any of j, z, t, L, h, hh, l, ll was consumed above
    const hasLengthModifier = lm1 || lm == 'h' || lm == 'l';

    /* Read the `specifier`
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'h' && lm2 ? Format.hhd :
                            lm == 'h'        ? Format.hd  :
                            lm == 'l' && lm2 ? Format.lld :
                            lm == 'l'        ? Format.ld  :
                            lm == 'j'        ? Format.jd  :
                            lm == 'z'        ? Format.zd  :
                            lm == 't'        ? Format.td  :
                                               Format.d;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'h' && lm2 ? Format.hhu :
                            lm == 'h'        ? Format.hu  :
                            lm == 'l' && lm2 ? Format.llu :
                            lm == 'l'        ? Format.lu  :
                            lm == 'j'        ? Format.ju  :
                            lm == 'z'        ? Format.zd  :
                            lm == 't'        ? Format.td  :
                                               Format.u;
            break;

        case 'a':
            // With a length modifier (`%La`, `%la`, ...) this can only be the C99
            // hexadecimal floating-point conversion, so it is handled like `f`
            // below; the GNU extension is reported only for a bare `a`.
            if (useGNUExts && !hasLengthModifier)
            {
                // https://www.gnu.org/software/libc/manual/html_node/Dynamic-String-Input.html
                specifier = Format.GNU_a;
                break;
            }
            goto case;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'A':
            if (lm == 'L')
                specifier = Format.Lg;
            else if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.lg : Format.g;
            break;

        case 'c':
            if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.lc : Format.c;
            break;

        case 's':
            if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.ls : Format.s;
            break;

        case 'p':
            if (lm1 || lm2 || lm == 'h' || lm == 'l')
                specifier = Format.error;
            else
                specifier = Format.p;
            break;

        case 'n':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'l' && lm2 ? Format.lln :
                            lm == 'l'        ? Format.ln  :
                            lm == 'h' && lm2 ? Format.hhn :
                            lm == 'h'        ? Format.hn  :
                            lm == 'j'        ? Format.jn  :
                            lm == 'z'        ? Format.zn  :
                            lm == 't'        ? Format.tn  :
                                               Format.n;
            break;

        case 'm':
            if (useGNUExts)
            {
                // http://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
                specifier = Format.GNU_m;
                break;
            }
            goto default;

        default:
            specifier = Format.error;
            break;
    }

    // the specifier character is consumed even when it is invalid
    ++idx;
    return specifier; // success
}

unittest
{
    /* parseGenericFormatSpecifier
     */

    char genSpecifier;
    size_t idx;

    assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd);
    assert(genSpecifier == 'd');

    idx = 0;
    assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn);
    assert(genSpecifier == 'n');

    idx = 0;
    assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd);
    assert(genSpecifier == 'i');

    idx = 0;
    assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu);
    assert(genSpecifier == 'u');

    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);

    /* parsePrintfFormatSpecifier
     */

    bool widthStar;
    bool precisionStar;

    // one for each Format
    idx = 0;
    assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 2);
    assert(!widthStar && !precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) ==
Format.td); 1020 assert(idx == 3); 1021 1022 idx = 0; 1023 assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g); 1024 assert(idx == 2); 1025 1026 idx = 0; 1027 assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg); 1028 assert(idx == 3); 1029 1030 idx = 0; 1031 assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p); 1032 assert(idx == 2); 1033 1034 idx = 0; 1035 assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n); 1036 assert(idx == 2); 1037 1038 idx = 0; 1039 assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln); 1040 assert(idx == 3); 1041 1042 idx = 0; 1043 assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln); 1044 assert(idx == 4); 1045 1046 idx = 0; 1047 assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn); 1048 assert(idx == 3); 1049 1050 idx = 0; 1051 assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn); 1052 assert(idx == 4); 1053 1054 idx = 0; 1055 assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn); 1056 assert(idx == 3); 1057 1058 idx = 0; 1059 assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn); 1060 assert(idx == 3); 1061 1062 idx = 0; 1063 assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn); 1064 assert(idx == 3); 1065 1066 idx = 0; 1067 assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c); 1068 assert(idx == 2); 1069 1070 idx = 0; 1071 assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc); 1072 assert(idx == 3); 1073 1074 idx = 0; 1075 assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s); 1076 assert(idx == 2); 1077 1078 idx = 0; 1079 assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, 
precisionStar) == Format.ls); 1080 assert(idx == 3); 1081 1082 idx = 0; 1083 assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent); 1084 assert(idx == 2); 1085 1086 // Synonyms 1087 idx = 0; 1088 assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d); 1089 assert(idx == 2); 1090 1091 idx = 0; 1092 assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u); 1093 assert(idx == 2); 1094 1095 idx = 0; 1096 assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u); 1097 assert(idx == 2); 1098 1099 idx = 0; 1100 assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u); 1101 assert(idx == 2); 1102 1103 idx = 0; 1104 assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u); 1105 assert(idx == 2); 1106 1107 idx = 0; 1108 assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g); 1109 assert(idx == 2); 1110 1111 idx = 0; 1112 assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g); 1113 assert(idx == 2); 1114 1115 idx = 0; 1116 assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g); 1117 assert(idx == 2); 1118 1119 idx = 0; 1120 Format g = parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar); 1121 assert(g == Format.g || g == Format.GNU_a); 1122 assert(idx == 2); 1123 1124 idx = 0; 1125 assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g); 1126 assert(idx == 2); 1127 1128 idx = 0; 1129 assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.lg); 1130 assert(idx == 3); 1131 1132 // width, precision 1133 idx = 0; 1134 assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d); 1135 assert(idx == 3); 1136 assert(widthStar && !precisionStar); 1137 1138 idx = 0; 1139 assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) 
== Format.d); 1140 assert(idx == 4); 1141 assert(!widthStar && precisionStar); 1142 1143 idx = 0; 1144 assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d); 1145 assert(idx == 5); 1146 assert(widthStar && precisionStar); 1147 1148 // Too short formats 1149 { 1150 foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12", 1151 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"]) 1152 { 1153 idx = 0; 1154 assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); 1155 assert(idx == s.length); 1156 } 1157 } 1158 1159 // Undefined format combinations 1160 { 1161 foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", 1162 "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", 1163 "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls", 1164 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", 1165 "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"]) 1166 { 1167 idx = 0; 1168 assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); 1169 import std.stdio; 1170 assert(idx == s.length); 1171 } 1172 } 1173 1174 /* parseScanfFormatSpecifier 1175 */ 1176 1177 bool asterisk; 1178 1179 // one for each Format 1180 idx = 0; 1181 assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d); 1182 assert(idx == 2); 1183 assert(!asterisk); 1184 1185 idx = 0; 1186 assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd); 1187 assert(idx == 4); 1188 1189 idx = 0; 1190 assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd); 1191 assert(idx == 3); 1192 1193 idx = 0; 1194 assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld); 1195 assert(idx == 3); 1196 1197 idx = 0; 1198 assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld); 1199 assert(idx == 4); 1200 1201 idx = 0; 1202 assert(parseScanfFormatSpecifier("%jd", idx, asterisk) 
== Format.jd); 1203 assert(idx == 3); 1204 1205 idx = 0; 1206 assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd); 1207 assert(idx == 3); 1208 1209 idx = 0; 1210 assert(parseScanfFormatSpecifier("%td", idx, asterisk,) == Format.td); 1211 assert(idx == 3); 1212 1213 idx = 0; 1214 assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u); 1215 assert(idx == 2); 1216 1217 idx = 0; 1218 assert(parseScanfFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu); 1219 assert(idx == 4); 1220 1221 idx = 0; 1222 assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu); 1223 assert(idx == 3); 1224 1225 idx = 0; 1226 assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu); 1227 assert(idx == 3); 1228 1229 idx = 0; 1230 assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu); 1231 assert(idx == 4); 1232 1233 idx = 0; 1234 assert(parseScanfFormatSpecifier("%ju", idx, asterisk) == Format.ju); 1235 assert(idx == 3); 1236 1237 idx = 0; 1238 assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g); 1239 assert(idx == 2); 1240 1241 idx = 0; 1242 assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg); 1243 assert(idx == 3); 1244 1245 idx = 0; 1246 assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg); 1247 assert(idx == 3); 1248 1249 idx = 0; 1250 assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p); 1251 assert(idx == 2); 1252 1253 idx = 0; 1254 assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s); 1255 assert(idx == 2); 1256 1257 idx = 0; 1258 assert(parseScanfFormatSpecifier("%ls", idx, asterisk,) == Format.ls); 1259 assert(idx == 3); 1260 1261 idx = 0; 1262 assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent); 1263 assert(idx == 2); 1264 1265 // Synonyms 1266 idx = 0; 1267 assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d); 1268 assert(idx == 2); 1269 1270 idx = 0; 1271 assert(parseScanfFormatSpecifier("%n", idx, 
asterisk) == Format.n); 1272 assert(idx == 2); 1273 1274 idx = 0; 1275 assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u); 1276 assert(idx == 2); 1277 1278 idx = 0; 1279 assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u); 1280 assert(idx == 2); 1281 1282 idx = 0; 1283 assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g); 1284 assert(idx == 2); 1285 1286 idx = 0; 1287 assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g); 1288 assert(idx == 2); 1289 1290 idx = 0; 1291 g = parseScanfFormatSpecifier("%a", idx, asterisk); 1292 assert(g == Format.g || g == Format.GNU_a); 1293 assert(idx == 2); 1294 1295 idx = 0; 1296 assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c); 1297 assert(idx == 2); 1298 1299 // asterisk 1300 idx = 0; 1301 assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d); 1302 assert(idx == 3); 1303 assert(asterisk); 1304 1305 idx = 0; 1306 assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld); 1307 assert(idx == 4); 1308 assert(!asterisk); 1309 1310 idx = 0; 1311 assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd); 1312 assert(idx == 10); 1313 assert(asterisk); 1314 1315 // scansets 1316 idx = 0; 1317 assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s); 1318 assert(idx == 10); 1319 assert(!asterisk); 1320 1321 idx = 0; 1322 assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd); 1323 assert(idx == 12); 1324 assert(asterisk); 1325 1326 // Too short formats 1327 foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19", 1328 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"]) 1329 { 1330 idx = 0; 1331 assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); 1332 assert(idx == s.length); 1333 } 1334 1335 1336 // Undefined format combinations 1337 foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", 1338 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", 1339 "%jp", 
"%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", 1340 "%-", "%+", "%#", "%0", "%.", "%Ln"]) 1341 { 1342 idx = 0; 1343 assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); 1344 assert(idx == s.length); 1345 1346 } 1347 1348 // Invalid scansets 1349 foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"]) 1350 { 1351 idx = 0; 1352 assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); 1353 assert(idx == s.length); 1354 } 1355 1356 }