/**
 * Check the arguments to `printf` and `scanf` against the `format` string.
 *
 * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
 * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
 */
module dmd.chkformat;

//import core.stdc.stdio : printf, scanf;
import core.stdc.ctype : isdigit;

import dmd.errors;
import dmd.expression;
import dmd.globals;
import dmd.mtype;
import dmd.target;


/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue deprecations for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string requires more arguments than were
 *      supplied (checking stops at that point); `false` otherwise, even
 *      when type-mismatch or invalid-specifier diagnostics were issued
 * References:
 * C99 7.19.6.1
 * http://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n = 0;       // index of the next unconsumed entry of `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return `null` when none is left.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` (of type `tactual`) does not match what `specifier` expects.
        void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)specifier.length, specifier.ptr, texpect, tactual.toChars());
        }

        // A `*` width consumes an `int` argument before the value itself
        if (widthStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", slice, e, "int", t);
        }

        // A `*` precision consumes another `int` argument
        if (precisionStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", slice, e, "int", t);
        }

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, slice, e, "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, slice, e, "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int" : "long"), t);
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, slice, e, "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t", t);
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, slice, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, slice, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, slice, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, slice, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, slice, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, slice, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, slice, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `signed char` is D's `byte` (Tint8), matching the "byte*" diagnostic
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, slice, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, slice, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, slice, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, slice, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.isWindows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, slice, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);      // handled before any argument is fetched
        }
    }
    return false;
}

/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue deprecations for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string requires more arguments than were
 *      supplied (checking stops at that point); `false` otherwise, even
 *      when type-mismatch or invalid-specifier diagnostics were issued
 * References:
 * C99 7.19.6.2
 * http://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;       // index of the next unconsumed entry of `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return `null` when none is left.
        // `asterisk` is always false here: suppressed specifiers were skipped above.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` (of type `tactual`) does not match what `specifier` expects.
        void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)specifier.length, specifier.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const is64bit = global.params.is64bit;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, slice, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // `signed char` is D's `byte` (Tint8), matching the "byte*" diagnostic
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, slice, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, slice, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, slice, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, slice, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, slice, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, slice, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, slice, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                // C `unsigned long` follows the target's `long` size, which is not
                // the pointer size on every target; keep check and message in step
                if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64)))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, slice, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                // uintmax_t is treated as 64 bits everywhere else in this module
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, slice, e, "ulong*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, slice, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, slice, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, slice, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, slice, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.isWindows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, slice, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, slice, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);      // handled before any argument is fetched
        }
    }
    return false;
}

private:

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[*][width][length]specifier`
 *
 * A scanset (`[...]`) may appear between the width and the length modifier;
 * it is only checked for a closing `]` and must be followed by a specifier.
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      Format
 */
pure nothrow @safe
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk)
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;

    // Publish how far parsing got and signal failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    // * sub-specifier
    if (format[i] == '*')
    {
        ++i;
        if (i == length)
            return error();
        asterisk = true;
    }

    // fieldWidth
    while (isdigit(format[i]))
    {
        i++;
        if (i == length)
            return error();
    }

    /* Read the scanset
     * A scanset can be anything, so we just check that it is paired
     */
    if (format[i] == '[')
    {
        while (i < length)
        {
            if (format[i] == ']')
                break;
            ++i;
        }

        // no `]` found
        if (i == length)
            return error();

        ++i;
        // no specifier after `]`
        // it could be mixed with the one above, but then idx won't have the right index
        if (i == length)
            return error();
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier = parseGenericFormatSpecifier(format, i, genSpec);
    if (specifier == Format.error)
        return error();

    idx = i;
    return specifier;  // success
}

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 * Returns:
 *      Format
 */
pure nothrow @safe
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar)
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;
    bool hash;          // `#` flag seen
    bool zero;          // `0` flag seen
    bool flags;         // any of `-`, `+`, ` ` seen
    bool width;         // a field width (number or `*`) was given
    bool precision;     // a `.` precision was given

    // Publish how far parsing got and signal failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    /* Read the `flags`
     */
    while (1)
    {
        const c = format[i];
        if (c == '-' ||
            c == '+' ||
            c == ' ')
        {
            flags = true;
        }
        else if (c == '#')
        {
            hash = true;
        }
        else if (c == '0')
        {
            zero = true;
        }
        else
            break;
        ++i;
        if (i == length)
            return error();
    }

    /* Read the `field width`
     */
    {
        const c = format[i];
        if (c == '*')
        {
            width = true;
            widthStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('1' <= c && c <= '9')
        {
            width = true;
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the `precision`
     */
    if (format[i] == '.')
    {
        precision = true;
        ++i;
        if (i == length)
            return error();
        const c = format[i];
        if (c == '*')
        {
            precisionStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('0' <= c && c <= '9')
        {
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier = parseGenericFormatSpecifier(format, i, genSpec);
    if (specifier == Format.error)
        return error();

    // Reject flag/width/precision combinations that are undefined for the conversion
    switch (genSpec)
    {
        case 'c':
        case 's':
            if (hash || zero)
                return error();
            break;

        case 'd':
        case 'i':
            if (hash)
                return error();
            break;

        case 'n':
            if (hash || zero || precision || width || flags)
                return error();
            break;

        default:
            break;
    }

    idx = i;
    return specifier;  // success
}

/* Different kinds of formatting specifications, variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   For `scanf`, every format is a pointer.
 */
enum Format
{
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t
    td,         // ptrdiff_t
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf)
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf)
    lc,         // wint_t (printf)
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}

/**************************************
 * Parse the *length specifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *           format is `hhd`.
 * Returns:
 *      Format
 */
pure @safe nothrow
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier)
{
    const length = format.length;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    bool lm1;        // if jztL
    bool lm2;        // if `hh` or `ll`
    if (lm == 'j' ||
        lm == 'z' ||
        lm == 't' ||
        lm == 'L')
    {
        ++idx;
        if (idx == length)
            return Format.error;
        lm1 = true;
    }
    else if (lm == 'h' || lm == 'l')
    {
        ++idx;
        if (idx == length)
            return Format.error;
        lm2 = lm == format[idx];        // doubled modifier: `hh` or `ll`
        if (lm2)
        {
            ++idx;
            if (idx == length)
                return Format.error;
        }
    }

    /* Read the `specifier`
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'h' && lm2 ? Format.hhd :
                            lm == 'h'        ? Format.hd  :
                            lm == 'l' && lm2 ? Format.lld :
                            lm == 'l'        ? Format.ld  :
                            lm == 'j'        ? Format.jd  :
                            lm == 'z'        ? Format.zd  :
                            lm == 't'        ? Format.td  :
                                               Format.d;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'h' && lm2 ? Format.hhu :
                            lm == 'h'        ? Format.hu  :
                            lm == 'l' && lm2 ? Format.llu :
                            lm == 'l'        ? Format.lu  :
                            lm == 'j'        ? Format.ju  :
                            lm == 'z'        ? Format.zd  :
                            lm == 't'        ? Format.td  :
                                               Format.u;
            break;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'a':
        case 'A':
            if (lm == 'L')
                specifier = Format.Lg;
            else if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.lg : Format.g;
            break;

        case 'c':
            if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.lc : Format.c;
            break;

        case 's':
            if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.ls : Format.s;
            break;

        case 'p':
            if (lm1 || lm2 || lm == 'h' || lm == 'l')
                specifier = Format.error;
            else
                specifier = Format.p;
            break;

        case 'n':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'l' && lm2 ? Format.lln :
                            lm == 'l'        ? Format.ln  :
                            lm == 'h' && lm2 ? Format.hhn :
                            lm == 'h'        ? Format.hn  :
                            lm == 'j'        ? Format.jn  :
                            lm == 'z'        ? Format.zn  :
                            lm == 't'        ? Format.tn  :
                                               Format.n;
            break;

        default:
            specifier = Format.error;
            break;
    }

    ++idx;      // step past the specifier character, valid or not
    return specifier; // success
}

unittest
{
    // A format specifier and the `Format` it must parse to
    static struct SpecCase
    {
        string text;
        Format expected;
    }

    /* parseGenericFormatSpecifier
     */

    char genSpecifier;
    size_t idx;

    assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd);
    assert(genSpecifier == 'd');

    idx = 0;
    assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn);
    assert(genSpecifier == 'n');

    idx = 0;
    assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd);
    assert(genSpecifier == 'i');

    idx = 0;
    assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu);
    assert(genSpecifier == 'u');

    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);

    /* parsePrintfFormatSpecifier
     */

    bool widthStar;
    bool precisionStar;

    // one for each Format, followed by the synonyms
    foreach (tc; [
        SpecCase("%d",   Format.d),
        SpecCase("%ld",  Format.ld),
        SpecCase("%lld", Format.lld),
        SpecCase("%jd",  Format.jd),
        SpecCase("%zd",  Format.zd),
        SpecCase("%td",  Format.td),
        SpecCase("%g",   Format.g),
        SpecCase("%Lg",  Format.Lg),
        SpecCase("%p",   Format.p),
        SpecCase("%n",   Format.n),
        SpecCase("%ln",  Format.ln),
        SpecCase("%lln", Format.lln),
        SpecCase("%hn",  Format.hn),
        SpecCase("%hhn", Format.hhn),
        SpecCase("%jn",  Format.jn),
        SpecCase("%zn",  Format.zn),
        SpecCase("%tn",  Format.tn),
        SpecCase("%c",   Format.c),
        SpecCase("%lc",  Format.lc),
        SpecCase("%s",   Format.s),
        SpecCase("%ls",  Format.ls),
        SpecCase("%%",   Format.percent),
        // Synonyms
        SpecCase("%i",   Format.d),
        SpecCase("%u",   Format.u),
        SpecCase("%o",   Format.u),
        SpecCase("%x",   Format.u),
        SpecCase("%X",   Format.u),
        SpecCase("%f",   Format.g),
        SpecCase("%F",   Format.g),
        SpecCase("%G",   Format.g),
        SpecCase("%a",   Format.g),
        SpecCase("%A",   Format.g),
        SpecCase("%lg",  Format.lg),
    ])
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(tc.text, idx, widthStar, precisionStar) == tc.expected);
        assert(idx == tc.text.length);
        assert(!widthStar && !precisionStar);
    }

    // width, precision
    idx = 0;
    assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 3);
    assert(widthStar && !precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 4);
    assert(!widthStar && precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 5);
    assert(widthStar && precisionStar);

    // Too short formats
    {
        foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                     "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
        {
            idx = 0;
            assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
            assert(idx == s.length);
        }
    }

    // Undefined format combinations
    {
        foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                     "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                     "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                     "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                     "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
        {
            idx = 0;
            assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
            assert(idx == s.length);
        }
    }

    /* parseScanfFormatSpecifier
     */

    bool asterisk;

    // one for each Format, followed by the synonyms
    foreach (tc; [
        SpecCase("%d",   Format.d),
        SpecCase("%hhd", Format.hhd),
        SpecCase("%hd",  Format.hd),
        SpecCase("%ld",  Format.ld),
        SpecCase("%lld", Format.lld),
        SpecCase("%jd",  Format.jd),
        SpecCase("%zd",  Format.zd),
        SpecCase("%td",  Format.td),
        SpecCase("%u",   Format.u),
        SpecCase("%hhu", Format.hhu),
        SpecCase("%hu",  Format.hu),
        SpecCase("%lu",  Format.lu),
        SpecCase("%llu", Format.llu),
        SpecCase("%ju",  Format.ju),
        SpecCase("%g",   Format.g),
        SpecCase("%lg",  Format.lg),
        SpecCase("%Lg",  Format.Lg),
        SpecCase("%p",   Format.p),
        SpecCase("%s",   Format.s),
        SpecCase("%ls",  Format.ls),
        SpecCase("%%",   Format.percent),
        // Synonyms
        SpecCase("%i",   Format.d),
        SpecCase("%n",   Format.n),
        SpecCase("%o",   Format.u),
        SpecCase("%x",   Format.u),
        SpecCase("%f",   Format.g),
        SpecCase("%e",   Format.g),
        SpecCase("%a",   Format.g),
        SpecCase("%c",   Format.c),
    ])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(tc.text, idx, asterisk) == tc.expected);
        assert(idx == tc.text.length);
        assert(!asterisk);
    }

    // asterisk
    idx = 0;
    assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d);
    assert(idx == 3);
    assert(asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld);
    assert(idx == 4);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd);
    assert(idx == 10);
    assert(asterisk);

    // scansets
    idx = 0;
    assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s);
    assert(idx == 10);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd);
    assert(idx == 12);
    assert(asterisk);

    // Too short formats
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }


    // Undefined format combinations
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }

    // Invalid scansets
    foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }
}