1 /**
2 * Check the arguments to `printf` and `scanf` against the `format` string.
3 *
4 * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved
5 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright)
6 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8 * Documentation: https://dlang.org/phobos/dmd_chkformat.html
9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10 */
11 module dmd.chkformat;
12
13 //import core.stdc.stdio : printf, scanf;
14 import core.stdc.ctype : isdigit;
15
16 import dmd.astenums;
17 import dmd.cond;
18 import dmd.errors;
19 import dmd.expression;
20 import dmd.globals;
21 import dmd.identifier;
22 import dmd.mtype;
23 import dmd.target;
24
25
/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue diagnostics for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * All diagnostics are emitted through `deprecation`.
 *
 * Params:
 *      loc = location for diagnostics about the format string itself
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string needs more arguments than were supplied
 *      (checking stops at that point), `false` otherwise. Type mismatches and
 *      invalid specifiers are diagnosed but do not change the return value.
 * References:
 * C99 7.19.6.1
 * https://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n, gnu_m_count;    // index in args / number of Format.GNU_m
    for (size_t i = 0; i < format.length;)
    {
        // Skip literal text up to the next conversion specification.
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        if (fmt == Format.GNU_m)
            ++gnu_m_count;

        /* Fetch the next argument, or null if they are exhausted.
         * When exhausted, either a diagnostic is issued (caller then returns `true`)
         * or, if a `%m` has been seen, `skip` is set so the caller silently moves on.
         */
        Expression getNextArg(ref bool skip)
        {
            if (n == args.length)
            {
                // Written as an addition so it cannot wrap around in size_t
                // when there are more `%m` specifiers than `n + 1`.
                if (args.length + gnu_m_count < n + 1)
                    deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                else
                    skip = true;
                return null;
            }
            return args[n++];
        }

        // Report that `arg` has type `tactual` where `texpect` was required.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // A `*` width consumes an `int` argument.
        if (widthStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        // A `*` precision consumes an `int` argument.
        if (precisionStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        bool skip;
        auto e = getNextArg(skip);
        if (skip)
            continue;
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, fmt == Format.u ? "uint" : "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                {
                    if (fmt == Format.lu)
                        errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t);
                    else
                        errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                }
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                {
                    if (fmt == Format.ju)
                        errorMsg(null, e, "core.stdc.stdint.uintmax_t", t);
                    else
                        errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                }
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.GNU_a:  // Format.GNU_a is only for scanf
            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `hh` designates a signed char, i.e. D `byte` (Tint8), not `short`.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
                break;  // not assert(0) because it may go through it if there are extra arguments

            case Format.percent:
                assert(0);
        }
    }
    return false;
}
300
/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue diagnostics for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Every argument must be a pointer. The pointee type is generally matched
 * exactly, including signedness (e.g. `%d` needs `int*`, `%u` needs `uint*`);
 * the character string specifiers accept pointers to `char`, `byte` or `ubyte`.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * All diagnostics are emitted through `deprecation`.
 *
 * Params:
 *      loc = location for diagnostics about the format string itself
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string needs more arguments than were supplied
 *      (checking stops at that point), `false` otherwise. Type mismatches and
 *      invalid specifiers are diagnosed but do not change the return value.
 * References:
 * C99 7.19.6.2
 * https://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;   // index of the next unconsumed entry in args
    for (size_t i = 0; i < format.length;)
    {
        // Skip literal text up to the next conversion specification.
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        /* Fetch the next argument, or diagnose and return null if they are exhausted.
         * Assignment-suppressed (`*`) specifiers never get here, so running out
         * of arguments is always reported.
         */
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` has type `tactual` where `texpect` was required.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // `hh` designates a signed char, i.e. D `byte` (Tint8), not `short`.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.GNU_a:
            case Format.GNU_m:
            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);
        }
    }
    return false;
}
513
514 private:
515
/**************************************
 * Parse a scanf *format specifier*, which has the form:
 *
 * `%[*][width][length]specifier`
 *
 * A `[...]` scanset may appear after the width; it is only checked for a
 * closing `]` and must be followed by at least one more character.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *            on exit, index just past the parsed text,
 *            even if `Format.error` is returned
 *      asterisk = set if a `*` sub-specifier is present and is followed
 *                 by at least one more character
 * Returns:
 *      the `Format` that was recognized, `Format.percent` for `%%`,
 *      or `Format.error` if the specifier is truncated or not recognized
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    size_t pos = idx;
    assert(format[pos] == '%');
    const end = format.length;

    // Publish how far parsing got and signal failure.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // step over the '%'
    if (++pos == end)
        return fail();

    // "%%" is a literal percent sign
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // optional assignment-suppression `*`; only recorded if something follows it
    if (format[pos] == '*')
    {
        if (++pos == end)
            return fail();
        asterisk = true;
    }

    // optional decimal field width
    while (isdigit(format[pos]))
    {
        if (++pos == end)
            return fail();
    }

    /* Optional scanset. Its contents are arbitrary, so only look
     * for the terminating bracket.
     */
    if (format[pos] == '[')
    {
        while (pos < end && format[pos] != ']')
            ++pos;

        // ran off the end without seeing `]`
        if (pos == end)
            return fail();

        // step past `]`; there must still be a specifier character left.
        // Kept as a separate check so that idx ends up at the right index.
        if (++pos == end)
            return fail();
    }

    // length modifier + conversion character
    char genericChar;
    const result = parseGenericFormatSpecifier(format, pos, genericChar);

    // idx is advanced to pos on success and on failure alike
    idx = pos;
    return result;
}
603
/**************************************
 * Parse a printf *format specifier*, which has the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * After the specifier is recognized, flag/width/precision combinations that
 * are rejected for it are turned into `Format.error`:
 * `#` or `0` with `c`/`s`, `#` with `d`/`i`, and any flag, width or
 * precision with `n`.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the `%` that starts the specifier;
 *            on exit, index just past the parsed text,
 *            even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 * Returns:
 *      the `Format` that was recognized, `Format.percent` for `%%`,
 *      or `Format.error` if the specifier is truncated, not recognized,
 *      or uses a rejected combination
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar) nothrow pure @safe
{
    size_t pos = idx;
    assert(format[pos] == '%');
    const end = format.length;

    bool sawHash;       // `#` flag
    bool sawZero;       // `0` flag
    bool sawOtherFlag;  // any of `-`, `+`, ` `
    bool sawWidth;
    bool sawPrecision;

    // Publish how far parsing got and signal failure.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // step over the '%'
    if (++pos == end)
        return fail();

    // "%%" is a literal percent sign
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    /* flags: any number of them, in any order
     */
    flagLoop:
    for (;;)
    {
        switch (format[pos])
        {
            case '-', '+', ' ':
                sawOtherFlag = true;
                break;

            case '#':
                sawHash = true;
                break;

            case '0':
                sawZero = true;
                break;

            default:
                break flagLoop;
        }
        if (++pos == end)
            return fail();
    }

    /* field width: either `*` or a decimal number not starting with 0
     */
    if (format[pos] == '*')
    {
        sawWidth = true;
        widthStar = true;
        if (++pos == end)
            return fail();
    }
    else if (format[pos] >= '1' && format[pos] <= '9')
    {
        sawWidth = true;
        do
        {
            if (++pos == end)
                return fail();
        }
        while (format[pos] >= '0' && format[pos] <= '9');
    }

    /* precision: `.` optionally followed by `*` or a decimal number
     */
    if (format[pos] == '.')
    {
        sawPrecision = true;
        if (++pos == end)
            return fail();

        if (format[pos] == '*')
        {
            precisionStar = true;
            if (++pos == end)
                return fail();
        }
        else if (format[pos] >= '0' && format[pos] <= '9')
        {
            do
            {
                if (++pos == end)
                    return fail();
            }
            while (format[pos] >= '0' && format[pos] <= '9');
        }
    }

    /* length modifier + conversion character
     */
    char genericChar;
    const result = parseGenericFormatSpecifier(format, pos, genericChar);
    if (result == Format.error)
        return fail();

    // Reject flag/width/precision combinations not accepted for this conversion.
    bool rejected;
    switch (genericChar)
    {
        case 'c', 's':
            rejected = sawHash || sawZero;
            break;

        case 'd', 'i':
            rejected = sawHash;
            break;

        case 'n':
            rejected = sawHash || sawZero || sawPrecision || sawWidth || sawOtherFlag;
            break;

        default:
            break;
    }
    if (rejected)
        return fail();

    idx = pos;
    return result;  // success
}
763
/* Different kinds of formatting specifications. Variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   The same enum is shared by the printf and scanf checkers.
   For `scanf`, every format is a pointer to the type named in the comment.
 */
enum Format
{
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t (the parser also yields this for `z` with u/o/x/X)
    td,         // ptrdiff_t (the parser also yields this for `t` with u/o/x/X)
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf)
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf)
    lc,         // wint_t (printf)
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_a,      // GNU ext. : address to a string with no maximum size (scanf)
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf) / length modifier (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}
807
/**************************************
 * Parse the *length modifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Recognized length modifiers are `hh`, `h`, `l`, `ll`, `j`, `z`, `t` and `L`.
 *
 * Params:
 *      format = format string
 *      idx = on entry, index of the start of the length modifier (or of the
 *            specifier if there is none); on exit, index just past the
 *            specifier character, even if `Format.error` is returned.
 *            If the string ends right after a length modifier, it is left
 *            equal to `format.length`.
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *                     format is `hhd`.
 *      useGNUExts = treat `a` as `Format.GNU_a` and accept `m` as `Format.GNU_m`
 * Returns:
 *      the `Format` that was recognized, or `Format.error`
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier, bool useGNUExts =
    findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @trusted
{
    const length = format.length;

    /* Length modifier.
     * `first` is the character at idx; when there is no length modifier
     * it is simply the specifier character itself.
     */
    const first = format[idx];
    bool single;    // one of j, z, t, L
    bool doubled;   // `hh` or `ll`
    switch (first)
    {
        case 'j', 'z', 't', 'L':
            if (++idx == length)
                return Format.error;
            single = true;
            break;

        case 'h', 'l':
            if (++idx == length)
                return Format.error;
            doubled = first == format[idx];
            if (doubled && ++idx == length)
                return Format.error;
            break;

        default:
            break;
    }

    // Choose among the integer-family variants according to the length modifier.
    Format pick(Format hh, Format h, Format ll, Format l,
                Format j, Format z, Format t, Format plain)
    {
        switch (first)
        {
            case 'h': return doubled ? hh : h;
            case 'l': return doubled ? ll : l;
            case 'j': return j;
            case 'z': return z;
            case 't': return t;
            default:  return plain;
        }
    }

    const hasBigL = first == 'L';
    // true for every length modifier except a lone `l` (and for `L`, which
    // is a "single" modifier)
    const notPlainOrLittleL = single || doubled || first == 'h';

    /* Conversion specifier.
     */
    const sc = format[idx];
    genSpecifier = sc;

    Format specifier;
    switch (sc)
    {
        case 'd', 'i':
            specifier = hasBigL
                ? Format.error
                : pick(Format.hhd, Format.hd, Format.lld, Format.ld,
                       Format.jd, Format.zd, Format.td, Format.d);
            break;

        case 'u', 'o', 'x', 'X':
            // `z` and `t` share the signed variants' enum members
            specifier = hasBigL
                ? Format.error
                : pick(Format.hhu, Format.hu, Format.llu, Format.lu,
                       Format.ju, Format.zd, Format.td, Format.u);
            break;

        case 'a', 'f', 'F', 'e', 'E', 'g', 'G', 'A':
            if (sc == 'a' && useGNUExts)
            {
                // https://www.gnu.org/software/libc/manual/html_node/Dynamic-String-Input.html
                specifier = Format.GNU_a;
            }
            else if (hasBigL)
                specifier = Format.Lg;
            else if (notPlainOrLittleL)
                specifier = Format.error;
            else if (first == 'l')
                specifier = Format.lg;
            else
                specifier = Format.g;
            break;

        case 'c':
            if (notPlainOrLittleL)
                specifier = Format.error;
            else if (first == 'l')
                specifier = Format.lc;
            else
                specifier = Format.c;
            break;

        case 's':
            if (notPlainOrLittleL)
                specifier = Format.error;
            else if (first == 'l')
                specifier = Format.ls;
            else
                specifier = Format.s;
            break;

        case 'p':
            // no length modifier at all is accepted here
            specifier = (notPlainOrLittleL || first == 'l') ? Format.error : Format.p;
            break;

        case 'n':
            specifier = hasBigL
                ? Format.error
                : pick(Format.hhn, Format.hn, Format.lln, Format.ln,
                       Format.jn, Format.zn, Format.tn, Format.n);
            break;

        case 'm':
            // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
            specifier = useGNUExts ? Format.GNU_m : Format.error;
            break;

        default:
            specifier = Format.error;
            break;
    }

    // step past the specifier character, recognized or not
    ++idx;
    return specifier;
}
972
973 unittest
974 {
975 /* parseGenericFormatSpecifier
976 */
977
978 char genSpecifier;
979 size_t idx;
980
981 assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd);
982 assert(genSpecifier == 'd');
983
984 idx = 0;
985 assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn);
986 assert(genSpecifier == 'n');
987
988 idx = 0;
989 assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd);
990 assert(genSpecifier == 'i');
991
992 idx = 0;
993 assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu);
994 assert(genSpecifier == 'u');
995
996 idx = 0;
997 assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);
998
999 /* parsePrintfFormatSpecifier
1000 */
1001
1002 bool widthStar;
1003 bool precisionStar;
1004
1005 // one for each Format
1006 idx = 0;
1007 assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d);
1008 assert(idx == 2);
1009 assert(!widthStar && !precisionStar);
1010
1011 idx = 0;
1012 assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld);
1013 assert(idx == 3);
1014
1015 idx = 0;
1016 assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld);
1017 assert(idx == 4);
1018
1019 idx = 0;
1020 assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd);
1021 assert(idx == 3);
1022
1023 idx = 0;
1024 assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd);
1025 assert(idx == 3);
1026
1027 idx = 0;
1028 assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td);
1029 assert(idx == 3);
1030
1031 idx = 0;
1032 assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g);
1033 assert(idx == 2);
1034
1035 idx = 0;
1036 assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg);
1037 assert(idx == 3);
1038
1039 idx = 0;
1040 assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p);
1041 assert(idx == 2);
1042
1043 idx = 0;
1044 assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n);
1045 assert(idx == 2);
1046
1047 idx = 0;
1048 assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln);
1049 assert(idx == 3);
1050
1051 idx = 0;
1052 assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln);
1053 assert(idx == 4);
1054
1055 idx = 0;
1056 assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn);
1057 assert(idx == 3);
1058
1059 idx = 0;
1060 assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn);
1061 assert(idx == 4);
1062
1063 idx = 0;
1064 assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn);
1065 assert(idx == 3);
1066
1067 idx = 0;
1068 assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn);
1069 assert(idx == 3);
1070
1071 idx = 0;
1072 assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn);
1073 assert(idx == 3);
1074
1075 idx = 0;
1076 assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c);
1077 assert(idx == 2);
1078
1079 idx = 0;
1080 assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc);
1081 assert(idx == 3);
1082
1083 idx = 0;
1084 assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s);
1085 assert(idx == 2);
1086
1087 idx = 0;
1088 assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls);
1089 assert(idx == 3);
1090
1091 idx = 0;
1092 assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent);
1093 assert(idx == 2);
1094
1095 // Synonyms
1096 idx = 0;
1097 assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d);
1098 assert(idx == 2);
1099
1100 idx = 0;
1101 assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u);
1102 assert(idx == 2);
1103
1104 idx = 0;
1105 assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u);
1106 assert(idx == 2);
1107
1108 idx = 0;
1109 assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u);
1110 assert(idx == 2);
1111
1112 idx = 0;
1113 assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u);
1114 assert(idx == 2);
1115
1116 idx = 0;
1117 assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g);
1118 assert(idx == 2);
1119
1120 idx = 0;
1121 assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g);
1122 assert(idx == 2);
1123
1124 idx = 0;
1125 assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g);
1126 assert(idx == 2);
1127
1128 idx = 0;
1129 Format g = parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar);
1130 assert(g == Format.g || g == Format.GNU_a);
1131 assert(idx == 2);
1132
1133 idx = 0;
1134 assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g);
1135 assert(idx == 2);
1136
1137 idx = 0;
1138 assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.lg);
1139 assert(idx == 3);
1140
1141 // width, precision
1142 idx = 0;
1143 assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d);
1144 assert(idx == 3);
1145 assert(widthStar && !precisionStar);
1146
1147 idx = 0;
1148 assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d);
1149 assert(idx == 4);
1150 assert(!widthStar && precisionStar);
1151
1152 idx = 0;
1153 assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d);
1154 assert(idx == 5);
1155 assert(widthStar && precisionStar);
1156
1157 // Too short formats
1158 {
foreach(s;["%","%-","%+","% ","%#","%0","%*","%1","%19","%.","%.*","%.1","%.12","%j","%z","%t","%l","%h","%ll","%hh"])1159 foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
1160 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
1161 {
1162 idx = 0;
1163 assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
1164 assert(idx == s.length);
1165 }
1166 }
1167
1168 // Undefined format combinations
1169 {
foreach(s;["%#d","%llg","%jg","%zg","%tg","%hg","%hhg","%#c","%0c","%jc","%zc","%tc","%Lc","%hc","%hhc","%llc","%#s","%0s","%js","%zs","%ts","%Ls","%hs","%hhs","%lls","%jp","%zp","%tp","%Lp","%hp","%lp","%hhp","%llp","%-n","%+n","% n","%#n","%0n","%*n","%1n","%19n","%.n","%.*n","%.1n","%.12n","%Ln","%K"])1170 foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
1171 "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
1172 "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
1173 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
1174 "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
1175 {
1176 idx = 0;
1177 assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
1178 assert(idx == s.length);
1179 }
1180 }
1181
1182 /* parseScanfFormatSpecifier
1183 */
1184
1185 bool asterisk;
1186
1187 // one for each Format
1188 idx = 0;
1189 assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d);
1190 assert(idx == 2);
1191 assert(!asterisk);
1192
1193 idx = 0;
1194 assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd);
1195 assert(idx == 4);
1196
1197 idx = 0;
1198 assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd);
1199 assert(idx == 3);
1200
1201 idx = 0;
1202 assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld);
1203 assert(idx == 3);
1204
1205 idx = 0;
1206 assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld);
1207 assert(idx == 4);
1208
1209 idx = 0;
1210 assert(parseScanfFormatSpecifier("%jd", idx, asterisk) == Format.jd);
1211 assert(idx == 3);
1212
1213 idx = 0;
1214 assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd);
1215 assert(idx == 3);
1216
1217 idx = 0;
1218 assert(parseScanfFormatSpecifier("%td", idx, asterisk,) == Format.td);
1219 assert(idx == 3);
1220
1221 idx = 0;
1222 assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u);
1223 assert(idx == 2);
1224
1225 idx = 0;
1226 assert(parseScanfFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu);
1227 assert(idx == 4);
1228
1229 idx = 0;
1230 assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu);
1231 assert(idx == 3);
1232
1233 idx = 0;
1234 assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu);
1235 assert(idx == 3);
1236
1237 idx = 0;
1238 assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu);
1239 assert(idx == 4);
1240
1241 idx = 0;
1242 assert(parseScanfFormatSpecifier("%ju", idx, asterisk) == Format.ju);
1243 assert(idx == 3);
1244
1245 idx = 0;
1246 assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g);
1247 assert(idx == 2);
1248
1249 idx = 0;
1250 assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg);
1251 assert(idx == 3);
1252
1253 idx = 0;
1254 assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg);
1255 assert(idx == 3);
1256
1257 idx = 0;
1258 assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p);
1259 assert(idx == 2);
1260
1261 idx = 0;
1262 assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s);
1263 assert(idx == 2);
1264
1265 idx = 0;
1266 assert(parseScanfFormatSpecifier("%ls", idx, asterisk,) == Format.ls);
1267 assert(idx == 3);
1268
1269 idx = 0;
1270 assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent);
1271 assert(idx == 2);
1272
1273 // Synonyms
1274 idx = 0;
1275 assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d);
1276 assert(idx == 2);
1277
1278 idx = 0;
1279 assert(parseScanfFormatSpecifier("%n", idx, asterisk) == Format.n);
1280 assert(idx == 2);
1281
1282 idx = 0;
1283 assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u);
1284 assert(idx == 2);
1285
1286 idx = 0;
1287 assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u);
1288 assert(idx == 2);
1289
1290 idx = 0;
1291 assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g);
1292 assert(idx == 2);
1293
1294 idx = 0;
1295 assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g);
1296 assert(idx == 2);
1297
1298 idx = 0;
1299 g = parseScanfFormatSpecifier("%a", idx, asterisk);
1300 assert(g == Format.g || g == Format.GNU_a);
1301 assert(idx == 2);
1302
1303 idx = 0;
1304 assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c);
1305 assert(idx == 2);
1306
1307 // asterisk
1308 idx = 0;
1309 assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d);
1310 assert(idx == 3);
1311 assert(asterisk);
1312
1313 idx = 0;
1314 assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld);
1315 assert(idx == 4);
1316 assert(!asterisk);
1317
1318 idx = 0;
1319 assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd);
1320 assert(idx == 10);
1321 assert(asterisk);
1322
1323 // scansets
1324 idx = 0;
1325 assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s);
1326 assert(idx == 10);
1327 assert(!asterisk);
1328
1329 idx = 0;
1330 assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd);
1331 assert(idx == 12);
1332 assert(asterisk);
1333
1334 // Too short formats
foreach(s;["%","% ","%#","%0","%*","%1","%19","%j","%z","%t","%l","%h","%ll","%hh","%K"])1335 foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
1336 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
1337 {
1338 idx = 0;
1339 assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
1340 assert(idx == s.length);
1341 }
1342
1343
1344 // Undefined format combinations
foreach(s;["%Ld","%llg","%jg","%zg","%tg","%hg","%hhg","%jc","%zc","%tc","%Lc","%hc","%hhc","%llc","%jp","%zp","%tp","%Lp","%hp","%lp","%hhp","%llp","%-","%+","%#","%0","%.","%Ln"])1345 foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
1346 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
1347 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
1348 "%-", "%+", "%#", "%0", "%.", "%Ln"])
1349 {
1350 idx = 0;
1351 assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
1352 assert(idx == s.length);
1353
1354 }
1355
1356 // Invalid scansets
foreach(s;["%[]","%[s","%[0-9lld","%[","%[a-z]"])1357 foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"])
1358 {
1359 idx = 0;
1360 assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
1361 assert(idx == s.length);
1362 }
1363
1364 }
1365