xref: /netbsd-src/external/gpl3/gcc/dist/gcc/d/dmd/chkformat.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /**
2  * Check the arguments to `printf` and `scanf` against the `format` string.
3  *
4  * Copyright:   Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8  * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10  */
11 module dmd.chkformat;
12 
13 //import core.stdc.stdio : printf, scanf;
14 import core.stdc.ctype : isdigit;
15 
16 import dmd.astenums;
17 import dmd.cond;
18 import dmd.errors;
19 import dmd.expression;
20 import dmd.globals;
21 import dmd.identifier;
22 import dmd.mtype;
23 import dmd.target;
24 
25 
26 /******************************************
27  * Check that arguments to a printf format string are compatible
28  * with that string. Issue errors for incompatibilities.
29  *
30  * Follows the C99 specification for printf.
31  *
32  * Takes a generous, rather than strict, view of compatibility.
33  * For example, an unsigned value can be formatted with a signed specifier.
34  *
35  * Diagnosed incompatibilities are:
36  *
37  * 1. incompatible sizes which will cause argument misalignment
38  * 2. dereferencing arguments that are not pointers
39  * 3. insufficient number of arguments
40  * 4. struct arguments
41  * 5. array and slice arguments
42  * 6. non-pointer arguments to `s` specifier
43  * 7. non-standard formats
44  * 8. undefined behavior per C99
45  *
46  * Per the C Standard, extra arguments are ignored.
47  *
48  * No attempt is made to fix the arguments or the format string.
49  *
50  * Params:
51  *      loc = location for error messages
52  *      format = format string
53  *      args = arguments to match with format string
54  *      isVa_list = if a "v" function (format check only)
55  *
56  * Returns:
57  *      `true` if errors occurred
58  * References:
59  * C99 7.19.6.1
60  * https://www.cplusplus.com/reference/cstdio/printf/
61  */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    // Walks `format`, parsing each `%...` specifier and pairing it with the
    // next entry of `args`.  Type mismatches and invalid specifiers are
    // reported through `deprecation` and scanning continues; `true` is
    // returned only when the arguments run out.

    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n, gnu_m_count;    // index in args / number of Format.GNU_m
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        if (fmt == Format.GNU_m)
            ++gnu_m_count;

        /* Fetch the next argument, or `null` when `args` is exhausted.
         * On exhaustion: if no `%m` has been seen so far the shortage is
         * reported; otherwise `skip` is set and the caller silently moves on
         * to the next specifier.
         */
        Expression getNextArg(ref bool skip)
        {
            if (n == args.length)
            {
                // Was `args.length < (n + 1) - gnu_m_count`.  With
                // `n == args.length` that means `gnu_m_count < 1`, but the
                // unsigned subtraction wrapped around once
                // `gnu_m_count > n + 1`, producing a spurious deprecation.
                if (gnu_m_count == 0)
                    deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                else
                    skip = true;
                return null;
            }
            return args[n++];
        }

        // Report that `arg` has type `tactual` where `texpect` was required.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // A `*` width consumes one `int` argument ahead of the value.
        if (widthStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        // A `.*` precision consumes one `int` argument as well.
        if (precisionStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        // The argument for the conversion itself.
        bool skip;
        auto e = getNextArg(skip);
        if (skip)
            continue;
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, fmt == Format.u ? "uint" : "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                {
                    if (fmt == Format.lu)
                        errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t);
                    else
                        errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                }
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                {
                    if (fmt == Format.ju)
                        errorMsg(null, e, "core.stdc.stdint.uintmax_t", t);
                    else
                        errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                }
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.GNU_a:  // Format.GNU_a is only for scanf
            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `%hhn` stores through a `signed char*`, i.e. D `byte*`
                // (Tint8).  This used to test Tint16, which accepted
                // `short*` and rejected the correct `byte*`.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
                break;  // not assert(0) because it may go through it if there are extra arguments

            case Format.percent:
                assert(0);
        }
    }
    return false;
}
300 
301 /******************************************
302  * Check that arguments to a scanf format string are compatible
303  * with that string. Issue errors for incompatibilities.
304  *
305  * Follows the C99 specification for scanf.
306  *
307  * Takes a generous, rather than strict, view of compatibility.
308  * For example, an unsigned value can be formatted with a signed specifier.
309  *
310  * Diagnosed incompatibilities are:
311  *
312  * 1. incompatible sizes which will cause argument misalignment
313  * 2. dereferencing arguments that are not pointers
314  * 3. insufficient number of arguments
315  * 4. struct arguments
316  * 5. array and slice arguments
317  * 6. non-standard formats
318  * 7. undefined behavior per C99
319  *
320  * Per the C Standard, extra arguments are ignored.
321  *
322  * No attempt is made to fix the arguments or the format string.
323  *
324  * Params:
325  *      loc = location for error messages
326  *      format = format string
327  *      args = arguments to match with format string
328  *      isVa_list = if a "v" function (format check only)
329  *
330  * Returns:
331  *      `true` if errors occurred
332  * References:
333  * C99 7.19.6.2
334  * https://www.cplusplus.com/reference/cstdio/scanf/
335  */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    // Walks `format`, parsing each `%...` specifier and pairing it with the
    // next entry of `args`; every scanf argument must be a pointer to the
    // destination type.  Mismatches and invalid specifiers are reported
    // through `deprecation` and scanning continues; `true` is returned only
    // when the arguments run out.
    size_t n = 0;   // index of the next unconsumed entry in `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        /* Fetch the next argument; report and return `null` when `args` is
         * exhausted.  `asterisk` is always false here (such specifiers were
         * skipped above), so the shortage is reported unconditionally.
         */
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` has type `tactual` where `texpect` was required.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // The destination is a `signed char`, i.e. D `byte` (Tint8).
                // This used to test Tint16, which accepted `short*` and
                // rejected the correct `byte*`.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.GNU_a:
            case Format.GNU_m:
            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);
        }
    }
    return false;
}
513 
514 private:
515 
516 /**************************************
517  * Parse the *format specifier* which is of the form:
518  *
519  * `%[*][width][length]specifier`
520  *
521  * Params:
522  *      format = format string
523  *      idx = index of `%` of start of format specifier,
524  *          which gets updated to index past the end of it,
525  *          even if `Format.error` is returned
526  *      asterisk = set if there is a `*` sub-specifier
527  * Returns:
528  *      Format
529  */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    assert(format[idx] == '%');
    const end = format.length;
    size_t pos = idx + 1;       // cursor, starting just past the '%'

    // Publish how far the cursor got and signal an invalid specifier.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    if (pos == end)
        return fail();          // a lone '%' at the end of the string

    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;  // "%%"
    }

    // Optional `*` sub-specifier; only recorded when something follows it.
    if (format[pos] == '*')
    {
        if (++pos == end)
            return fail();
        asterisk = true;
    }

    // Optional field width: a run of decimal digits.
    while (isdigit(format[pos]))
    {
        if (++pos == end)
            return fail();
    }

    /* Optional scanset.  Its contents are not interpreted; the only
     * requirements are a closing `]` and at least one character after it.
     */
    if (format[pos] == '[')
    {
        while (pos < end && format[pos] != ']')
            ++pos;

        if (pos == end)
            return fail();      // unterminated scanset

        if (++pos == end)
            return fail();      // nothing after the `]`
    }

    // Length modifier and conversion character.  Whether or not the helper
    // succeeds, the cursor is stored in `idx` and its result is returned.
    char generic;
    const result = parseGenericFormatSpecifier(format, pos, generic);
    idx = pos;
    return result;
}
603 
604 /**************************************
605  * Parse the *format specifier* which is of the form:
606  *
607  * `%[flags][field width][.precision][length modifier]specifier`
608  *
609  * Params:
610  *      format = format string
611  *      idx = index of `%` of start of format specifier,
612  *          which gets updated to index past the end of it,
613  *          even if `Format.error` is returned
614  *      widthStar = set if * for width
615  *      precisionStar = set if * for precision
616  * Returns:
617  *      Format
618  */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar) nothrow pure @safe
{
    assert(format[idx] == '%');
    const end = format.length;
    size_t pos = idx + 1;       // cursor, starting just past the '%'

    bool sawHash;               // '#'
    bool sawZero;               // '0'
    bool sawOtherFlag;          // '-', '+' or ' '
    bool hasWidth;
    bool hasPrecision;

    // Publish how far the cursor got and signal an invalid specifier.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    static bool isDecimal(char ch)
    {
        return '0' <= ch && ch <= '9';
    }

    if (pos == end)
        return fail();          // a lone '%' at the end of the string

    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;  // "%%"
    }

    // Flags: any number of them, in any order.
    flagLoop:
    for (;;)
    {
        switch (format[pos])
        {
            case '-', '+', ' ':
                sawOtherFlag = true;
                break;
            case '#':
                sawHash = true;
                break;
            case '0':
                sawZero = true;
                break;
            default:
                break flagLoop;
        }
        if (++pos == end)
            return fail();
    }

    // Field width: either `*` or a decimal number not starting with '0'.
    const w = format[pos];
    if (w == '*')
    {
        hasWidth = widthStar = true;
        if (++pos == end)
            return fail();
    }
    else if ('1' <= w && w <= '9')
    {
        hasWidth = true;
        do
        {
            if (++pos == end)
                return fail();
        } while (isDecimal(format[pos]));
    }

    // Precision: '.' followed by `*`, by digits, or by nothing at all.
    if (format[pos] == '.')
    {
        hasPrecision = true;
        if (++pos == end)
            return fail();

        if (format[pos] == '*')
        {
            precisionStar = true;
            if (++pos == end)
                return fail();
        }
        else
        {
            while (isDecimal(format[pos]))
            {
                if (++pos == end)
                    return fail();
            }
        }
    }

    // Length modifier and conversion character.
    char generic;
    const result = parseGenericFormatSpecifier(format, pos, generic);

    // Reject flag/width/precision combinations the conversion does not allow.
    bool rejected;
    if (generic == 'c' || generic == 's')
        rejected = sawHash || sawZero;
    else if (generic == 'd' || generic == 'i')
        rejected = sawHash;
    else if (generic == 'n')
        rejected = sawHash || sawZero || hasPrecision || hasWidth || sawOtherFlag;

    // The cursor is stored in `idx` on every path; a helper failure is
    // already `Format.error`, so only the rejection needs mapping here.
    idx = pos;
    return rejected ? Format.error : result;
}
763 
764 /* Different kinds of formatting specifications, variations we don't
765    care about are merged. (Like we don't care about the difference between
766    f, e, g, a, etc.)
767 
768    For `scanf`, every format is a pointer.
769  */
enum Format
{
    // -- signed integer conversions, by length modifier --
    d,          // (none): int
    hhd,        // hh:     signed char
    hd,         // h:      short int
    ld,         // l:      long int
    lld,        // ll:     long long int
    jd,         // j:      intmax_t
    zd,         // z:      size_t
    td,         // t:      ptrdiff_t

    // -- unsigned integer conversions, by length modifier --
    u,          // (none): unsigned int
    hhu,        // hh:     unsigned char
    hu,         // h:      unsigned short int
    lu,         // l:      unsigned long int
    llu,        // ll:     unsigned long long int
    ju,         // j:      uintmax_t

    // -- floating point conversions --
    g,          // double for printf, float for scanf
    lg,         // double (scanf)
    Lg,         // long double (printf and scanf)

    // -- strings and characters --
    s,          // char string (printf and scanf)
    ls,         // wchar_t string (printf and scanf)
    c,          // char (printf)
    lc,         // wint_t (printf)

    // -- pointers --
    p,          // pointer

    // -- `n` conversions: pointer to an integer, by length modifier --
    n,          // (none): pointer to int
    hhn,        // hh:     pointer to signed char
    hn,         // h:      pointer to short
    ln,         // l:      pointer to long int
    lln,        // ll:     pointer to long long int
    jn,         // j:      pointer to intmax_t
    zn,         // z:      pointer to size_t
    tn,         // t:      pointer to ptrdiff_t

    // -- GNU extensions --
    GNU_a,      // scanf: address to a string with no maximum size
    GNU_m,      // printf: string for the error code in errno; scanf: length modifier

    // -- results that are not conversions --
    percent,    // "%%", takes no argument
    error,      // invalid format specification
}
807 
808 /**************************************
809  * Parse the *length specifier* and the *specifier* of the following form:
810  * `[length]specifier`
811  *
812  * Params:
813  *      format = format string
814  *      idx = index of start of format specifier,
815  *          which gets updated to index past the end of it,
816  *          even if `Format.error` is returned
817  *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
818  *           format is `hhd`.
819  * Returns:
820  *      Format
821  */
822 Format parseGenericFormatSpecifier(scope const char[] format,
823     ref size_t idx, out char genSpecifier, bool useGNUExts =
824     findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @trusted
825 {
826     const length = format.length;
827 
828     /* Read the `length modifier`
829      */
830     const lm = format[idx];
831     bool lm1;        // if jztL
832     bool lm2;        // if `hh` or `ll`
833     if (lm == 'j' ||
834         lm == 'z' ||
835         lm == 't' ||
836         lm == 'L')
837     {
838         ++idx;
839         if (idx == length)
840             return Format.error;
841         lm1 = true;
842     }
843     else if (lm == 'h' || lm == 'l')
844     {
845         ++idx;
846         if (idx == length)
847             return Format.error;
848         lm2 = lm == format[idx];
849         if (lm2)
850         {
851             ++idx;
852             if (idx == length)
853                 return Format.error;
854         }
855     }
856 
857     /* Read the `specifier`
858      */
859     Format specifier;
860     const sc = format[idx];
861     genSpecifier = sc;
862     switch (sc)
863     {
864         case 'd':
865         case 'i':
866             if (lm == 'L')
867                 specifier = Format.error;
868             else
869                 specifier = lm == 'h' && lm2 ? Format.hhd :
870                             lm == 'h'        ? Format.hd  :
871                             lm == 'l' && lm2 ? Format.lld :
872                             lm == 'l'        ? Format.ld  :
873                             lm == 'j'        ? Format.jd  :
874                             lm == 'z'        ? Format.zd  :
875                             lm == 't'        ? Format.td  :
876                                                Format.d;
877             break;
878 
879         case 'u':
880         case 'o':
881         case 'x':
882         case 'X':
883             if (lm == 'L')
884                 specifier = Format.error;
885             else
886                 specifier = lm == 'h' && lm2 ? Format.hhu :
887                             lm == 'h'        ? Format.hu  :
888                             lm == 'l' && lm2 ? Format.llu :
889                             lm == 'l'        ? Format.lu  :
890                             lm == 'j'        ? Format.ju  :
891                             lm == 'z'        ? Format.zd  :
892                             lm == 't'        ? Format.td  :
893                                                Format.u;
894             break;
895 
896         case 'a':
897             if (useGNUExts)
898             {
899                 // https://www.gnu.org/software/libc/manual/html_node/Dynamic-String-Input.html
900                 specifier = Format.GNU_a;
901                 break;
902             }
903             goto case;
904 
905         case 'f':
906         case 'F':
907         case 'e':
908         case 'E':
909         case 'g':
910         case 'G':
911         case 'A':
912             if (lm == 'L')
913                 specifier = Format.Lg;
914             else if (lm1 || lm2 || lm == 'h')
915                 specifier = Format.error;
916             else
917                 specifier = lm == 'l' ? Format.lg : Format.g;
918             break;
919 
920         case 'c':
921             if (lm1 || lm2 || lm == 'h')
922                 specifier = Format.error;
923             else
924                 specifier = lm == 'l' ? Format.lc : Format.c;
925             break;
926 
927         case 's':
928             if (lm1 || lm2 || lm == 'h')
929                 specifier = Format.error;
930             else
931                 specifier = lm == 'l' ? Format.ls : Format.s;
932             break;
933 
934         case 'p':
935             if (lm1 || lm2 || lm == 'h' || lm == 'l')
936                 specifier = Format.error;
937             else
938                 specifier = Format.p;
939             break;
940 
941         case 'n':
942             if (lm == 'L')
943                 specifier = Format.error;
944             else
945                 specifier = lm == 'l' && lm2 ? Format.lln :
946                             lm == 'l'        ? Format.ln  :
947                             lm == 'h' && lm2 ? Format.hhn :
948                             lm == 'h'        ? Format.hn  :
949                             lm == 'j'        ? Format.jn  :
950                             lm == 'z'        ? Format.zn  :
951                             lm == 't'        ? Format.tn  :
952                                                Format.n;
953             break;
954 
955         case 'm':
956             if (useGNUExts)
957             {
958                 // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
959                 specifier = Format.GNU_m;
960                 break;
961             }
962             goto default;
963 
964         default:
965             specifier = Format.error;
966             break;
967     }
968 
969     ++idx;
970     return specifier; // success
971 }
972 
unittest
{
    /* Exercises the three specifier parsers of this module:
     * parseGenericFormatSpecifier, parsePrintfFormatSpecifier and
     * parseScanfFormatSpecifier.
     *
     * Every check restarts parsing at index 0, compares the returned Format,
     * and then compares the secondary result (conversion character or final
     * index). For every rejected format the index is expected to equal the
     * length of the format string.
     */

    char genSpecifier;
    size_t idx;
    bool widthStar;
    bool precisionStar;
    bool asterisk;
    Format g;

    // Parse `text` with the generic parser; check the Format and the conversion character.
    void expectGeneric(string text, Format result, char conversion)
    {
        idx = 0;
        assert(parseGenericFormatSpecifier(text, idx, genSpecifier) == result);
        assert(genSpecifier == conversion);
    }

    // Parse `text` as a printf specifier; check the Format and the final index.
    void expectPrintf(string text, Format result, size_t stop)
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(text, idx, widthStar, precisionStar) == result);
        assert(idx == stop);
    }

    // Parse `text` as a scanf specifier; check the Format and the final index.
    void expectScanf(string text, Format result, size_t stop)
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(text, idx, asterisk) == result);
        assert(idx == stop);
    }

    /* parseGenericFormatSpecifier
     */

    expectGeneric("hhd", Format.hhd, 'd');
    expectGeneric("hn", Format.hn, 'n');
    expectGeneric("ji", Format.jd, 'i');
    expectGeneric("lu", Format.lu, 'u');

    // Unknown conversion character: only the returned Format is checked
    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);

    /* parsePrintfFormatSpecifier
     */

    // one for each Format
    expectPrintf("%d", Format.d, 2);
    assert(!widthStar && !precisionStar);

    expectPrintf("%ld", Format.ld, 3);
    expectPrintf("%lld", Format.lld, 4);
    expectPrintf("%jd", Format.jd, 3);
    expectPrintf("%zd", Format.zd, 3);
    expectPrintf("%td", Format.td, 3);
    expectPrintf("%g", Format.g, 2);
    expectPrintf("%Lg", Format.Lg, 3);
    expectPrintf("%p", Format.p, 2);
    expectPrintf("%n", Format.n, 2);
    expectPrintf("%ln", Format.ln, 3);
    expectPrintf("%lln", Format.lln, 4);
    expectPrintf("%hn", Format.hn, 3);
    expectPrintf("%hhn", Format.hhn, 4);
    expectPrintf("%jn", Format.jn, 3);
    expectPrintf("%zn", Format.zn, 3);
    expectPrintf("%tn", Format.tn, 3);
    expectPrintf("%c", Format.c, 2);
    expectPrintf("%lc", Format.lc, 3);
    expectPrintf("%s", Format.s, 2);
    expectPrintf("%ls", Format.ls, 3);
    expectPrintf("%%", Format.percent, 2);

    // Synonyms
    expectPrintf("%i", Format.d, 2);
    expectPrintf("%u", Format.u, 2);
    expectPrintf("%o", Format.u, 2);
    expectPrintf("%x", Format.u, 2);
    expectPrintf("%X", Format.u, 2);
    expectPrintf("%f", Format.g, 2);
    expectPrintf("%F", Format.g, 2);
    expectPrintf("%G", Format.g, 2);

    // `a` yields Format.GNU_a when GNU extensions are in effect, Format.g otherwise
    idx = 0;
    g = parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar);
    assert(g == Format.g || g == Format.GNU_a);
    assert(idx == 2);

    expectPrintf("%A", Format.g, 2);
    expectPrintf("%lg", Format.lg, 3);

    // width, precision
    expectPrintf("%*d", Format.d, 3);
    assert(widthStar && !precisionStar);

    expectPrintf("%.*d", Format.d, 4);
    assert(!widthStar && precisionStar);

    expectPrintf("%*.*d", Format.d, 5);
    assert(widthStar && precisionStar);

    // Too short formats
    {
        const string[] truncated =
        [
            "%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
            "%j", "%z", "%t", "%l", "%h", "%ll", "%hh",
        ];
        foreach (s; truncated)
            expectPrintf(s, Format.error, s.length);
    }

    // Undefined format combinations
    {
        const string[] undefinedCombos =
        [
            "%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
            "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
            "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
            "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
            "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K",
        ];
        foreach (s; undefinedCombos)
            expectPrintf(s, Format.error, s.length);
    }

    /* parseScanfFormatSpecifier
     */

    // one for each Format
    expectScanf("%d", Format.d, 2);
    assert(!asterisk);

    expectScanf("%hhd", Format.hhd, 4);
    expectScanf("%hd", Format.hd, 3);
    expectScanf("%ld", Format.ld, 3);
    expectScanf("%lld", Format.lld, 4);
    expectScanf("%jd", Format.jd, 3);
    expectScanf("%zd", Format.zd, 3);
    expectScanf("%td", Format.td, 3);
    expectScanf("%u", Format.u, 2);
    expectScanf("%hhu", Format.hhu, 4);
    expectScanf("%hu", Format.hu, 3);
    expectScanf("%lu", Format.lu, 3);
    expectScanf("%llu", Format.llu, 4);
    expectScanf("%ju", Format.ju, 3);
    expectScanf("%g", Format.g, 2);
    expectScanf("%lg", Format.lg, 3);
    expectScanf("%Lg", Format.Lg, 3);
    expectScanf("%p", Format.p, 2);
    expectScanf("%s", Format.s, 2);
    expectScanf("%ls", Format.ls, 3);
    expectScanf("%%", Format.percent, 2);

    // Synonyms
    expectScanf("%i", Format.d, 2);
    expectScanf("%n", Format.n, 2);
    expectScanf("%o", Format.u, 2);
    expectScanf("%x", Format.u, 2);
    expectScanf("%f", Format.g, 2);
    expectScanf("%e", Format.g, 2);

    // `a` yields Format.GNU_a when GNU extensions are in effect, Format.g otherwise
    idx = 0;
    g = parseScanfFormatSpecifier("%a", idx, asterisk);
    assert(g == Format.g || g == Format.GNU_a);
    assert(idx == 2);

    expectScanf("%c", Format.c, 2);

    // asterisk
    expectScanf("%*d", Format.d, 3);
    assert(asterisk);

    expectScanf("%9ld", Format.ld, 4);
    assert(!asterisk);

    expectScanf("%*25984hhd", Format.hhd, 10);
    assert(asterisk);

    // scansets
    expectScanf("%[a-zA-Z]s", Format.s, 10);
    assert(!asterisk);

    expectScanf("%*25[a-z]hhd", Format.hhd, 12);
    assert(asterisk);

    // Too short formats
    {
        const string[] truncated =
        [
            "%", "% ", "%#", "%0", "%*", "%1", "%19",
            "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K",
        ];
        foreach (s; truncated)
            expectScanf(s, Format.error, s.length);
    }

    // Undefined format combinations
    {
        const string[] undefinedCombos =
        [
            "%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
            "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
            "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
            "%-", "%+", "%#", "%0", "%.", "%Ln",
        ];
        foreach (s; undefinedCombos)
            expectScanf(s, Format.error, s.length);
    }

    // Invalid scansets
    {
        const string[] badScansets = ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"];
        foreach (s; badScansets)
            expectScanf(s, Format.error, s.length);
    }
}
1365