xref: /netbsd-src/external/gpl3/gcc/dist/gcc/c-family/c-format.cc (revision 2683f5b185977c9184701f18c843971cd908b00e)
1 /* Check calls to formatted I/O functions (-Wformat).
2    Copyright (C) 1992-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "c-target.h"
25 #include "c-common.h"
26 #include "alloc-pool.h"
27 #include "stringpool.h"
28 #include "c-objc.h"
29 #include "intl.h"
30 #include "langhooks.h"
31 #include "c-format.h"
32 #include "diagnostic.h"
33 #include "substring-locations.h"
34 #include "selftest.h"
35 #include "selftest-diagnostic.h"
36 #include "builtins.h"
37 #include "attribs.h"
38 #include "gcc-rich-location.h"
39 
40 /* Handle attributes associated with format checking.  */
41 
42 /* This must be in the same order as format_types, except for
43    format_type_error.  Target-specific format types do not have
44    matching enum values.  */
/* Apart from format_type_error no enumerator has an explicit value, so
   they number upward from 0 in the order written.  format_type_error (-1)
   is the failure value that decode_format_attr tests for after calling
   decode_format_type.  */
45 enum format_type { printf_format_type, asm_fprintf_format_type,
46 		   gcc_diag_format_type, gcc_tdiag_format_type,
47 		   gcc_cdiag_format_type,
48 		   gcc_cxxdiag_format_type, gcc_gfc_format_type,
49 		   gcc_dump_printf_format_type,
50 		   gcc_objc_string_format_type,
51 		   format_type_error = -1};
52 
/* Decoded form of a "format" attribute.  decode_format_attr fills in all
   four members: FORMAT_TYPE and IS_RAW from the attribute's identifier,
   FORMAT_NUM and FIRST_ARG_NUM from its two positional operands.  */
53 struct function_format_info
54 {
55   enum format_type format_type;		/* type of format (printf, scanf, etc.) */
56   /* IS_RAW is relevant only for GCC diagnostic format functions.
57      It is set for "raw" formatting functions like pp_printf that
58      are not intended to produce complete diagnostics according to
59      GCC guidelines, and clear for others like error and warning
60      whose format string is checked for proper quoting and spelling.  */
61   bool is_raw;
62   unsigned HOST_WIDE_INT format_num;	/* number of format argument */
63   unsigned HOST_WIDE_INT first_arg_num;	/* number of first arg (zero for varargs) */
64 };
65 
66 /* Initialized in init_dynamic_diag_info.  */
/* File-local tree nodes; init_dynamic_diag_info itself is not in this part
   of the file.  NOTE(review): GTY(()) is presumably the usual GCC marker
   that registers these with the garbage collector -- confirm.  */
67 static GTY(()) tree local_tree_type_node;
68 static GTY(()) tree local_event_ptr_node;
69 static GTY(()) tree local_gimple_ptr_node;
70 static GTY(()) tree local_cgraph_node_ptr_node;
71 static GTY(()) tree locus;
72 
/* Forward declarations of file-local helpers, so they can be called before
   their definitions.  decode_format_attr, check_format_string and
   get_constant are defined below; the definitions of decode_format_type,
   convert_format_name_to_system_name, format_name and format_flags are
   not in this part of the file.  */
73 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
74 				bool);
/* The second parameter is optional (defaults to NULL); decode_format_attr
   passes &info->is_raw there.  */
75 static format_type decode_format_type (const char *, bool * = NULL);
76 
77 static bool check_format_string (const_tree argument,
78 				 unsigned HOST_WIDE_INT format_num,
79 				 int flags, bool *no_add_attrs,
80 				 int expected_format_type);
81 static tree get_constant (const_tree fntype, const_tree atname, tree expr,
82 			  int argno, unsigned HOST_WIDE_INT *value,
83 			  int flags, bool validated_p);
84 static const char *convert_format_name_to_system_name (const char *attr_name);
85 
/* When nonzero, check_format_string treats any expected format type that
   is >= this value as a target-specific format type.  Where it is set is
   not visible in this part of the file.  */
86 static int first_target_format_type;
87 static const char *format_name (int format_num);
88 static int format_flags (int format_num);
89 
90 /* Emit a warning as per format_warning_va, but construct the substring_loc
91    for the character at offset (CHAR_IDX - 1) within a string constant
92    FORMAT_STRING_CST at FMT_STRING_LOC.  */
93 
94 ATTRIBUTE_GCC_DIAG (5,6)
95 static bool
format_warning_at_char(location_t fmt_string_loc,tree format_string_cst,int char_idx,int opt,const char * gmsgid,...)96 format_warning_at_char (location_t fmt_string_loc, tree format_string_cst,
97 			int char_idx, int opt, const char *gmsgid, ...)
98 {
99   va_list ap;
100   va_start (ap, gmsgid);
101   tree string_type = TREE_TYPE (format_string_cst);
102 
103   /* The callers are of the form:
104        format_warning (format_string_loc, format_string_cst,
105 		       format_chars - orig_format_chars,
106       where format_chars has already been incremented, so that
107       CHAR_IDX is one character beyond where the warning should
108       be emitted.  Fix it.  */
109   char_idx -= 1;
110 
111   substring_loc fmt_loc (fmt_string_loc, string_type, char_idx, char_idx,
112 			 char_idx);
113   format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
114 				   NULL);
115   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
116   va_end (ap);
117 
118   return warned;
119 }
120 
121 
122 /* Emit a warning as per format_warning_va, but construct the substring_loc
123    for the substring at offset (POS1, POS2 - 1) within a string constant
124    FORMAT_STRING_CST at FMT_STRING_LOC.  */
125 
126 ATTRIBUTE_GCC_DIAG (6,7)
127 static bool
format_warning_substr(location_t fmt_string_loc,tree format_string_cst,int pos1,int pos2,int opt,const char * gmsgid,...)128 format_warning_substr (location_t fmt_string_loc, tree format_string_cst,
129 		       int pos1, int pos2, int opt, const char *gmsgid, ...)
130 {
131   va_list ap;
132   va_start (ap, gmsgid);
133   tree string_type = TREE_TYPE (format_string_cst);
134 
135   pos2 -= 1;
136 
137   substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2);
138   format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
139 				   NULL);
140   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
141   va_end (ap);
142 
143   return warned;
144 }
145 
146 
147 /* Check that we have a pointer to a string suitable for use as a format.
148    The default is to check for a char type.
149    For objective-c dialects, this is extended to include references to string
150    objects validated by objc_string_ref_type_p ().
151    Targets may also provide a string object type that can be used within c and
152    c++ and shared with their respective objective-c dialects. In this case the
153    reference to a format string is checked for validity via a hook.
154 
155    The function returns true if strref points to any string type valid for the
156    language dialect and target.  */
157 
158 bool
valid_format_string_type_p(tree strref)159 valid_format_string_type_p (tree strref)
160 {
161   return (strref != NULL
162 	  && TREE_CODE (strref) == POINTER_TYPE
163 	  && (TYPE_MAIN_VARIANT (TREE_TYPE (strref)) == char_type_node
164 	      || objc_string_ref_type_p (strref)
165 	      || (*targetcm.string_object_ref_type_p) ((const_tree) strref)));
166 }
167 
168 /* Handle a "format_arg" attribute; arguments as in
169    struct attribute_spec.handler.  */
170 tree
handle_format_arg_attribute(tree * node,tree atname,tree args,int flags,bool * no_add_attrs)171 handle_format_arg_attribute (tree *node, tree atname,
172 			     tree args, int flags, bool *no_add_attrs)
173 {
174   tree type = *node;
175   /* Note that TREE_VALUE (args) is changed in place below.  */
176   tree *format_num_expr = &TREE_VALUE (args);
177   unsigned HOST_WIDE_INT format_num = 0;
178 
179   if (tree val = get_constant (type, atname, *format_num_expr, 0, &format_num,
180 			       0, false))
181     *format_num_expr = val;
182   else
183     {
184       *no_add_attrs = true;
185       return NULL_TREE;
186     }
187 
188   if (prototype_p (type))
189     {
190       /* The format arg can be any string reference valid for the language and
191 	target.  We cannot be more specific in this case.  */
192       if (!check_format_string (type, format_num, flags, no_add_attrs, -1))
193 	return NULL_TREE;
194     }
195 
196   if (!valid_format_string_type_p (TREE_TYPE (type)))
197     {
198       if (!(flags & (int) ATTR_FLAG_BUILT_IN))
199 	error ("function does not return string type");
200       *no_add_attrs = true;
201       return NULL_TREE;
202     }
203 
204   return NULL_TREE;
205 }
206 
207 /* Verify that the format_num argument is actually a string reference suitable,
208    for the language dialect and target (in case the format attribute is in
209    error).  When we know the specific reference type expected, this is also
210    checked.  */
211 static bool
check_format_string(const_tree fntype,unsigned HOST_WIDE_INT format_num,int flags,bool * no_add_attrs,int expected_format_type)212 check_format_string (const_tree fntype, unsigned HOST_WIDE_INT format_num,
213 		     int flags, bool *no_add_attrs, int expected_format_type)
214 {
215   unsigned HOST_WIDE_INT i;
216   bool is_objc_sref, is_target_sref, is_char_ref;
217   tree ref;
218   int fmt_flags;
219   function_args_iterator iter;
220 
221   i = 1;
222   FOREACH_FUNCTION_ARGS (fntype, ref, iter)
223     {
224       if (i == format_num)
225 	break;
226       i++;
227     }
228 
229   if (!ref
230       || !valid_format_string_type_p (ref))
231     {
232       if (!(flags & (int) ATTR_FLAG_BUILT_IN))
233 	error ("format string argument is not a string type");
234       *no_add_attrs = true;
235       return false;
236     }
237 
238   /* We only know that we want a suitable string reference.  */
239   if (expected_format_type < 0)
240     return true;
241 
242   /* Now check that the arg matches the expected type.  */
243   is_char_ref =
244     (TYPE_MAIN_VARIANT (TREE_TYPE (ref)) == char_type_node);
245 
246   fmt_flags = format_flags (expected_format_type);
247   is_objc_sref = is_target_sref = false;
248   if (!is_char_ref)
249     is_objc_sref = objc_string_ref_type_p (ref);
250 
251   if (!(fmt_flags & FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL))
252     {
253       if (is_char_ref)
254 	return true; /* OK, we expected a char and found one.  */
255       else
256 	{
257 	  /* We expected a char but found an extended string type.  */
258 	  if (is_objc_sref)
259 	    error ("found a %qs reference but the format argument should"
260 		   " be a string", format_name (gcc_objc_string_format_type));
261 	  else
262 	    error ("found a %qT but the format argument should be a string",
263 		   ref);
264 	  *no_add_attrs = true;
265 	  return false;
266 	}
267     }
268 
269   /* We expect a string object type as the format arg.  */
270   if (is_char_ref)
271     {
272       error ("format argument should be a %qs reference but"
273 	     " a string was found", format_name (expected_format_type));
274       *no_add_attrs = true;
275       return false;
276     }
277 
278   /* We will assert that objective-c will support either its own string type
279      or the target-supplied variant.  */
280   if (!is_objc_sref)
281     is_target_sref = (*targetcm.string_object_ref_type_p) ((const_tree) ref);
282 
283   if (expected_format_type == (int) gcc_objc_string_format_type
284       && (is_objc_sref || is_target_sref))
285     return true;
286 
287   /* We will allow a target string ref to match only itself.  */
288   if (first_target_format_type
289       && expected_format_type >= first_target_format_type
290       && is_target_sref)
291     return true;
292   else
293     {
294       error ("format argument should be a %qs reference",
295 	      format_name (expected_format_type));
296       *no_add_attrs = true;
297       return false;
298     }
299 }
300 
301 /* Under the control of FLAGS, verify EXPR is a valid constant that
302    refers to a positional argument ARGNO having a string type (char*
303    or, for targets like Darwin, a pointer to struct CFString) to
304    a function type FNTYPE declared with attribute ATNAME.
305    If valid, store the constant's integer value in *VALUE and return
306    the value.
307    If VALIDATED_P is true assert the validation is successful.
308    Returns the converted constant value on success, null otherwise.  */
309 
310 static tree
get_constant(const_tree fntype,const_tree atname,tree expr,int argno,unsigned HOST_WIDE_INT * value,int flags,bool validated_p)311 get_constant (const_tree fntype, const_tree atname, tree expr, int argno,
312 	      unsigned HOST_WIDE_INT *value, int flags, bool validated_p)
313 {
314   /* Require the referenced argument to have a string type.  For targets
315      like Darwin, also accept pointers to struct CFString.  */
316   if (tree val = positional_argument (fntype, atname, expr, STRING_CST,
317 				      argno, flags))
318     {
319       *value = TREE_INT_CST_LOW (val);
320       return val;
321     }
322 
323   gcc_assert (!validated_p);
324   return NULL_TREE;
325 }
326 
327 /* Decode the arguments to a "format" attribute into a
328    function_format_info structure.  It is already known that the list
329    is of the right length.  If VALIDATED_P is true, then these
330    attributes have already been validated and must not be erroneous;
331    if false, it will give an error message.  Returns true if the
332    attributes are successfully decoded, false otherwise.  */
333 
334 static bool
decode_format_attr(const_tree fntype,tree atname,tree args,function_format_info * info,bool validated_p)335 decode_format_attr (const_tree fntype, tree atname, tree args,
336 		    function_format_info *info, bool validated_p)
337 {
338   tree format_type_id = TREE_VALUE (args);
339   /* Note that TREE_VALUE (args) is changed in place below.  Ditto
340      for the value of the next element on the list.  */
341   tree *format_num_expr = &TREE_VALUE (TREE_CHAIN (args));
342   tree *first_arg_num_expr = &TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
343 
344   if (TREE_CODE (format_type_id) != IDENTIFIER_NODE)
345     {
346       gcc_assert (!validated_p);
347       error ("unrecognized format specifier");
348       return false;
349     }
350   else
351     {
352       const char *p = IDENTIFIER_POINTER (format_type_id);
353 
354       info->format_type = decode_format_type (p, &info->is_raw);
355 
356       if (!c_dialect_objc ()
357 	   && info->format_type == gcc_objc_string_format_type)
358 	{
359 	  gcc_assert (!validated_p);
360 	  warning (OPT_Wformat_, "%qE is only allowed in Objective-C dialects",
361 		   format_type_id);
362 	  info->format_type = format_type_error;
363 	  return false;
364 	}
365 
366       if (info->format_type == format_type_error)
367 	{
368 	  gcc_assert (!validated_p);
369 	  warning (OPT_Wformat_, "%qE is an unrecognized format function type",
370 		   format_type_id);
371 	  return false;
372 	}
373     }
374 
375   if (tree val = get_constant (fntype, atname, *format_num_expr,
376 			       2, &info->format_num, 0, validated_p))
377     *format_num_expr = val;
378   else
379     return false;
380 
381   if (tree val = get_constant (fntype, atname, *first_arg_num_expr,
382 			       3, &info->first_arg_num,
383 			       (POSARG_ZERO | POSARG_ELLIPSIS), validated_p))
384     *first_arg_num_expr = val;
385   else
386     return false;
387 
388   if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num)
389     {
390       gcc_assert (!validated_p);
391       error ("format string argument follows the arguments to be formatted");
392       return false;
393     }
394 
395   return true;
396 }
397 
398 /* Check a call to a format function against a parameter list.  */
399 
400 /* The C standard version C++ is treated as equivalent to
401    or inheriting from, for the purpose of format features supported.  */
/* Yields STD_C94 when cxx_dialect is older than cxx11, STD_C99 otherwise.  */
402 #define CPLUSPLUS_STD_VER	(cxx_dialect < cxx11 ? STD_C94 : STD_C99)
403 /* The C standard version we are checking formats against when pedantic.  */
/* For C++ this is CPLUSPLUS_STD_VER.  For C it is the first match of
   flag_isoc2x -> STD_C2X, flag_isoc99 -> STD_C99, flag_isoc94 -> STD_C94,
   falling back to STD_C89.  The result is cast to int.  */
404 #define C_STD_VER		((int) (c_dialect_cxx ()		   \
405 				 ? CPLUSPLUS_STD_VER			   \
406 				 : (flag_isoc2x				   \
407 				    ? STD_C2X				   \
408 				    : (flag_isoc99			   \
409 				       ? STD_C99			   \
410 				       : (flag_isoc94 ? STD_C94 : STD_C89)))))
411 /* The name to give to the standard version we are warning about when
412    pedantic.  FEATURE_VER is the version in which the feature warned about
413    appeared, which is higher than C_STD_VER.  */
/* The string names the standard that lacks the feature, not the one that
   introduced it: for C, a FEATURE_VER of STD_EXT gives "ISO C", STD_C2X
   gives "ISO C17" and anything else gives "ISO C90".  For C++ the choice
   depends only on cxx_dialect ("ISO C++98" before cxx11, "ISO C++11"
   otherwise) and FEATURE_VER is not looked at.  */
414 #define C_STD_NAME(FEATURE_VER) (c_dialect_cxx ()		\
415 				 ? (cxx_dialect < cxx11 ? "ISO C++98" \
416 				    : "ISO C++11")		\
417 				 : ((FEATURE_VER) == STD_EXT	\
418 				    ? "ISO C"			\
419 				    : ((FEATURE_VER) == STD_C2X	\
420 				       ? "ISO C17"		\
421 				       : "ISO C90")))
422 /* Adjust a C standard version, which may be STD_C9L, to account for
423    -Wno-long-long.  Returns other standard versions unchanged.  */
/* STD_C9L becomes STD_C99 when warn_long_long is set and STD_C89 when it
   is clear.  The result is cast to int.  */
424 #define ADJ_STD(VER)		((int) ((VER) == STD_C9L		      \
425 				       ? (warn_long_long ? STD_C99 : STD_C89) \
426 				       : (VER)))
427 
428 /* Enum describing the kind of specifiers present in the format and
429    requiring an argument.  */
/* The enumerators have no explicit values, so they are 0, 1 and 2 in the
   order written; kind_descriptions lists its strings in the same order.  */
430 enum format_specifier_kind {
431   CF_KIND_FORMAT,
432   CF_KIND_FIELD_WIDTH,
433   CF_KIND_FIELD_PRECISION
434 };
435 
436 static const char *kind_descriptions[] = {
437   N_("format"),
438   N_("field width specifier"),
439   N_("field precision specifier")
440 };
441 
442 /* Structure describing details of a type expected in format checking,
443    and the type to check against it.  */
/* Records are chained through NEXT, so a single conversion can carry
   several expected types; FORMAT_START/FORMAT_LENGTH and OFFSET_LOC tie
   each record back to the piece of the format string it came from.  */
444 struct format_wanted_type
445 {
446   /* The type wanted.  */
447   tree wanted_type;
448   /* The name of this type to use in diagnostics.  */
449   const char *wanted_type_name;
450   /* Should be type checked just for scalar width identity.  */
451   int scalar_identity_flag;
452   /* The level of indirection through pointers at which this type occurs.  */
453   int pointer_count;
454   /* Whether, when pointer_count is 1, to allow any character type when
455      pedantic, rather than just the character or void type specified.  */
456   int char_lenient_flag;
457   /* Whether the argument, dereferenced once, is written into and so the
458      argument must not be a pointer to a const-qualified type.  */
459   int writing_in_flag;
460   /* Whether the argument, dereferenced once, is read from and so
461      must not be a NULL pointer.  */
462   int reading_from_flag;
463   /* The kind of specifier that this type is used for.  */
464   enum format_specifier_kind kind;
465   /* The starting character of the specifier.  This never includes the
466      initial percent sign.  */
467   const char *format_start;
468   /* The length of the specifier.  */
469   int format_length;
470   /* The actual parameter to check against the wanted type.  */
471   tree param;
472   /* The argument number of that parameter.  */
473   int arg_num;
474   /* The offset location of this argument with respect to the format
475      string location.  */
476   unsigned int offset_loc;
477   /* The next type to check for this format conversion, or NULL if none.  */
478   struct format_wanted_type *next;
479 };
480 
481 /* Convenience macro for format_length_info meaning unused.  */
/* Expands to three initializers (NULL name, FMT_LEN_none, STD_C89), i.e.
   one "modifier, length code, standard" triple of a table row.  */
482 #define NO_FMT NULL, FMT_LEN_none, STD_C89
483 
/* Length modifiers accepted in printf formats.  format_length_info is
   declared elsewhere; as the rows read here, each one gives a modifier
   string, its FMT_LEN_* code and a standard version, then the same triple
   for the doubled form of the modifier (NO_FMT when there is none), and a
   final integer that is 0 throughout.  The all-NO_FMT row ends the table.
   Note that "q" shares FMT_LEN_ll with "ll" and "Z" shares FMT_LEN_z
   with "z", both tagged STD_EXT.  */
484 static const format_length_info printf_length_specs[] =
485 {
486   { "h", FMT_LEN_h, STD_C89, "hh", FMT_LEN_hh, STD_C99, 0 },
487   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C9L, 0 },
488   { "q", FMT_LEN_ll, STD_EXT, NO_FMT, 0 },
489   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
490   { "z", FMT_LEN_z, STD_C99, NO_FMT, 0 },
491   { "Z", FMT_LEN_z, STD_EXT, NO_FMT, 0 },
492   { "t", FMT_LEN_t, STD_C99, NO_FMT, 0 },
493   { "j", FMT_LEN_j, STD_C99, NO_FMT, 0 },
494   { "H", FMT_LEN_H, STD_C2X, NO_FMT, 0 },
495   { "D", FMT_LEN_D, STD_C2X, "DD", FMT_LEN_DD, STD_C2X, 0 },
496   { NO_FMT, NO_FMT, 0 }
497 };
498 
499 /* Length specifiers valid for asm_fprintf.  */
/* Only "l", "ll" and "w" are listed, all tagged STD_C89; the all-NO_FMT
   row ends the table.  */
500 static const format_length_info asm_fprintf_length_specs[] =
501 {
502   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
503   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
504   { NO_FMT, NO_FMT, 0 }
505 };
506 
507 /* Length specifiers valid for GCC diagnostics.  */
/* Same three modifiers as asm_fprintf_length_specs: "l", "ll" and "w",
   all tagged STD_C89.  */
508 static const format_length_info gcc_diag_length_specs[] =
509 {
510   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
511   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
512   { NO_FMT, NO_FMT, 0 }
513 };
514 
515 /* The custom diagnostics all accept the same length specifiers.  */
/* These are plain aliases: the tdiag, cdiag, cxxdiag and dump_printf
   names all refer to the one gcc_diag_length_specs array.  */
516 #define gcc_tdiag_length_specs gcc_diag_length_specs
517 #define gcc_cdiag_length_specs gcc_diag_length_specs
518 #define gcc_cxxdiag_length_specs gcc_diag_length_specs
519 #define gcc_dump_printf_length_specs gcc_diag_length_specs
520 
521 /* This differs from printf_length_specs only in that "Z" is not accepted.  */
/* Row layout is the same as in printf_length_specs; the all-NO_FMT row
   ends the table.  */
522 static const format_length_info scanf_length_specs[] =
523 {
524   { "h", FMT_LEN_h, STD_C89, "hh", FMT_LEN_hh, STD_C99, 0 },
525   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C9L, 0 },
526   { "q", FMT_LEN_ll, STD_EXT, NO_FMT, 0 },
527   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
528   { "z", FMT_LEN_z, STD_C99, NO_FMT, 0 },
529   { "t", FMT_LEN_t, STD_C99, NO_FMT, 0 },
530   { "j", FMT_LEN_j, STD_C99, NO_FMT, 0 },
531   { "H", FMT_LEN_H, STD_C2X, NO_FMT, 0 },
532   { "D", FMT_LEN_D, STD_C2X, "DD", FMT_LEN_DD, STD_C2X, 0 },
533   { NO_FMT, NO_FMT, 0 }
534 };
535 
536 
537 /* All tables for strfmon use STD_C89 everywhere, since -pedantic warnings
538    make no sense for a format type not part of any C standard version.  */
/* The only length modifier listed for strfmon is "L".  */
539 static const format_length_info strfmon_length_specs[] =
540 {
541   /* A GNU extension.  */
542   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
543   { NO_FMT, NO_FMT, 0 }
544 };
545 
546 
547 /* Length modifiers used by the fortran/error.cc routines.  */
/* The rows are identical to those of gcc_diag_length_specs ("l", "ll" and
   "w"), but this is a separate array rather than an alias.  */
548 static const format_length_info gcc_gfc_length_specs[] =
549 {
550   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
551   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
552   { NO_FMT, NO_FMT, 0 }
553 };
554 
555 
/* Flags recognized in printf formats.  format_flag_spec is declared
   elsewhere; as the rows read here, each one gives the flag character,
   three integers (all 0 in this table), a short name and a long name for
   use in messages (both marked with N_), and a standard version.  The
   letters 'w', 'p' and 'L' do not appear literally in formats: their
   names show that they stand for field width, precision and length
   modifier.  The row whose flag character is 0 ends the table.  */
556 static const format_flag_spec printf_flag_specs[] =
557 {
558   { ' ',  0, 0, 0, N_("' ' flag"),        N_("the ' ' printf flag"),              STD_C89 },
559   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
560   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
561   { '0',  0, 0, 0, N_("'0' flag"),        N_("the '0' printf flag"),              STD_C89 },
562   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' printf flag"),              STD_C89 },
563   { '\'', 0, 0, 0, N_("''' flag"),        N_("the ''' printf flag"),              STD_EXT },
564   { 'I',  0, 0, 0, N_("'I' flag"),        N_("the 'I' printf flag"),              STD_EXT },
565   { 'w',  0, 0, 0, N_("field width"),     N_("field width in printf format"),     STD_C89 },
566   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
567   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
568   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
569 };
570 
571 
/* Pairs of printf flags that interact.  Each row names two flag
   characters (using the same letters as printf_flag_specs, so 'p' is the
   precision) followed by two more values; the all-zero row ends the
   table.  NOTE(review): the meaning of the third and fourth fields comes
   from the format_flag_pair declaration, which is not visible here --
   confirm before relying on them.  */
572 static const format_flag_pair printf_flag_pairs[] =
573 {
574   { ' ', '+', 1, 0   },
575   { '0', '-', 1, 0   },
576   { '0', 'p', 1, 'i' },
577   { 0, 0, 0, 0 }
578 };
579 
/* Flags recognized in asm_fprintf formats: the rows of printf_flag_specs
   without the two STD_EXT flags (''' and 'I').  The message strings are
   the printf ones.  The row whose flag character is 0 ends the table.  */
580 static const format_flag_spec asm_fprintf_flag_specs[] =
581 {
582   { ' ',  0, 0, 0, N_("' ' flag"),        N_("the ' ' printf flag"),              STD_C89 },
583   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
584   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
585   { '0',  0, 0, 0, N_("'0' flag"),        N_("the '0' printf flag"),              STD_C89 },
586   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' printf flag"),              STD_C89 },
587   { 'w',  0, 0, 0, N_("field width"),     N_("field width in printf format"),     STD_C89 },
588   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
589   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
590   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
591 };
592 
/* Interacting flag pairs for asm_fprintf formats; the rows are the same
   as in printf_flag_pairs.  The all-zero row ends the table.  */
593 static const format_flag_pair asm_fprintf_flag_pairs[] =
594 {
595   { ' ', '+', 1, 0   },
596   { '0', '-', 1, 0   },
597   { '0', 'p', 1, 'i' },
598   { 0, 0, 0, 0 }
599 };
600 
/* The GCC diagnostic formats list no interacting flag pairs: the table
   holds nothing but the all-zero terminating row.  */
601 static const format_flag_pair gcc_diag_flag_pairs[] =
602 {
603   { 0, 0, 0, 0 }
604 };
605 
/* The tdiag, cdiag, cxxdiag, gfc and dump_printf names are plain aliases
   for that one array.  */
606 #define gcc_tdiag_flag_pairs gcc_diag_flag_pairs
607 #define gcc_cdiag_flag_pairs gcc_diag_flag_pairs
608 #define gcc_cxxdiag_flag_pairs gcc_diag_flag_pairs
609 #define gcc_gfc_flag_pairs gcc_diag_flag_pairs
610 #define gcc_dump_printf_flag_pairs gcc_diag_flag_pairs
611 
/* Flags recognized in GCC diagnostic formats.  Row layout is the same as
   in the other format_flag_spec tables in this file.  The 'q' row is the
   only one here whose fourth field is 1; NOTE(review): that field
   presumably marks a flag that introduces quoting -- confirm against the
   format_flag_spec declaration.  No field-width ('w') row is present.  */
612 static const format_flag_spec gcc_diag_flag_specs[] =
613 {
614   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
615   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
616   { 'q',  0, 0, 1, N_("'q' flag"),        N_("the 'q' diagnostic flag"),          STD_C89 },
617   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
618   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
619   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
620 };
621 
/* The tdiag, cdiag, cxxdiag, gfc and dump_printf names are plain aliases
   for that one array.  */
622 #define gcc_tdiag_flag_specs gcc_diag_flag_specs
623 #define gcc_cdiag_flag_specs gcc_diag_flag_specs
624 #define gcc_cxxdiag_flag_specs gcc_diag_flag_specs
625 #define gcc_gfc_flag_specs gcc_diag_flag_specs
626 #define gcc_dump_printf_flag_specs gcc_diag_flag_specs
627 
/* Flags recognized in scanf formats.  Row layout is the same as in
   printf_flag_specs.  '*' is assignment suppression; 'w' and 'L' stand
   for field width and length modifier, as their names show.  The 'a',
   'm', ''' and 'I' flags are tagged STD_EXT.  The row whose flag
   character is 0 ends the table.  */
628 static const format_flag_spec scanf_flag_specs[] =
629 {
630   { '*',  0, 0, 0, N_("assignment suppression"), N_("the assignment suppression scanf feature"), STD_C89 },
631   { 'a',  0, 0, 0, N_("'a' flag"),               N_("the 'a' scanf flag"),                       STD_EXT },
632   { 'm',  0, 0, 0, N_("'m' flag"),               N_("the 'm' scanf flag"),                       STD_EXT },
633   { 'w',  0, 0, 0, N_("field width"),            N_("field width in scanf format"),              STD_C89 },
634   { 'L',  0, 0, 0, N_("length modifier"),        N_("length modifier in scanf format"),          STD_C89 },
635   { '\'', 0, 0, 0, N_("''' flag"),               N_("the ''' scanf flag"),                       STD_EXT },
636   { 'I',  0, 0, 0, N_("'I' flag"),               N_("the 'I' scanf flag"),                       STD_EXT },
637   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
638 };
639 
640 
/* Interacting flag pairs for scanf formats: assignment suppression with
   a length modifier, and 'a' with 'm'.  Unlike the printf pairs, the
   third field is 0 in both rows.  The all-zero row ends the table.  */
641 static const format_flag_pair scanf_flag_pairs[] =
642 {
643   { '*', 'L', 0, 0 },
644   { 'a', 'm', 0, 0 },
645   { 0, 0, 0, 0 }
646 };
647 
648 
/* Flags and modifiers recognized in strftime formats.  Row layout is the
   same as in printf_flag_specs.  'O' appears three times: once with a
   second field of 0 and twice more with a second field of 'o' or 'p', a
   NULL short name and a different standard version.  NOTE(review): the
   second field presumably restricts a row to certain conversions --
   confirm against the format_flag_spec declaration.  The row whose flag
   character is 0 ends the table.  */
649 static const format_flag_spec strftime_flag_specs[] =
650 {
651   { '_', 0,   0, 0, N_("'_' flag"),     N_("the '_' strftime flag"),          STD_EXT },
652   { '-', 0,   0, 0, N_("'-' flag"),     N_("the '-' strftime flag"),          STD_EXT },
653   { '0', 0,   0, 0, N_("'0' flag"),     N_("the '0' strftime flag"),          STD_EXT },
654   { '^', 0,   0, 0, N_("'^' flag"),     N_("the '^' strftime flag"),          STD_EXT },
655   { '#', 0,   0, 0, N_("'#' flag"),     N_("the '#' strftime flag"),          STD_EXT },
656   { 'w', 0,   0, 0, N_("field width"),  N_("field width in strftime format"), STD_EXT },
657   { 'E', 0,   0, 0, N_("'E' modifier"), N_("the 'E' strftime modifier"),      STD_C99 },
658   { 'O', 0,   0, 0, N_("'O' modifier"), N_("the 'O' strftime modifier"),      STD_C99 },
659   { 'O', 'o', 0, 0, NULL,               N_("the 'O' modifier"),               STD_EXT },
660   { 'O', 'p', 0, 0, NULL,               N_("the 'O' modifier"),               STD_C2X },
661   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
662 };
663 
664 
/* Interacting flag pairs for strftime formats: the 'E' and 'O'
   modifiers, the '_', '-' and '0' flags pairwise, and '^' with '#'.
   The third and fourth fields are 0 in every row.  The all-zero row ends
   the table.  */
665 static const format_flag_pair strftime_flag_pairs[] =
666 {
667   { 'E', 'O', 0, 0 },
668   { '_', '-', 0, 0 },
669   { '_', '0', 0, 0 },
670   { '-', '0', 0, 0 },
671   { '^', '#', 0, 0 },
672   { 0, 0, 0, 0 }
673 };
674 
675 
/* Flags recognized in strfmon formats.  Row layout is the same as in
   printf_flag_specs; every row is tagged STD_C89.  '=' (the fill
   character) is the only row whose third field is 1; NOTE(review): that
   presumably tells the parser to consume the character that follows the
   flag -- confirm against the format_flag_spec declaration.  '#' and 'p'
   stand for the left and right precision, as their names show.  The row
   whose flag character is 0 ends the table.  */
676 static const format_flag_spec strfmon_flag_specs[] =
677 {
678   { '=',  0, 1, 0, N_("fill character"),  N_("fill character in strfmon format"),  STD_C89 },
679   { '^',  0, 0, 0, N_("'^' flag"),        N_("the '^' strfmon flag"),              STD_C89 },
680   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' strfmon flag"),              STD_C89 },
681   { '(',  0, 0, 0, N_("'(' flag"),        N_("the '(' strfmon flag"),              STD_C89 },
682   { '!',  0, 0, 0, N_("'!' flag"),        N_("the '!' strfmon flag"),              STD_C89 },
683   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' strfmon flag"),              STD_C89 },
684   { 'w',  0, 0, 0, N_("field width"),     N_("field width in strfmon format"),     STD_C89 },
685   { '#',  0, 0, 0, N_("left precision"),  N_("left precision in strfmon format"),  STD_C89 },
686   { 'p',  0, 0, 0, N_("right precision"), N_("right precision in strfmon format"), STD_C89 },
687   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in strfmon format"), STD_C89 },
688   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
689 };
690 
/* The single interacting flag pair for strfmon formats: '+' with '('.
   The all-zero row ends the table.  */
691 static const format_flag_pair strfmon_flag_pairs[] =
692 {
693   { '+', '(', 0, 0 },
694   { 0, 0, 0, 0 }
695 };
696 
697 
/* Conversion specifiers accepted in printf formats.  format_char_info is
   declared elsewhere; as the rows read here, each one gives the
   conversion characters that share the entry, an integer that is 1 for
   "s", "p", "n" and "S" and 0 otherwise, the standard version of the
   conversion, a braced list of twelve type entries (BADLEN where a
   combination is not accepted), a string of flag characters using the
   letters of printf_flag_specs, a second string of letters, and a
   trailing NULL.  NOTE(review): the integer is presumably a pointer
   level, and the twelve entries presumably correspond to no length
   modifier followed by one slot per FMT_LEN_* code -- confirm both, and
   the meaning of the second string, against the declaration.  The row
   with a NULL conversion string ends the table.  */
698 static const format_char_info print_char_table[] =
699 {
700   /* C89 conversion specifiers.  */
701   { "di",  0, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  TEX_LL,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN  }, "-wp0 +'I",  "i",  NULL },
702   { "oxX", 0, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0#",     "i",  NULL },
703   { "u",   0, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0'I",    "i",  NULL },
704   { "fgG", 0, STD_C89, { T89_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32, T2X_D64, T2X_D128 }, "-wp0 +#'I", "",   NULL },
705   { "eE",  0, STD_C89, { T89_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32, T2X_D64, T2X_D128 }, "-wp0 +#I",  "",   NULL },
706   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T94_WI,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "",   NULL },
707   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "cR", NULL },
708   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "c",  NULL },
709   { "n",   1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  BADLEN,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "",          "W",  NULL },
710   /* C99 conversion specifiers.  */
711   { "F",   0, STD_C99, { T99_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32, T2X_D64, T2X_D128 }, "-wp0 +#'I", "",   NULL },
712   { "aA",  0, STD_C99, { T99_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32, T2X_D64,  T2X_D128 }, "-wp0 +#",   "",   NULL },
713   /* C2X conversion specifiers.  */
714   { "b",   0, STD_C2X, { T2X_UI,  T2X_UC,  T2X_US,  T2X_UL,  T2X_ULL, TEX_ULL, T2X_ST,  T2X_UPD, T2X_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0#",     "i",  NULL },
715   /* X/Open conversion specifiers.  */
716   { "C",   0, STD_EXT, { TEX_WI,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "",   NULL },
717   { "S",   1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "R",  NULL },
718   /* GNU conversion specifiers.  */
719   { "m",   0, STD_EXT, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "",   NULL },
720   { "B",   0, STD_EXT, { T2X_UI,  T2X_UC,  T2X_US,  T2X_UL,  T2X_ULL, TEX_ULL, T2X_ST,  T2X_UPD, T2X_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0#",     "i",  NULL },
721   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
722 };
723 
/* Conversion specifiers accepted in asm_fprintf formats.  Row layout is
   the same as in print_char_table.  The first group repeats a subset of
   the C89 printf conversions with fewer accepted length modifiers; the
   second group is specific to asm_fprintf, and most of those rows use
   NOARGUMENTS in place of a type list.  NOTE(review): NOARGUMENTS
   presumably marks a conversion that consumes no argument -- confirm
   against its definition.  The row with a NULL conversion string ends
   the table.  */
724 static const format_char_info asm_fprintf_char_table[] =
725 {
726   /* C89 conversion specifiers.  */
727   { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0 +",  "i", NULL },
728   { "oxX", 0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0#",   "i", NULL },
729   { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0",    "i", NULL },
730   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-w",       "", NULL },
731   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp",    "cR", NULL },
732 
733   /* asm_fprintf conversion specifiers.  */
734   { "O",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
735   { "R",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
736   { "I",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
737   { "L",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
738   { "U",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
739   { "r",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",  "", NULL },
740   { "z",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
741   { "@",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
742   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
743 };
744 
745 /* GCC-specific format_char_info arrays.  */
746 
/* The conversion specifiers implemented within pp_format, and thus supported
   by all pretty_printer instances within GCC.

   The macro expands to a comma-separated list of format_char_info
   initializers with neither a trailing comma nor a terminating sentinel
   row, so each table that uses it writes "PP_FORMAT_CHAR_TABLE," and then
   supplies its own extra rows and the NULL sentinel.  Note that the last
   row ("Z") refers to gcc_diag_char_table, so that table must already be
   declared wherever the macro is expanded.  */

#define PP_FORMAT_CHAR_TABLE \
  { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "ox",  0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "pq", "cR", NULL }, \
  { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "c",  NULL }, \
  { "r",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "//cR",   NULL }, \
  { "@",   1, STD_C89, { T_EVENT_PTR,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL }, \
  { "<",   0, STD_C89, NOARGUMENTS, "",      "<",   NULL }, \
  { ">",   0, STD_C89, NOARGUMENTS, "",      ">",   NULL }, \
  { "'" ,  0, STD_C89, NOARGUMENTS, "",      "",    NULL }, \
  { "{",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "cR", NULL }, \
  { "}",   0, STD_C89, NOARGUMENTS, "",      "",    NULL }, \
  { "R",   0, STD_C89, NOARGUMENTS, "",     "\\",   NULL }, \
  { "m",   0, STD_C89, NOARGUMENTS, "q",     "",   NULL }, \
  { "Z",   1, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "", &gcc_diag_char_table[0] }
767 
/* Conversion specifiers for the "gcc_diag" format kind: exactly the
   common pp_format set, followed by the NULL sentinel row.  */
static const format_char_info gcc_diag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Sentinel: end of table.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
775 
/* Conversion specifiers for the "gcc_tdiag" format kind: the common
   pp_format set plus the tree-printing specifiers listed below,
   followed by the NULL sentinel row.  */
static const format_char_info gcc_tdiag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by default_tree_printer.  */

  /* These will require a "tree" at runtime.  */
  { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
  { "E", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },

  /* Sentinel: end of table.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
789 
/* Conversion specifiers for the "gcc_cdiag" format kind: the common
   pp_format set plus the specifiers listed below, followed by the NULL
   sentinel row.  */
static const format_char_info gcc_cdiag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by c_tree_printer.  */

  /* These will require a "tree" at runtime.  */
  { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
  { "E",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },

  /* NOTE(review): unlike the rows above, "v" uses the same type column as
     the "c" row of the common set rather than the tree one; presumably it
     takes an int-sized argument -- confirm against c_tree_printer.  */
  { "v",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },

  /* Sentinel: end of table.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
805 
/* Conversion specifiers for the "gcc_cxxdiag" format kind: the common
   pp_format set plus the specifiers listed below, followed by the NULL
   sentinel row.  */
static const format_char_info gcc_cxxdiag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by cp_printer.  */

  /* These will require a "tree" at runtime.  */
  { "ADFHISTVX",1,STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "'",   NULL },
  { "E", 1,STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "",   NULL },

  /* These accept either an 'int' or an 'enum tree_code' (which is handled as an 'int'.)  */
  { "CLOPQ",0,STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL },

  /* Sentinel: end of table.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
822 
/* Conversion specifiers for the "gcc_gfc" format kind.  Unlike the other
   gcc_* tables in this file, this one does not start from
   PP_FORMAT_CHAR_TABLE; every accepted specifier is listed explicitly.
   The array is terminated by the NULL sentinel row.  */
static const format_char_info gcc_gfc_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN, BADLEN, BADLEN, BADLEN }, "q", "", NULL },
  { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL,  BADLEN,  BADLEN,  BADLEN,  BADLEN, BADLEN, BADLEN, BADLEN  }, "q", "", NULL },
  { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN, BADLEN, BADLEN, BADLEN }, "q", "", NULL },
  { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN, BADLEN, BADLEN, BADLEN }, "q", "cR", NULL },

  /* gfc conversion specifiers.  */

  { "C",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },

  /* This will require a "locus" at runtime.  */
  { "L",   0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "R", NULL },

  /* These will require nothing.  */
  { "<>",0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  /* Sentinel: end of table.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
842 
/* Conversion specifiers for the "gcc_dump_printf" format kind: the common
   pp_format set plus the specifiers listed below, followed by the NULL
   sentinel row.  */
static const format_char_info gcc_dump_printf_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by dump_pretty_printer.  */

  /* E and G require a "gimple *" argument at runtime.  */
  { "EG",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* C requires a "cgraph_node *" argument at runtime.  */
  { "C",   1, STD_C89, { T_CGRAPH_NODE,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* T requires a "tree" at runtime.  */
  { "T",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* %f requires a "double"; it doesn't support modifiers.  */
  { "f",   0, STD_C89, { T89_D,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* Sentinel: end of table.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
864 
/* Conversion specifiers for the scanf family (this is the char table
   named by the "gnu_scanf" entry of format_types_orig).  Rows are grouped
   by the standard that introduced the specifier; the array is terminated
   by the NULL sentinel row.  */
static const format_char_info scan_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "di",    1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  TEX_LL,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "*w'I", "W",   NULL },
  { "u",     1, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "*w'I", "W",   NULL },
  { "oxX",   1, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
  { "efgEG", 1, STD_C89, { T89_F,   BADLEN,  BADLEN,  T89_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32, T2X_D64, T2X_D128 }, "*w'",  "W",   NULL },
  { "c",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*mw",   "cW",  NULL },
  { "s",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "cW",  NULL },
  { "[",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "cW[", NULL },
  { "p",     2, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
  { "n",     1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  BADLEN,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "",     "W",   NULL },
  /* C99 conversion specifiers.  */
  { "F",   1, STD_C99, { T99_F,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32, T2X_D64, T2X_D128 }, "*w'",  "W",   NULL },
  { "aA",   1, STD_C99, { T99_F,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  T2X_D32,  T2X_D64,  T2X_D128 }, "*w'",  "W",   NULL },
  /* C2X conversion specifiers.  */
  { "b",     1, STD_C2X, { T2X_UI,  T2X_UC,  T2X_US,  T2X_UL,  T2X_ULL, TEX_ULL, T2X_ST,  T2X_UPD, T2X_UIM, BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
  /* X/Open conversion specifiers.  */
  { "C",     1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*mw",   "W",   NULL },
  { "S",     1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "W",   NULL },
  /* Sentinel: end of table.  */
  { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
887 
/* Conversion specifiers for strftime (this is the char table named by
   the "gnu_strftime" entry of format_types_orig).  Every row uses
   NOLENGTHS for its type list.  Rows are grouped by the standard that
   introduced the specifier; the array is terminated by the NULL sentinel
   row.  */
static const format_char_info time_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "AZa",		0, STD_C89, NOLENGTHS, "^#",     "",   NULL },
  { "Bb",		0, STD_C89, NOLENGTHS, "O^#",    "p",  NULL },
  { "cx",		0, STD_C89, NOLENGTHS, "E",      "3",  NULL },
  { "HIMSUWdmw",	0, STD_C89, NOLENGTHS, "-_0Ow",  "",   NULL },
  { "j",		0, STD_C89, NOLENGTHS, "-_0Ow",  "o",  NULL },
  { "p",		0, STD_C89, NOLENGTHS, "#",      "",   NULL },
  { "X",		0, STD_C89, NOLENGTHS, "E",      "",   NULL },
  { "y",		0, STD_C89, NOLENGTHS, "EO-_0w", "4",  NULL },
  { "Y",		0, STD_C89, NOLENGTHS, "-_0EOw", "o",  NULL },
  { "%",		0, STD_C89, NOLENGTHS, "",       "",   NULL },
  /* C99 conversion specifiers.  */
  { "C",		0, STD_C99, NOLENGTHS, "-_0EOw", "o",  NULL },
  { "D",		0, STD_C99, NOLENGTHS, "",       "2",  NULL },
  { "eVu",		0, STD_C99, NOLENGTHS, "-_0Ow",  "",   NULL },
  { "FRTnrt",		0, STD_C99, NOLENGTHS, "",       "",   NULL },
  { "g",		0, STD_C99, NOLENGTHS, "O-_0w",  "2o", NULL },
  { "G",		0, STD_C99, NOLENGTHS, "-_0Ow",  "o",  NULL },
  { "h",		0, STD_C99, NOLENGTHS, "^#",     "",   NULL },
  { "z",		0, STD_C99, NOLENGTHS, "O",      "o",  NULL },
  /* GNU conversion specifiers.  */
  { "kls",		0, STD_EXT, NOLENGTHS, "-_0Ow",  "",   NULL },
  { "P",		0, STD_EXT, NOLENGTHS, "",       "",   NULL },
  /* Sentinel: end of table.  */
  { NULL,		0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
915 
/* Conversion specifiers for strfmon (this is the char table named by the
   "gnu_strfmon" entry of format_types_orig): a single row covering "i"
   and "n", followed by the NULL sentinel row.  */
static const format_char_info monetary_char_table[] =
{
  { "in", 0, STD_C89, { T89_D, BADLEN, BADLEN, BADLEN, BADLEN, T89_LD, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "=^+(!-w#p", "", NULL },
  /* Sentinel: end of table.  */
  { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
921 
922 /* This must be in the same order as enum format_type.  */
923 static const format_kind_info format_types_orig[] =
924 {
925   { "gnu_printf",   printf_length_specs,  print_char_table, " +#0-'I", NULL,
926     printf_flag_specs, printf_flag_pairs,
927     FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK,
928     'w', 0, 'p', 0, 'L', 0,
929     &integer_type_node, &integer_type_node, format_type_error
930   },
931   { "asm_fprintf",   asm_fprintf_length_specs,  asm_fprintf_char_table, " +#0-", NULL,
932     asm_fprintf_flag_specs, asm_fprintf_flag_pairs,
933     FMT_FLAG_ARG_CONVERT|FMT_FLAG_EMPTY_PREC_OK,
934     'w', 0, 'p', 0, 'L', 0,
935     NULL, NULL, format_type_error
936   },
937   { "gcc_diag",   gcc_diag_length_specs,  gcc_diag_char_table, "q+#", NULL,
938     gcc_diag_flag_specs, gcc_diag_flag_pairs,
939     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
940     0, 0, 'p', 0, 'L', 0,
941     NULL, &integer_type_node, format_type_error
942   },
943   { "gcc_tdiag",   gcc_tdiag_length_specs,  gcc_tdiag_char_table, "q+#", NULL,
944     gcc_tdiag_flag_specs, gcc_tdiag_flag_pairs,
945     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
946     0, 0, 'p', 0, 'L', 0,
947     NULL, &integer_type_node, format_type_error
948   },
949   { "gcc_cdiag",   gcc_cdiag_length_specs,  gcc_cdiag_char_table, "q+#", NULL,
950     gcc_cdiag_flag_specs, gcc_cdiag_flag_pairs,
951     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
952     0, 0, 'p', 0, 'L', 0,
953     NULL, &integer_type_node, format_type_error
954   },
955   { "gcc_cxxdiag",   gcc_cxxdiag_length_specs,  gcc_cxxdiag_char_table, "q+#", NULL,
956     gcc_cxxdiag_flag_specs, gcc_cxxdiag_flag_pairs,
957     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
958     0, 0, 'p', 0, 'L', 0,
959     NULL, &integer_type_node, format_type_error
960   },
961   { "gcc_gfc", gcc_gfc_length_specs, gcc_gfc_char_table, "q+#", NULL,
962     gcc_gfc_flag_specs, gcc_gfc_flag_pairs,
963     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
964     0, 0, 0, 0, 0, 0,
965     NULL, NULL, format_type_error
966   },
967   { "gcc_dump_printf",   gcc_dump_printf_length_specs,
968     gcc_dump_printf_char_table, "q+#", NULL,
969     gcc_dump_printf_flag_specs, gcc_dump_printf_flag_pairs,
970     FMT_FLAG_ARG_CONVERT,
971     0, 0, 'p', 0, 'L', 0,
972     NULL, &integer_type_node
973   },
974   { "NSString",   NULL,  NULL, NULL, NULL,
975     NULL, NULL,
976     FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0,
977     NULL, NULL, format_type_error
978   },
979   { "gnu_scanf",    scanf_length_specs,   scan_char_table,  "*'I", NULL,
980     scanf_flag_specs, scanf_flag_pairs,
981     FMT_FLAG_ARG_CONVERT|FMT_FLAG_SCANF_A_KLUDGE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_ZERO_WIDTH_BAD|FMT_FLAG_DOLLAR_GAP_POINTER_OK,
982     'w', 0, 0, '*', 'L', 'm',
983     NULL, NULL, format_type_error
984   },
985   { "gnu_strftime", NULL,                 time_char_table,  "_-0^#", "EO",
986     strftime_flag_specs, strftime_flag_pairs,
987     FMT_FLAG_FANCY_PERCENT_OK|FMT_FLAG_M_OK, 'w', 0, 0, 0, 0, 0,
988     NULL, NULL, format_type_error
989   },
990   { "gnu_strfmon",  strfmon_length_specs, monetary_char_table, "=^+(!-", NULL,
991     strfmon_flag_specs, strfmon_flag_pairs,
992     FMT_FLAG_ARG_CONVERT, 'w', '#', 'p', 0, 'L', 0,
993     NULL, NULL, format_type_error
994   },
995   { "gnu_syslog",   printf_length_specs,  print_char_table, " +#0-'I", NULL,
996     printf_flag_specs, printf_flag_pairs,
997     FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK|FMT_FLAG_M_OK,
998     'w', 0, 'p', 0, 'L', 0,
999     &integer_type_node, &integer_type_node, printf_format_type
1000   },
1001 };
1002 
/* This layer of indirection allows GCC to reassign format_types with
   new data if necessary, while still allowing the original data to be
   const.  */
static const format_kind_info *format_types = format_types_orig;
/* We can modify this one.  We also add target-specific format types
   to the end of the array.  */
static format_kind_info *dynamic_format_types;

/* Number of entries reachable through format_types.  It starts out as
   the size of format_types_orig and is the upper bound used when
   indexing or scanning format_types.  NOTE(review): presumably increased
   when target-specific types are appended -- confirm where
   dynamic_format_types is set up.  */
static int n_format_types = ARRAY_SIZE (format_types_orig);
1012 
/* Structure detailing the results of checking a format function call
   where the format expression may be a conditional expression with
   many leaves resulting from nested conditional expressions.  Apart
   from the two locations, every member is a count of leaves falling
   into one category.  */
struct format_check_results
{
  /* Number of leaves of the format argument that could not be checked
     as they were not string literals.  */
  int number_non_literal;
  /* Number of leaves of the format argument that were null pointers or
     string literals, but had extra format arguments.  */
  int number_extra_args;
  /* Location recorded together with number_extra_args.
     NOTE(review): presumably the location of an extra argument; confirm
     at the place where it is assigned.  */
  location_t extra_arg_loc;
  /* Number of leaves of the format argument that were null pointers or
     string literals, but had extra format arguments and used $ operand
     numbers.  */
  int number_dollar_extra_args;
  /* Number of leaves of the format argument that were wide string
     literals.  */
  int number_wide;
  /* Number of leaves of the format argument that are not arrays of
     "char".  */
  int number_non_char;
  /* Number of leaves of the format argument that were empty strings.  */
  int number_empty;
  /* Number of leaves of the format argument that were unterminated
     strings.  */
  int number_unterminated;
  /* Number of leaves of the format argument that were not counted above.  */
  int number_other;
  /* Location of the format string.  */
  location_t format_string_loc;
};
1044 
/* Bundle of the values needed while checking one format argument.
   NOTE(review): presumably this is what is handed to check_format_arg
   through its "void *" parameter -- confirm at the call site.  */
struct format_check_context
{
  /* Where the outcome of the check is accumulated.  */
  format_check_results *res;
  /* The decoded format attribute being checked.  */
  function_format_info *info;
  /* The call's arguments, as a TREE_LIST.  */
  tree params;
  /* Locations of the call's arguments.  NOTE(review): may presumably be
     null; confirm with the users.  */
  vec<location_t> *arglocs;
};
1052 
1053 /* Return the format name (as specified in the original table) for the format
1054    type indicated by format_num.  */
1055 static const char *
format_name(int format_num)1056 format_name (int format_num)
1057 {
1058   if (format_num >= 0 && format_num < n_format_types)
1059     return format_types[format_num].name;
1060   gcc_unreachable ();
1061 }
1062 
1063 /* Return the format flags (as specified in the original table) for the format
1064    type indicated by format_num.  */
1065 static int
format_flags(int format_num)1066 format_flags (int format_num)
1067 {
1068   if (format_num >= 0 && format_num < n_format_types)
1069     return format_types[format_num].flags;
1070   gcc_unreachable ();
1071 }
1072 
/* Forward declarations of file-local helpers.  */

/* Checking of a whole call and of a single format argument.  */
static void check_format_info (function_format_info *, tree,
			       vec<location_t> *);
static void check_format_arg (void *, tree, unsigned HOST_WIDE_INT);
static void check_format_info_main (format_check_results *,
				    function_format_info *, const char *,
				    location_t, tree,
				    int, tree,
				    unsigned HOST_WIDE_INT,
				    object_allocator<format_wanted_type> &,
				    vec<location_t> *);

/* Support for "%n$" operand numbers.  */
static void init_dollar_format_checking (int, tree);
static int maybe_read_dollar_number (const char **, int,
				     tree, tree *, const format_kind_info *);
static bool avoid_dollar_number (const char *);
static void finish_dollar_format_checking (format_check_results *, int);

static const format_flag_spec *get_flag_spec (const format_flag_spec *,
					      int, const char *);

/* Comparison of argument types against what the format wants, and the
   diagnostics for a mismatch.  */
static void check_format_types (const substring_loc &fmt_loc,
				format_wanted_type *,
				const format_kind_info *fki,
				int offset_to_type_start,
				char conversion_char,
				vec<location_t> *arglocs);
static void format_type_warning (const substring_loc &fmt_loc,
				 location_t param_loc,
				 format_wanted_type *, tree,
				 tree,
				 const format_kind_info *fki,
				 int offset_to_type_start,
				 char conversion_char);
1106 
1107 /* Decode a format type from a string, returning the type, or
1108    format_type_error if not valid, in which case the caller should
1109    print an error message.  On success, when IS_RAW is non-null, set
1110    *IS_RAW when the format type corresponds to a GCC "raw" diagnostic
1111    formatting function and clear it otherwise.  */
1112 static format_type
decode_format_type(const char * s,bool * is_raw)1113 decode_format_type (const char *s, bool *is_raw /* = NULL */)
1114 {
1115   bool is_raw_buf;
1116 
1117   if (!is_raw)
1118     is_raw = &is_raw_buf;
1119 
1120   *is_raw = false;
1121 
1122   s = convert_format_name_to_system_name (s);
1123 
1124   size_t slen = strlen (s);
1125   for (int i = 0; i < n_format_types; i++)
1126     {
1127       /* Check for a match with no underscores.  */
1128       if (!strcmp (s, format_types[i].name))
1129 	return static_cast<format_type> (i);
1130 
1131       /* Check for leading and trailing underscores.  */
1132       size_t alen = strlen (format_types[i].name);
1133       if (slen == alen + 4 && s[0] == '_' && s[1] == '_'
1134 	  && s[slen - 1] == '_' && s[slen - 2] == '_'
1135 	  && !strncmp (s + 2, format_types[i].name, alen))
1136 	return static_cast<format_type>(i);
1137 
1138       /* Check for the "_raw" suffix and no leading underscores.  */
1139       if (slen == alen + 4
1140 	  && !strncmp (s, format_types[i].name, alen)
1141 	  && !strcmp (s + alen, "_raw"))
1142 	{
1143 	  *is_raw = true;
1144 	  return static_cast<format_type>(i);
1145 	}
1146 
1147       /* Check for the "_raw__" suffix and leading underscores.  */
1148       if (slen == alen + 8 && s[0] == '_' && s[1] == '_'
1149 	  && !strncmp (s + 2, format_types[i].name, alen)
1150 	  && !strcmp (s + 2 + alen, "_raw__"))
1151 	{
1152 	  *is_raw = true;
1153 	  return static_cast<format_type>(i);
1154 	}
1155     }
1156 
1157   return format_type_error;
1158 }
1159 
1160 
1161 /* Check the argument list of a call to printf, scanf, etc.
1162    ATTRS are the attributes on the function type.  There are NARGS argument
1163    values in the array ARGARRAY.
1164    Also, if -Wsuggest-attribute=format,
1165    warn for calls to vprintf or vscanf in functions with no such format
1166    attribute themselves.  */
1167 
1168 void
check_function_format(const_tree fntype,tree attrs,int nargs,tree * argarray,vec<location_t> * arglocs)1169 check_function_format (const_tree fntype, tree attrs, int nargs,
1170 		       tree *argarray, vec<location_t> *arglocs)
1171 {
1172   tree a;
1173 
1174   tree atname = get_identifier ("format");
1175 
1176   /* See if this function has any format attributes.  */
1177   for (a = attrs; a; a = TREE_CHAIN (a))
1178     {
1179       if (is_attribute_p ("format", get_attribute_name (a)))
1180 	{
1181 	  /* Yup; check it.  */
1182 	  function_format_info info;
1183 	  decode_format_attr (fntype, atname, TREE_VALUE (a), &info,
1184 			      /*validated=*/true);
1185 	  if (warn_format)
1186 	    {
1187 	      /* FIXME: Rewrite all the internal functions in this file
1188 		 to use the ARGARRAY directly instead of constructing this
1189 		 temporary list.  */
1190 	      tree params = NULL_TREE;
1191 	      int i;
1192 	      for (i = nargs - 1; i >= 0; i--)
1193 		params = tree_cons (NULL_TREE, argarray[i], params);
1194 	      check_format_info (&info, params, arglocs);
1195 	    }
1196 	  const format_kind_info *fi = &format_types[info.format_type];
1197 
1198 	  /* Attempt to detect whether the current function might benefit
1199 	     from the format attribute if the called function is decorated
1200 	     with it.  Avoid using calls with string literal formats for
1201 	     guidance since those are unlikely to be viable candidates.  */
1202 	  if (warn_suggest_attribute_format
1203 	      && current_function_decl != NULL_TREE
1204 	      && info.first_arg_num == 0
1205 	      && (fi->flags & (int) FMT_FLAG_ARG_CONVERT)
1206 	      /* c_strlen will fail for a function parameter but succeed
1207 		 for a literal or constant array.  */
1208 	      && !c_strlen (argarray[info.format_num - 1], 1))
1209 	    {
1210 	      tree c;
1211 	      for (c = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
1212 		   c;
1213 		   c = TREE_CHAIN (c))
1214 		{
1215 		  if (!is_attribute_p ("format", TREE_PURPOSE (c)))
1216 		     continue;
1217 		  int format_type = decode_format_type (
1218 		      IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (c))));
1219 		  if (format_type == format_type_error)
1220 		     continue;
1221 		  if (format_type == info.format_type ||
1222 		      format_type == fi->parent_format_type)
1223 		    break;
1224 		}
1225 	      if (c == NULL_TREE)
1226 		{
1227 		  /* Check if the current function has a parameter to which
1228 		     the format attribute could be attached; if not, it
1229 		     can't be a candidate for a format attribute, despite
1230 		     the vprintf-like or vscanf-like call.  */
1231 		  tree args;
1232 		  for (args = DECL_ARGUMENTS (current_function_decl);
1233 		       args != 0;
1234 		       args = DECL_CHAIN (args))
1235 		    {
1236 		      if (TREE_CODE (TREE_TYPE (args)) == POINTER_TYPE
1237 			  && (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (args)))
1238 			      == char_type_node))
1239 			break;
1240 		    }
1241 		  if (args != 0)
1242 		    warning (OPT_Wsuggest_attribute_format, "function %qD "
1243 			     "might be a candidate for %qs format attribute",
1244 			     current_function_decl,
1245 			     format_types[info.format_type].name);
1246 		}
1247 	    }
1248 	}
1249     }
1250 }
1251 
1252 
/* Variables used by the checking of $ operand number formats.  */

/* One entry per operand: 0 if not yet referenced, 1 once referenced, 2
   once a repeated reference has been diagnosed.  */
static char *dollar_arguments_used = NULL;
/* One entry per operand: nonzero if the argument has pointer type.  Only
   filled in when the arguments are known (dollar_first_arg_num > 0).  */
static char *dollar_arguments_pointer_p = NULL;
/* Number of entries allocated in each of the two arrays above.  */
static int dollar_arguments_alloc = 0;
/* Number of arguments in the parameter list, or 0 when the arguments
   are not available (vprintf-style).  */
static int dollar_arguments_count;
/* Number of the first format operand, or 0 for a vprintf-style
   function.  */
static int dollar_first_arg_num;
/* Largest operand number referenced so far.  */
static int dollar_max_arg_used;
/* Nonzero once the pedantic "%n$ not supported" warning has been given
   for the current format.  */
static int dollar_format_warned;
1261 
1262 /* Initialize the checking for a format string that may contain $
1263    parameter number specifications; we will need to keep track of whether
1264    each parameter has been used.  FIRST_ARG_NUM is the number of the first
1265    argument that is a parameter to the format, or 0 for a vprintf-style
1266    function; PARAMS is the list of arguments starting at this argument.  */
1267 
1268 static void
init_dollar_format_checking(int first_arg_num,tree params)1269 init_dollar_format_checking (int first_arg_num, tree params)
1270 {
1271   tree oparams = params;
1272 
1273   dollar_first_arg_num = first_arg_num;
1274   dollar_arguments_count = 0;
1275   dollar_max_arg_used = 0;
1276   dollar_format_warned = 0;
1277   if (first_arg_num > 0)
1278     {
1279       while (params)
1280 	{
1281 	  dollar_arguments_count++;
1282 	  params = TREE_CHAIN (params);
1283 	}
1284     }
1285   if (dollar_arguments_alloc < dollar_arguments_count)
1286     {
1287       free (dollar_arguments_used);
1288       free (dollar_arguments_pointer_p);
1289       dollar_arguments_alloc = dollar_arguments_count;
1290       dollar_arguments_used = XNEWVEC (char, dollar_arguments_alloc);
1291       dollar_arguments_pointer_p = XNEWVEC (char, dollar_arguments_alloc);
1292     }
1293   if (dollar_arguments_alloc)
1294     {
1295       memset (dollar_arguments_used, 0, dollar_arguments_alloc);
1296       if (first_arg_num > 0)
1297 	{
1298 	  int i = 0;
1299 	  params = oparams;
1300 	  while (params)
1301 	    {
1302 	      dollar_arguments_pointer_p[i] = (TREE_CODE (TREE_TYPE (TREE_VALUE (params)))
1303 					       == POINTER_TYPE);
1304 	      params = TREE_CHAIN (params);
1305 	      i++;
1306 	    }
1307 	}
1308     }
1309 }
1310 
1311 
1312 /* Look for a decimal number followed by a $ in *FORMAT.  If DOLLAR_NEEDED
1313    is set, it is an error if one is not found; otherwise, it is OK.  If
1314    such a number is found, check whether it is within range and mark that
1315    numbered operand as being used for later checking.  Returns the operand
1316    number if found and within range, zero if no such number was found and
1317    this is OK, or -1 on error.  PARAMS points to the first operand of the
1318    format; PARAM_PTR is made to point to the parameter referred to.  If
1319    a $ format is found, *FORMAT is updated to point just after it.  */
1320 
1321 static int
maybe_read_dollar_number(const char ** format,int dollar_needed,tree params,tree * param_ptr,const format_kind_info * fki)1322 maybe_read_dollar_number (const char **format,
1323 			  int dollar_needed, tree params, tree *param_ptr,
1324 			  const format_kind_info *fki)
1325 {
1326   int argnum;
1327   int overflow_flag;
1328   const char *fcp = *format;
1329   if (!ISDIGIT (*fcp))
1330     {
1331       if (dollar_needed)
1332 	{
1333 	  warning (OPT_Wformat_, "missing $ operand number in format");
1334 	  return -1;
1335 	}
1336       else
1337 	return 0;
1338     }
1339   argnum = 0;
1340   overflow_flag = 0;
1341   while (ISDIGIT (*fcp))
1342     {
1343       HOST_WIDE_INT nargnum
1344 	= HOST_WIDE_INT_UC (10) * argnum + (*fcp - '0');
1345       if ((int) nargnum != nargnum)
1346 	overflow_flag = 1;
1347       argnum = nargnum;
1348       fcp++;
1349     }
1350   if (*fcp != '$')
1351     {
1352       if (dollar_needed)
1353 	{
1354 	  warning (OPT_Wformat_, "missing $ operand number in format");
1355 	  return -1;
1356 	}
1357       else
1358 	return 0;
1359     }
1360   *format = fcp + 1;
1361   if (pedantic && !dollar_format_warned)
1362     {
1363       warning (OPT_Wformat_, "%s does not support %%n$ operand number formats",
1364 	       C_STD_NAME (STD_EXT));
1365       dollar_format_warned = 1;
1366     }
1367   if (overflow_flag || argnum == 0
1368       || (dollar_first_arg_num && argnum > dollar_arguments_count))
1369     {
1370       warning (OPT_Wformat_, "operand number out of range in format");
1371       return -1;
1372     }
1373   if (argnum > dollar_max_arg_used)
1374     dollar_max_arg_used = argnum;
1375   /* For vprintf-style functions we may need to allocate more memory to
1376      track which arguments are used.  */
1377   while (dollar_arguments_alloc < dollar_max_arg_used)
1378     {
1379       int nalloc;
1380       nalloc = 2 * dollar_arguments_alloc + 16;
1381       dollar_arguments_used = XRESIZEVEC (char, dollar_arguments_used,
1382 					  nalloc);
1383       dollar_arguments_pointer_p = XRESIZEVEC (char, dollar_arguments_pointer_p,
1384 					       nalloc);
1385       memset (dollar_arguments_used + dollar_arguments_alloc, 0,
1386 	      nalloc - dollar_arguments_alloc);
1387       dollar_arguments_alloc = nalloc;
1388     }
1389   if (!(fki->flags & (int) FMT_FLAG_DOLLAR_MULTIPLE)
1390       && dollar_arguments_used[argnum - 1] == 1)
1391     {
1392       dollar_arguments_used[argnum - 1] = 2;
1393       warning (OPT_Wformat_, "format argument %d used more than once in %s format",
1394 	       argnum, fki->name);
1395     }
1396   else
1397     dollar_arguments_used[argnum - 1] = 1;
1398   if (dollar_first_arg_num)
1399     {
1400       int i;
1401       *param_ptr = params;
1402       for (i = 1; i < argnum && *param_ptr != 0; i++)
1403 	*param_ptr = TREE_CHAIN (*param_ptr);
1404 
1405       /* This case shouldn't be caught here.  */
1406       gcc_assert (*param_ptr);
1407     }
1408   else
1409     *param_ptr = 0;
1410   return argnum;
1411 }
1412 
1413 /* Ensure that FORMAT does not start with a decimal number followed by
1414    a $; give a diagnostic and return true if it does, false otherwise.  */
1415 
1416 static bool
avoid_dollar_number(const char * format)1417 avoid_dollar_number (const char *format)
1418 {
1419   if (!ISDIGIT (*format))
1420     return false;
1421   while (ISDIGIT (*format))
1422     format++;
1423   if (*format == '$')
1424     {
1425       warning (OPT_Wformat_,
1426 	       "%<$%>operand number used after format without operand number");
1427       return true;
1428     }
1429   return false;
1430 }
1431 
1432 
1433 /* Finish the checking for a format string that used $ operand number formats
1434    instead of non-$ formats.  We check for unused operands before used ones
1435    (a serious error, since the implementation of the format function
1436    can't know what types to pass to va_arg to find the later arguments).
1437    and for unused operands at the end of the format (if we know how many
1438    arguments the format had, so not for vprintf).  If there were operand
1439    numbers out of range on a non-vprintf-style format, we won't have reached
1440    here.  If POINTER_GAP_OK, unused arguments are OK if all arguments are
1441    pointers.  */
1442 
1443 static void
finish_dollar_format_checking(format_check_results * res,int pointer_gap_ok)1444 finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok)
1445 {
1446   int i;
1447   bool found_pointer_gap = false;
1448   for (i = 0; i < dollar_max_arg_used; i++)
1449     {
1450       if (!dollar_arguments_used[i])
1451 	{
1452 	  if (pointer_gap_ok && (dollar_first_arg_num == 0
1453 				 || dollar_arguments_pointer_p[i]))
1454 	    found_pointer_gap = true;
1455 	  else
1456 	    warning_at (res->format_string_loc, OPT_Wformat_,
1457 			"format argument %d unused before used argument %d "
1458 			"in %<$%>-style format",
1459 			i + 1, dollar_max_arg_used);
1460 	}
1461     }
1462   if (found_pointer_gap
1463       || (dollar_first_arg_num
1464 	  && dollar_max_arg_used < dollar_arguments_count))
1465     {
1466       res->number_other--;
1467       res->number_dollar_extra_args++;
1468     }
1469 }
1470 
1471 
1472 /* Retrieve the specification for a format flag.  SPEC contains the
1473    specifications for format flags for the applicable kind of format.
1474    FLAG is the flag in question.  If PREDICATES is NULL, the basic
1475    spec for that flag must be retrieved and must exist.  If
1476    PREDICATES is not NULL, it is a string listing possible predicates
1477    for the spec entry; if an entry predicated on any of these is
1478    found, it is returned, otherwise NULL is returned.  */
1479 
1480 static const format_flag_spec *
get_flag_spec(const format_flag_spec * spec,int flag,const char * predicates)1481 get_flag_spec (const format_flag_spec *spec, int flag, const char *predicates)
1482 {
1483   int i;
1484   for (i = 0; spec[i].flag_char != 0; i++)
1485     {
1486       if (spec[i].flag_char != flag)
1487 	continue;
1488       if (predicates != NULL)
1489 	{
1490 	  if (spec[i].predicate != 0
1491 	      && strchr (predicates, spec[i].predicate) != 0)
1492 	    return &spec[i];
1493 	}
1494       else if (spec[i].predicate == 0)
1495 	return &spec[i];
1496     }
1497   gcc_assert (predicates);
1498   return NULL;
1499 }
1500 
1501 
1502 /* Check the argument list of a call to printf, scanf, etc.
1503    INFO points to the function_format_info structure.
1504    PARAMS is the list of argument values.  */
1505 
1506 static void
check_format_info(function_format_info * info,tree params,vec<location_t> * arglocs)1507 check_format_info (function_format_info *info, tree params,
1508 		   vec<location_t> *arglocs)
1509 {
1510   format_check_context format_ctx;
1511   unsigned HOST_WIDE_INT arg_num;
1512   tree format_tree;
1513   format_check_results res;
1514   /* Skip to format argument.  If the argument isn't available, there's
1515      no work for us to do; prototype checking will catch the problem.  */
1516   for (arg_num = 1; ; ++arg_num)
1517     {
1518       if (params == 0)
1519 	return;
1520       if (arg_num == info->format_num)
1521 	break;
1522       params = TREE_CHAIN (params);
1523     }
1524   format_tree = TREE_VALUE (params);
1525   params = TREE_CHAIN (params);
1526   if (format_tree == 0)
1527     return;
1528 
1529   res.number_non_literal = 0;
1530   res.number_extra_args = 0;
1531   res.extra_arg_loc = UNKNOWN_LOCATION;
1532   res.number_dollar_extra_args = 0;
1533   res.number_wide = 0;
1534   res.number_non_char = 0;
1535   res.number_empty = 0;
1536   res.number_unterminated = 0;
1537   res.number_other = 0;
1538   res.format_string_loc = input_location;
1539 
1540   format_ctx.res = &res;
1541   format_ctx.info = info;
1542   format_ctx.params = params;
1543   format_ctx.arglocs = arglocs;
1544 
1545   check_function_arguments_recurse (check_format_arg, &format_ctx,
1546 				    format_tree, arg_num, OPT_Wformat_);
1547 
1548   location_t loc = format_ctx.res->format_string_loc;
1549 
1550   if (res.number_non_literal > 0)
1551     {
1552       /* Functions taking a va_list normally pass a non-literal format
1553 	 string.  These functions typically are declared with
1554 	 first_arg_num == 0, so avoid warning in those cases.  */
1555       if (!(format_types[info->format_type].flags & (int) FMT_FLAG_ARG_CONVERT))
1556 	{
1557 	  /* For strftime-like formats, warn for not checking the format
1558 	     string; but there are no arguments to check.  */
1559 	  warning_at (loc, OPT_Wformat_nonliteral,
1560 		      "format not a string literal, format string not checked");
1561 	}
1562       else if (info->first_arg_num != 0)
1563 	{
1564 	  /* If there are no arguments for the format at all, we may have
1565 	     printf (foo) which is likely to be a security hole.  */
1566 	  while (arg_num + 1 < info->first_arg_num)
1567 	    {
1568 	      if (params == 0)
1569 		break;
1570 	      params = TREE_CHAIN (params);
1571 	      ++arg_num;
1572 	    }
1573 	  if (params == 0 && warn_format_security)
1574 	    warning_at (loc, OPT_Wformat_security,
1575 			"format not a string literal and no format arguments");
1576 	  else if (params == 0 && warn_format_nonliteral)
1577 	    warning_at (loc, OPT_Wformat_nonliteral,
1578 			"format not a string literal and no format arguments");
1579 	  else
1580 	    warning_at (loc, OPT_Wformat_nonliteral,
1581 			"format not a string literal, argument types not checked");
1582 	}
1583     }
1584 
1585   /* If there were extra arguments to the format, normally warn.  However,
1586      the standard does say extra arguments are ignored, so in the specific
1587      case where we have multiple leaves (conditional expressions or
1588      ngettext) allow extra arguments if at least one leaf didn't have extra
1589      arguments, but was otherwise OK (either non-literal or checked OK).
1590      If the format is an empty string, this should be counted similarly to the
1591      case of extra format arguments.  */
1592   if (res.number_extra_args > 0 && res.number_non_literal == 0
1593       && res.number_other == 0)
1594     {
1595       if (res.extra_arg_loc == UNKNOWN_LOCATION)
1596 	res.extra_arg_loc = loc;
1597       warning_at (res.extra_arg_loc, OPT_Wformat_extra_args,
1598 		  "too many arguments for format");
1599     }
1600   if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0
1601       && res.number_other == 0)
1602     warning_at (loc, OPT_Wformat_extra_args,
1603 		"unused arguments in %<$%>-style format");
1604   if (res.number_empty > 0 && res.number_non_literal == 0
1605       && res.number_other == 0)
1606     warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string",
1607 	     format_types[info->format_type].name);
1608 
1609   if (res.number_wide > 0)
1610     warning_at (loc, OPT_Wformat_, "format is a wide character string");
1611 
1612   if (res.number_non_char > 0)
1613     warning_at (loc, OPT_Wformat_,
1614 		"format string is not an array of type %qs", "char");
1615 
1616   if (res.number_unterminated > 0)
1617     warning_at (loc, OPT_Wformat_, "unterminated format string");
1618 }
1619 
1620 /* Callback from check_function_arguments_recurse to check a
1621    format string.  FORMAT_TREE is the format parameter.  ARG_NUM
1622    is the number of the format argument.  CTX points to a
1623    format_check_context.  */
1624 
1625 static void
check_format_arg(void * ctx,tree format_tree,unsigned HOST_WIDE_INT arg_num)1626 check_format_arg (void *ctx, tree format_tree,
1627 		  unsigned HOST_WIDE_INT arg_num)
1628 {
1629   format_check_context *format_ctx = (format_check_context *) ctx;
1630   format_check_results *res = format_ctx->res;
1631   function_format_info *info = format_ctx->info;
1632   tree params = format_ctx->params;
1633   vec<location_t> *arglocs = format_ctx->arglocs;
1634 
1635   int format_length;
1636   HOST_WIDE_INT offset;
1637   const char *format_chars;
1638   tree array_size = 0;
1639   tree array_init;
1640 
1641   location_t fmt_param_loc = EXPR_LOC_OR_LOC (format_tree, input_location);
1642 
1643   /* Pull out a constant value if the front end didn't, and handle location
1644      wrappers.  */
1645   format_tree = fold_for_warn (format_tree);
1646   STRIP_NOPS (format_tree);
1647 
1648   if (integer_zerop (format_tree))
1649     {
1650       /* Skip to first argument to check, so we can see if this format
1651 	 has any arguments (it shouldn't).  */
1652       while (arg_num + 1 < info->first_arg_num)
1653 	{
1654 	  if (params == 0)
1655 	    return;
1656 	  params = TREE_CHAIN (params);
1657 	  ++arg_num;
1658 	}
1659 
1660       if (params == 0)
1661 	res->number_other++;
1662       else
1663 	{
1664 	  if (res->number_extra_args == 0)
1665 	    res->extra_arg_loc = EXPR_LOC_OR_LOC (TREE_VALUE (params),
1666 						  input_location);
1667 	  res->number_extra_args++;
1668 	}
1669       return;
1670     }
1671 
1672   offset = 0;
1673   if (TREE_CODE (format_tree) == POINTER_PLUS_EXPR)
1674     {
1675       tree arg0, arg1;
1676 
1677       arg0 = TREE_OPERAND (format_tree, 0);
1678       arg1 = TREE_OPERAND (format_tree, 1);
1679       STRIP_NOPS (arg0);
1680       STRIP_NOPS (arg1);
1681       if (TREE_CODE (arg1) == INTEGER_CST)
1682 	format_tree = arg0;
1683       else
1684 	{
1685 	  res->number_non_literal++;
1686 	  return;
1687 	}
1688       /* POINTER_PLUS_EXPR offsets are to be interpreted signed.  */
1689       if (!cst_and_fits_in_hwi (arg1))
1690 	{
1691 	  res->number_non_literal++;
1692 	  return;
1693 	}
1694       offset = int_cst_value (arg1);
1695     }
1696   if (TREE_CODE (format_tree) != ADDR_EXPR)
1697     {
1698       res->number_non_literal++;
1699       return;
1700     }
1701   res->format_string_loc = EXPR_LOC_OR_LOC (format_tree, input_location);
1702   format_tree = TREE_OPERAND (format_tree, 0);
1703   if (format_types[info->format_type].flags
1704       & (int) FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL)
1705     {
1706       bool objc_str = (info->format_type == gcc_objc_string_format_type);
1707       /* We cannot examine this string here - but we can check that it is
1708 	 a valid type.  */
1709       if (TREE_CODE (format_tree) != CONST_DECL
1710 	  || !((objc_str && objc_string_ref_type_p (TREE_TYPE (format_tree)))
1711 		|| (*targetcm.string_object_ref_type_p)
1712 				     ((const_tree) TREE_TYPE (format_tree))))
1713 	{
1714 	  res->number_non_literal++;
1715 	  return;
1716 	}
1717       /* Skip to first argument to check.  */
1718       while (arg_num + 1 < info->first_arg_num)
1719 	{
1720 	  if (params == 0)
1721 	    return;
1722 	  params = TREE_CHAIN (params);
1723 	  ++arg_num;
1724 	}
1725       /* So, we have a valid literal string object and one or more params.
1726 	 We need to use an external helper to parse the string into format
1727 	 info.  For Objective-C variants we provide the resource within the
1728 	 objc tree, for target variants, via a hook.  */
1729       if (objc_str)
1730 	objc_check_format_arg (format_tree, params);
1731       else if (targetcm.check_string_object_format_arg)
1732 	(*targetcm.check_string_object_format_arg) (format_tree, params);
1733       /* Else we can't handle it and retire quietly.  */
1734       return;
1735     }
1736   if (TREE_CODE (format_tree) == ARRAY_REF
1737       && tree_fits_shwi_p (TREE_OPERAND (format_tree, 1))
1738       && (offset += tree_to_shwi (TREE_OPERAND (format_tree, 1))) >= 0)
1739     format_tree = TREE_OPERAND (format_tree, 0);
1740   if (offset < 0)
1741     {
1742       res->number_non_literal++;
1743       return;
1744     }
1745   if (VAR_P (format_tree)
1746       && TREE_CODE (TREE_TYPE (format_tree)) == ARRAY_TYPE
1747       && (array_init = decl_constant_value (format_tree)) != format_tree
1748       && TREE_CODE (array_init) == STRING_CST)
1749     {
1750       /* Extract the string constant initializer.  Note that this may include
1751 	 a trailing NUL character that is not in the array (e.g.
1752 	 const char a[3] = "foo";).  */
1753       array_size = DECL_SIZE_UNIT (format_tree);
1754       format_tree = array_init;
1755     }
1756   if (TREE_CODE (format_tree) != STRING_CST)
1757     {
1758       res->number_non_literal++;
1759       return;
1760     }
1761   tree underlying_type
1762     = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (format_tree)));
1763   if (underlying_type != char_type_node
1764       && !(flag_char8_t && underlying_type == char8_type_node))
1765     {
1766       if (underlying_type == char16_type_node
1767 	  || underlying_type == char32_type_node
1768 	  || underlying_type == wchar_type_node)
1769 	res->number_wide++;
1770       else
1771 	res->number_non_char++;
1772       return;
1773     }
1774   format_chars = TREE_STRING_POINTER (format_tree);
1775   format_length = TREE_STRING_LENGTH (format_tree);
1776   if (array_size != 0)
1777     {
1778       /* Variable length arrays can't be initialized.  */
1779       gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
1780 
1781       if (tree_fits_shwi_p (array_size))
1782 	{
1783 	  HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
1784 	  if (array_size_value > 0
1785 	      && array_size_value == (int) array_size_value
1786 	      && format_length > array_size_value)
1787 	    format_length = array_size_value;
1788 	}
1789     }
1790   if (offset)
1791     {
1792       if (offset >= format_length)
1793 	{
1794 	  res->number_non_literal++;
1795 	  return;
1796 	}
1797       format_chars += offset;
1798       format_length -= offset;
1799     }
1800   if (format_length < 1 || format_chars[--format_length] != 0)
1801     {
1802       res->number_unterminated++;
1803       return;
1804     }
1805   if (format_length == 0)
1806     {
1807       res->number_empty++;
1808       return;
1809     }
1810 
1811   /* Skip to first argument to check.  */
1812   while (arg_num + 1 < info->first_arg_num)
1813     {
1814       if (params == 0)
1815 	return;
1816       params = TREE_CHAIN (params);
1817       ++arg_num;
1818     }
1819   /* Provisionally increment res->number_other; check_format_info_main
1820      will decrement it if it finds there are extra arguments, but this way
1821      need not adjust it for every return.  */
1822   res->number_other++;
1823   object_allocator <format_wanted_type> fwt_pool ("format_wanted_type pool");
1824   check_format_info_main (res, info, format_chars, fmt_param_loc, format_tree,
1825 			  format_length, params, arg_num, fwt_pool, arglocs);
1826 }
1827 
1828 /* Support class for argument_parser and check_format_info_main.
1829    Tracks any flag characters that have been applied to the
1830    current argument.  */
1831 
1832 class flag_chars_t
1833 {
1834  public:
1835   flag_chars_t ();
1836   bool has_char_p (char ch) const;
1837   void add_char (char ch);
1838   void validate (const format_kind_info *fki,
1839 		 const format_char_info *fci,
1840 		 const format_flag_spec *flag_specs,
1841 		 const char * const format_chars,
1842 		 tree format_string_cst,
1843 		 location_t format_string_loc,
1844 		 const char * const orig_format_chars,
1845 		 char format_char,
1846 		 bool quoted);
1847   int get_alloc_flag (const format_kind_info *fki);
1848   int assignment_suppression_p (const format_kind_info *fki);
1849 
1850  private:
1851   char m_flag_chars[256];
1852 };
1853 
1854 /* Support struct for argument_parser and check_format_info_main.
1855    Encapsulates any length modifier applied to the current argument.  */
1856 
1857 class length_modifier
1858 {
1859 public:
length_modifier()1860   length_modifier ()
1861   : chars (NULL), val (FMT_LEN_none), std (STD_C89),
1862     scalar_identity_flag (0)
1863   {
1864   }
1865 
length_modifier(const char * chars_,enum format_lengths val_,enum format_std_version std_,int scalar_identity_flag_)1866   length_modifier (const char *chars_,
1867 		   enum format_lengths val_,
1868 		   enum format_std_version std_,
1869 		   int scalar_identity_flag_)
1870   : chars (chars_), val (val_), std (std_),
1871     scalar_identity_flag (scalar_identity_flag_)
1872   {
1873   }
1874 
1875   const char *chars;
1876   enum format_lengths val;
1877   enum format_std_version std;
1878   int scalar_identity_flag;
1879 };
1880 
1881 /* Parsing one argument within a format string.  */
1882 
1883 class argument_parser
1884 {
1885  public:
1886   argument_parser (function_format_info *info, const char *&format_chars,
1887 		   tree format_string_cst,
1888 		   const char * const orig_format_chars,
1889 		   location_t format_string_loc, flag_chars_t &flag_chars,
1890 		   int &has_operand_number, tree first_fillin_param,
1891 		   object_allocator <format_wanted_type> &fwt_pool_,
1892 		   vec<location_t> *arglocs);
1893 
1894   bool read_any_dollar ();
1895 
1896   bool read_format_flags ();
1897 
1898   bool
1899   read_any_format_width (tree &params,
1900 			 unsigned HOST_WIDE_INT &arg_num);
1901 
1902   void
1903   read_any_format_left_precision ();
1904 
1905   bool
1906   read_any_format_precision (tree &params,
1907 			     unsigned HOST_WIDE_INT &arg_num);
1908 
1909   void handle_alloc_chars ();
1910 
1911   length_modifier read_any_length_modifier ();
1912 
1913   void read_any_other_modifier ();
1914 
1915   const format_char_info *find_format_char_info (char format_char);
1916 
1917   void
1918   validate_flag_pairs (const format_char_info *fci,
1919 		       char format_char);
1920 
1921   void
1922   give_y2k_warnings (const format_char_info *fci,
1923 		     char format_char);
1924 
1925   void parse_any_scan_set (const format_char_info *fci);
1926 
1927   bool handle_conversions (const format_char_info *fci,
1928 			   const length_modifier &len_modifier,
1929 			   tree &wanted_type,
1930 			   const char *&wanted_type_name,
1931 			   unsigned HOST_WIDE_INT &arg_num,
1932 			   tree &params,
1933 			   char format_char);
1934 
1935   bool
1936   check_argument_type (const format_char_info *fci,
1937 		       const length_modifier &len_modifier,
1938 		       tree &wanted_type,
1939 		       const char *&wanted_type_name,
1940 		       const bool suppressed,
1941 		       unsigned HOST_WIDE_INT &arg_num,
1942 		       tree &params,
1943 		       const int alloc_flag,
1944 		       const char * const format_start,
1945 		       const char * const type_start,
1946 		       location_t fmt_param_loc,
1947 		       char conversion_char);
1948 
1949  private:
1950   const function_format_info *const info;
1951   const format_kind_info * const fki;
1952   const format_flag_spec * const flag_specs;
1953   const char *start_of_this_format;
1954   const char *&format_chars;
1955   const tree format_string_cst;
1956   const char * const orig_format_chars;
1957   const location_t format_string_loc;
1958   object_allocator <format_wanted_type> &fwt_pool;
1959   flag_chars_t &flag_chars;
1960   int main_arg_num;
1961   tree main_arg_params;
1962   int &has_operand_number;
1963   const tree first_fillin_param;
1964   format_wanted_type width_wanted_type;
1965   format_wanted_type precision_wanted_type;
1966  public:
1967   format_wanted_type main_wanted_type;
1968  private:
1969   format_wanted_type *first_wanted_type;
1970   format_wanted_type *last_wanted_type;
1971   vec<location_t> *arglocs;
1972 };
1973 
1974 /* flag_chars_t's constructor.  */
1975 
flag_chars_t()1976 flag_chars_t::flag_chars_t ()
1977 {
1978   m_flag_chars[0] = 0;
1979 }
1980 
1981 /* Has CH been seen as a flag within the current argument?  */
1982 
1983 bool
has_char_p(char ch) const1984 flag_chars_t::has_char_p (char ch) const
1985 {
1986   return strchr (m_flag_chars, ch) != 0;
1987 }
1988 
1989 /* Add CH to the flags seen within the current argument.  */
1990 
1991 void
add_char(char ch)1992 flag_chars_t::add_char (char ch)
1993 {
1994   int i = strlen (m_flag_chars);
1995   m_flag_chars[i++] = ch;
1996   m_flag_chars[i] = 0;
1997 }
1998 
1999 /* Validate the individual flags used, removing any that are invalid.  */
2000 
2001 void
validate(const format_kind_info * fki,const format_char_info * fci,const format_flag_spec * flag_specs,const char * const format_chars,tree format_string_cst,location_t format_string_loc,const char * const orig_format_chars,char format_char,bool quoted)2002 flag_chars_t::validate (const format_kind_info *fki,
2003 			const format_char_info *fci,
2004 			const format_flag_spec *flag_specs,
2005 			const char * const format_chars,
2006 			tree format_string_cst,
2007 			location_t format_string_loc,
2008 			const char * const orig_format_chars,
2009 			char format_char,
2010 			bool quoted)
2011 {
2012   int i;
2013   int d = 0;
2014   bool quotflag = false;
2015 
2016   for (i = 0; m_flag_chars[i] != 0; i++)
2017     {
2018       const format_flag_spec *s = get_flag_spec (flag_specs,
2019 						 m_flag_chars[i], NULL);
2020       m_flag_chars[i - d] = m_flag_chars[i];
2021       if (m_flag_chars[i] == fki->length_code_char)
2022 	continue;
2023 
2024       /* Remember if a quoting flag is seen.  */
2025       quotflag |= s->quoting;
2026 
2027       if (strchr (fci->flag_chars, m_flag_chars[i]) == 0)
2028 	{
2029 	  format_warning_at_char (format_string_loc, format_string_cst,
2030 				  format_chars - orig_format_chars,
2031 				  OPT_Wformat_,
2032 				  "%s used with %<%%%c%> %s format",
2033 				  _(s->name), format_char, fki->name);
2034 	  d++;
2035 	  continue;
2036 	}
2037       if (pedantic)
2038 	{
2039 	  const format_flag_spec *t;
2040 	  if (ADJ_STD (s->std) > C_STD_VER)
2041 	    warning_at (format_string_loc, OPT_Wformat_,
2042 			"%s does not support %s",
2043 			C_STD_NAME (s->std), _(s->long_name));
2044 	  t = get_flag_spec (flag_specs, m_flag_chars[i], fci->flags2);
2045 	  if (t != NULL && ADJ_STD (t->std) > ADJ_STD (s->std))
2046 	    {
2047 	      const char *long_name = (t->long_name != NULL
2048 				       ? t->long_name
2049 				       : s->long_name);
2050 	      if (ADJ_STD (t->std) > C_STD_VER)
2051 		warning_at (format_string_loc, OPT_Wformat_,
2052 			    "%s does not support %s with"
2053 			    " the %<%%%c%> %s format",
2054 			    C_STD_NAME (t->std), _(long_name),
2055 			    format_char, fki->name);
2056 	    }
2057 	}
2058 
2059       /* Detect quoting directives used within a quoted sequence, such
2060 	 as GCC's "%<...%qE".  */
2061       if (quoted && s->quoting)
2062 	{
2063 	  format_warning_at_char (format_string_loc, format_string_cst,
2064 				  format_chars - orig_format_chars - 1,
2065 				  OPT_Wformat_,
2066 				  "%s used within a quoted sequence",
2067 				  _(s->name));
2068 	}
2069     }
2070   m_flag_chars[i - d] = 0;
2071 
2072   if (!quoted
2073       && !quotflag
2074       && strchr (fci->flags2, '\''))
2075     {
2076       format_warning_at_char (format_string_loc, format_string_cst,
2077 			      format_chars - orig_format_chars,
2078 			      OPT_Wformat_,
2079 			      "%qc conversion used unquoted",
2080 			      format_char);
2081     }
2082 }
2083 
2084 /* Determine if an assignment-allocation has been set, requiring
2085    an extra char ** for writing back a dynamically-allocated char *.
2086    This is for handling the optional 'm' character in scanf.  */
2087 
2088 int
get_alloc_flag(const format_kind_info * fki)2089 flag_chars_t::get_alloc_flag (const format_kind_info *fki)
2090 {
2091   if ((fki->flags & (int) FMT_FLAG_SCANF_A_KLUDGE)
2092       && has_char_p ('a'))
2093     return 1;
2094   if (fki->alloc_char && has_char_p (fki->alloc_char))
2095     return 1;
2096   return 0;
2097 }
2098 
2099 /* Determine if an assignment-suppression character was seen.
2100    ('*' in scanf, for discarding the converted input).  */
2101 
2102 int
assignment_suppression_p(const format_kind_info * fki)2103 flag_chars_t::assignment_suppression_p (const format_kind_info *fki)
2104 {
2105   if (fki->suppression_char
2106       && has_char_p (fki->suppression_char))
2107     return 1;
2108   return 0;
2109 }
2110 
2111 /* Constructor for argument_parser.  Initialize for parsing one
2112    argument within a format string.  */
2113 
2114 argument_parser::
argument_parser(function_format_info * info_,const char * & format_chars_,tree format_string_cst_,const char * const orig_format_chars_,location_t format_string_loc_,flag_chars_t & flag_chars_,int & has_operand_number_,tree first_fillin_param_,object_allocator<format_wanted_type> & fwt_pool_,vec<location_t> * arglocs_)2115 argument_parser (function_format_info *info_, const char *&format_chars_,
2116 		 tree format_string_cst_,
2117 		 const char * const orig_format_chars_,
2118 		 location_t format_string_loc_,
2119 		 flag_chars_t &flag_chars_,
2120 		 int &has_operand_number_,
2121 		 tree first_fillin_param_,
2122 		 object_allocator <format_wanted_type> &fwt_pool_,
2123 		 vec<location_t> *arglocs_)
2124 : info (info_),
2125   fki (&format_types[info->format_type]),
2126   flag_specs (fki->flag_specs),
2127   start_of_this_format (format_chars_),
2128   format_chars (format_chars_),
2129   format_string_cst (format_string_cst_),
2130   orig_format_chars (orig_format_chars_),
2131   format_string_loc (format_string_loc_),
2132   fwt_pool (fwt_pool_),
2133   flag_chars (flag_chars_),
2134   main_arg_num (0),
2135   main_arg_params (NULL),
2136   has_operand_number (has_operand_number_),
2137   first_fillin_param (first_fillin_param_),
2138   first_wanted_type (NULL),
2139   last_wanted_type (NULL),
2140   arglocs (arglocs_)
2141 {
2142 }
2143 
2144 /* Handle dollars at the start of format arguments, setting up main_arg_params
2145    and main_arg_num.
2146 
2147    Return true if format parsing is to continue, false otherwise.  */
2148 
2149 bool
read_any_dollar()2150 argument_parser::read_any_dollar ()
2151 {
2152   if ((fki->flags & (int) FMT_FLAG_USE_DOLLAR) && has_operand_number != 0)
2153     {
2154       /* Possibly read a $ operand number at the start of the format.
2155 	 If one was previously used, one is required here.  If one
2156 	 is not used here, we can't immediately conclude this is a
2157 	 format without them, since it could be printf %m or scanf %*.  */
2158       int opnum;
2159       opnum = maybe_read_dollar_number (&format_chars, 0,
2160 					first_fillin_param,
2161 					&main_arg_params, fki);
2162       if (opnum == -1)
2163 	return false;
2164       else if (opnum > 0)
2165 	{
2166 	  has_operand_number = 1;
2167 	  main_arg_num = opnum + info->first_arg_num - 1;
2168 	}
2169     }
2170   else if (fki->flags & FMT_FLAG_USE_DOLLAR)
2171     {
2172       if (avoid_dollar_number (format_chars))
2173 	return false;
2174     }
2175   return true;
2176 }
2177 
2178 /* Read any format flags, but do not yet validate them beyond removing
2179    duplicates, since in general validation depends on the rest of
2180    the format.
2181 
2182    Return true if format parsing is to continue, false otherwise.  */
2183 
2184 bool
read_format_flags()2185 argument_parser::read_format_flags ()
2186 {
2187   while (*format_chars != 0
2188 	 && strchr (fki->flag_chars, *format_chars) != 0)
2189     {
2190       const format_flag_spec *s = get_flag_spec (flag_specs,
2191 						 *format_chars, NULL);
2192       if (flag_chars.has_char_p (*format_chars))
2193 	{
2194 	  format_warning_at_char (format_string_loc, format_string_cst,
2195 				  format_chars + 1 - orig_format_chars,
2196 				  OPT_Wformat_,
2197 				  "repeated %s in format", _(s->name));
2198 	}
2199       else
2200 	flag_chars.add_char (*format_chars);
2201 
2202       if (s->skip_next_char)
2203 	{
2204 	  ++format_chars;
2205 	  if (*format_chars == 0)
2206 	    {
2207 	      warning_at (format_string_loc, OPT_Wformat_,
2208 			  "missing fill character at end of strfmon format");
2209 	      return false;
2210 	    }
2211 	}
2212       ++format_chars;
2213     }
2214 
2215   return true;
2216 }
2217 
2218 /* Read any format width, possibly * or *m$.
2219 
2220    Return true if format parsing is to continue, false otherwise.  */
2221 
2222 bool
2223 argument_parser::
read_any_format_width(tree & params,unsigned HOST_WIDE_INT & arg_num)2224 read_any_format_width (tree &params,
2225 		       unsigned HOST_WIDE_INT &arg_num)
2226 {
2227   if (!fki->width_char)
2228     return true;
2229 
2230   if (fki->width_type != NULL && *format_chars == '*')
2231     {
2232       flag_chars.add_char (fki->width_char);
2233       /* "...a field width...may be indicated by an asterisk.
2234 	 In this case, an int argument supplies the field width..."  */
2235       ++format_chars;
2236       if (has_operand_number != 0)
2237 	{
2238 	  int opnum;
2239 	  opnum = maybe_read_dollar_number (&format_chars,
2240 					    has_operand_number == 1,
2241 					    first_fillin_param,
2242 					    &params, fki);
2243 	  if (opnum == -1)
2244 	    return false;
2245 	  else if (opnum > 0)
2246 	    {
2247 	      has_operand_number = 1;
2248 	      arg_num = opnum + info->first_arg_num - 1;
2249 	    }
2250 	  else
2251 	    has_operand_number = 0;
2252 	}
2253       else
2254 	{
2255 	  if (avoid_dollar_number (format_chars))
2256 	    return false;
2257 	}
2258       if (info->first_arg_num != 0)
2259 	{
2260 	  tree cur_param;
2261 	  if (params == 0)
2262 	    cur_param = NULL;
2263 	  else
2264 	    {
2265 	      cur_param = TREE_VALUE (params);
2266 	      if (has_operand_number <= 0)
2267 		{
2268 		  params = TREE_CHAIN (params);
2269 		  ++arg_num;
2270 		}
2271 	    }
2272 	  width_wanted_type.wanted_type = *fki->width_type;
2273 	  width_wanted_type.wanted_type_name = NULL;
2274 	  width_wanted_type.pointer_count = 0;
2275 	  width_wanted_type.char_lenient_flag = 0;
2276 	  width_wanted_type.scalar_identity_flag = 0;
2277 	  width_wanted_type.writing_in_flag = 0;
2278 	  width_wanted_type.reading_from_flag = 0;
2279 	  width_wanted_type.kind = CF_KIND_FIELD_WIDTH;
2280 	  width_wanted_type.format_start = format_chars - 1;
2281 	  width_wanted_type.format_length = 1;
2282 	  width_wanted_type.param = cur_param;
2283 	  width_wanted_type.arg_num = arg_num;
2284 	  width_wanted_type.offset_loc =
2285 	    format_chars - orig_format_chars;
2286 	  width_wanted_type.next = NULL;
2287 	  if (last_wanted_type != 0)
2288 	    last_wanted_type->next = &width_wanted_type;
2289 	  if (first_wanted_type == 0)
2290 	    first_wanted_type = &width_wanted_type;
2291 	  last_wanted_type = &width_wanted_type;
2292 	}
2293     }
2294   else
2295     {
2296       /* Possibly read a numeric width.  If the width is zero,
2297 	 we complain if appropriate.  */
2298       int non_zero_width_char = FALSE;
2299       int found_width = FALSE;
2300       while (ISDIGIT (*format_chars))
2301 	{
2302 	  found_width = TRUE;
2303 	  if (*format_chars != '0')
2304 	    non_zero_width_char = TRUE;
2305 	  ++format_chars;
2306 	}
2307       if (found_width && !non_zero_width_char &&
2308 	  (fki->flags & (int) FMT_FLAG_ZERO_WIDTH_BAD))
2309 	warning_at (format_string_loc, OPT_Wformat_,
2310 		    "zero width in %s format", fki->name);
2311       if (found_width)
2312 	flag_chars.add_char (fki->width_char);
2313     }
2314 
2315   return true;
2316 }
2317 
2318 /* Read any format left precision (must be a number, not *).  */
2319 void
read_any_format_left_precision()2320 argument_parser::read_any_format_left_precision ()
2321 {
2322   if (fki->left_precision_char == 0)
2323     return;
2324   if (*format_chars != '#')
2325     return;
2326 
2327   ++format_chars;
2328   flag_chars.add_char (fki->left_precision_char);
2329   if (!ISDIGIT (*format_chars))
2330     format_warning_at_char (format_string_loc, format_string_cst,
2331 			    format_chars - orig_format_chars,
2332 			    OPT_Wformat_,
2333 			    "empty left precision in %s format", fki->name);
2334   while (ISDIGIT (*format_chars))
2335     ++format_chars;
2336 }
2337 
2338 /* Read any format precision, possibly * or *m$.
2339 
2340    Return true if format parsing is to continue, false otherwise.  */
2341 
2342 bool
2343 argument_parser::
read_any_format_precision(tree & params,unsigned HOST_WIDE_INT & arg_num)2344 read_any_format_precision (tree &params,
2345 			   unsigned HOST_WIDE_INT &arg_num)
2346 {
2347   if (fki->precision_char == 0)
2348     return true;
2349   if (*format_chars != '.')
2350     return true;
2351 
2352   ++format_chars;
2353   flag_chars.add_char (fki->precision_char);
2354   if (fki->precision_type != NULL && *format_chars == '*')
2355     {
2356       /* "...a...precision...may be indicated by an asterisk.
2357 	 In this case, an int argument supplies the...precision."  */
2358       ++format_chars;
2359       if (has_operand_number != 0)
2360 	{
2361 	  int opnum;
2362 	  opnum = maybe_read_dollar_number (&format_chars,
2363 					    has_operand_number == 1,
2364 					    first_fillin_param,
2365 					    &params, fki);
2366 	  if (opnum == -1)
2367 	    return false;
2368 	  else if (opnum > 0)
2369 	    {
2370 	      has_operand_number = 1;
2371 	      arg_num = opnum + info->first_arg_num - 1;
2372 	    }
2373 	  else
2374 	    has_operand_number = 0;
2375 	}
2376       else
2377 	{
2378 	  if (avoid_dollar_number (format_chars))
2379 	    return false;
2380 	}
2381       if (info->first_arg_num != 0)
2382 	{
2383 	  tree cur_param;
2384 	  if (params == 0)
2385 	    cur_param = NULL;
2386 	  else
2387 	    {
2388 	      cur_param = TREE_VALUE (params);
2389 	      if (has_operand_number <= 0)
2390 		{
2391 		  params = TREE_CHAIN (params);
2392 		  ++arg_num;
2393 		}
2394 	    }
2395 	  precision_wanted_type.wanted_type = *fki->precision_type;
2396 	  precision_wanted_type.wanted_type_name = NULL;
2397 	  precision_wanted_type.pointer_count = 0;
2398 	  precision_wanted_type.char_lenient_flag = 0;
2399 	  precision_wanted_type.scalar_identity_flag = 0;
2400 	  precision_wanted_type.writing_in_flag = 0;
2401 	  precision_wanted_type.reading_from_flag = 0;
2402 	  precision_wanted_type.kind = CF_KIND_FIELD_PRECISION;
2403 	  precision_wanted_type.param = cur_param;
2404 	  precision_wanted_type.format_start = format_chars - 2;
2405 	  precision_wanted_type.format_length = 2;
2406 	  precision_wanted_type.arg_num = arg_num;
2407 	  precision_wanted_type.offset_loc =
2408 	    format_chars - orig_format_chars;
2409 	  precision_wanted_type.next = NULL;
2410 	  if (last_wanted_type != 0)
2411 	    last_wanted_type->next = &precision_wanted_type;
2412 	  if (first_wanted_type == 0)
2413 	    first_wanted_type = &precision_wanted_type;
2414 	  last_wanted_type = &precision_wanted_type;
2415 	}
2416     }
2417   else
2418     {
2419       if (!(fki->flags & (int) FMT_FLAG_EMPTY_PREC_OK)
2420 	  && !ISDIGIT (*format_chars))
2421 	format_warning_at_char (format_string_loc, format_string_cst,
2422 				format_chars - orig_format_chars,
2423 				OPT_Wformat_,
2424 				"empty precision in %s format", fki->name);
2425       while (ISDIGIT (*format_chars))
2426 	++format_chars;
2427     }
2428 
2429   return true;
2430 }
2431 
2432 /* Parse any assignment-allocation flags, which request an extra
2433    char ** for writing back a dynamically-allocated char *.
2434    This is for handling the optional 'm' character in scanf,
2435    and, before C99, 'a' (for compatibility with a non-standard
2436    GNU libc extension).  */
2437 
2438 void
handle_alloc_chars()2439 argument_parser::handle_alloc_chars ()
2440 {
2441   if (fki->alloc_char && fki->alloc_char == *format_chars)
2442     {
2443       flag_chars.add_char (fki->alloc_char);
2444       format_chars++;
2445     }
2446 
2447   /* Handle the scanf allocation kludge.  */
2448   if (fki->flags & (int) FMT_FLAG_SCANF_A_KLUDGE)
2449     {
2450       if (*format_chars == 'a' && !flag_isoc99)
2451 	{
2452 	  if (format_chars[1] == 's' || format_chars[1] == 'S'
2453 	      || format_chars[1] == '[')
2454 	    {
2455 	      /* 'a' is used as a flag.  */
2456 	      flag_chars.add_char ('a');
2457 	      format_chars++;
2458 	    }
2459 	}
2460     }
2461 }
2462 
2463 /* Look for length modifiers within the current format argument,
2464    returning a length_modifier instance describing it (or the
2465    default if one is not found).
2466 
2467    Issue warnings about non-standard modifiers.  */
2468 
2469 length_modifier
read_any_length_modifier()2470 argument_parser::read_any_length_modifier ()
2471 {
2472   length_modifier result;
2473 
2474   const format_length_info *fli = fki->length_char_specs;
2475   if (!fli)
2476     return result;
2477 
2478   while (fli->name != 0
2479 	 && strncmp (fli->name, format_chars, strlen (fli->name)))
2480     fli++;
2481   if (fli->name != 0)
2482     {
2483       format_chars += strlen (fli->name);
2484       if (fli->double_name != 0 && fli->name[0] == *format_chars)
2485 	{
2486 	  format_chars++;
2487 	  result = length_modifier (fli->double_name, fli->double_index,
2488 				    fli->double_std, 0);
2489 	}
2490       else
2491 	{
2492 	  result = length_modifier (fli->name, fli->index, fli->std,
2493 				    fli->scalar_identity_flag);
2494 	}
2495       flag_chars.add_char (fki->length_code_char);
2496     }
2497   if (pedantic)
2498     {
2499       /* Warn if the length modifier is non-standard.  */
2500       if (ADJ_STD (result.std) > C_STD_VER)
2501 	warning_at (format_string_loc, OPT_Wformat_,
2502 		    "%s does not support the %qs %s length modifier",
2503 		    C_STD_NAME (result.std), result.chars,
2504 		    fki->name);
2505     }
2506 
2507   return result;
2508 }
2509 
2510 /* Read any other modifier (strftime E/O).  */
2511 
2512 void
read_any_other_modifier()2513 argument_parser::read_any_other_modifier ()
2514 {
2515   if (fki->modifier_chars == NULL)
2516     return;
2517 
2518   while (*format_chars != 0
2519 	 && strchr (fki->modifier_chars, *format_chars) != 0)
2520     {
2521       if (flag_chars.has_char_p (*format_chars))
2522 	{
2523 	  const format_flag_spec *s = get_flag_spec (flag_specs,
2524 						     *format_chars, NULL);
2525 	  format_warning_at_char (format_string_loc, format_string_cst,
2526 				  format_chars - orig_format_chars,
2527 				  OPT_Wformat_,
2528 				  "repeated %s in format", _(s->name));
2529 	}
2530       else
2531 	flag_chars.add_char (*format_chars);
2532       ++format_chars;
2533     }
2534 }
2535 
2536 /* Return the format_char_info corresponding to FORMAT_CHAR,
2537    potentially issuing a warning if the format char is
2538    not supported in the C standard version we are checking
2539    against.
2540 
2541    Issue a warning and return NULL if it is not found.
2542 
2543    Issue warnings about non-standard modifiers.  */
2544 
2545 const format_char_info *
find_format_char_info(char format_char)2546 argument_parser::find_format_char_info (char format_char)
2547 {
2548   const format_char_info *fci = fki->conversion_specs;
2549 
2550   while (fci->format_chars != 0
2551 	 && strchr (fci->format_chars, format_char) == 0)
2552     ++fci;
2553   if (fci->format_chars == 0)
2554     {
2555       format_warning_at_char (format_string_loc, format_string_cst,
2556 			      format_chars - orig_format_chars,
2557 			      OPT_Wformat_,
2558 			      "unknown conversion type character"
2559 			      " %qc in format",
2560 			      format_char);
2561       return NULL;
2562     }
2563 
2564   if (pedantic)
2565     {
2566       if (ADJ_STD (fci->std) > C_STD_VER)
2567 	format_warning_at_char (format_string_loc, format_string_cst,
2568 				format_chars - orig_format_chars,
2569 				OPT_Wformat_,
2570 				"%s does not support the %<%%%c%> %s format",
2571 				C_STD_NAME (fci->std), format_char, fki->name);
2572     }
2573 
2574   return fci;
2575 }
2576 
2577 /* Validate the pairs of flags used.
2578    Issue warnings about incompatible combinations of flags.  */
2579 
2580 void
validate_flag_pairs(const format_char_info * fci,char format_char)2581 argument_parser::validate_flag_pairs (const format_char_info *fci,
2582 				      char format_char)
2583 {
2584   const format_flag_pair * const bad_flag_pairs = fki->bad_flag_pairs;
2585 
2586   for (int i = 0; bad_flag_pairs[i].flag_char1 != 0; i++)
2587     {
2588       const format_flag_spec *s, *t;
2589       if (!flag_chars.has_char_p (bad_flag_pairs[i].flag_char1))
2590 	continue;
2591       if (!flag_chars.has_char_p (bad_flag_pairs[i].flag_char2))
2592 	continue;
2593       if (bad_flag_pairs[i].predicate != 0
2594 	  && strchr (fci->flags2, bad_flag_pairs[i].predicate) == 0)
2595 	continue;
2596       s = get_flag_spec (flag_specs, bad_flag_pairs[i].flag_char1, NULL);
2597       t = get_flag_spec (flag_specs, bad_flag_pairs[i].flag_char2, NULL);
2598       if (bad_flag_pairs[i].ignored)
2599 	{
2600 	  if (bad_flag_pairs[i].predicate != 0)
2601 	    warning_at (format_string_loc, OPT_Wformat_,
2602 			"%s ignored with %s and %<%%%c%> %s format",
2603 			_(s->name), _(t->name), format_char,
2604 			fki->name);
2605 	  else
2606 	    warning_at (format_string_loc, OPT_Wformat_,
2607 			"%s ignored with %s in %s format",
2608 			_(s->name), _(t->name), fki->name);
2609 	}
2610       else
2611 	{
2612 	  if (bad_flag_pairs[i].predicate != 0)
2613 	    warning_at (format_string_loc, OPT_Wformat_,
2614 			"use of %s and %s together with %<%%%c%> %s format",
2615 			_(s->name), _(t->name), format_char,
2616 			fki->name);
2617 	  else
2618 	    warning_at (format_string_loc, OPT_Wformat_,
2619 			"use of %s and %s together in %s format",
2620 			_(s->name), _(t->name), fki->name);
2621 	}
2622     }
2623 }
2624 
2625 /* Give Y2K warnings.  */
2626 
2627 void
give_y2k_warnings(const format_char_info * fci,char format_char)2628 argument_parser::give_y2k_warnings (const format_char_info *fci,
2629 				    char format_char)
2630 {
2631   if (!warn_format_y2k)
2632     return;
2633 
2634   int y2k_level = 0;
2635   if (strchr (fci->flags2, '4') != 0)
2636     if (flag_chars.has_char_p ('E'))
2637       y2k_level = 3;
2638     else
2639       y2k_level = 2;
2640   else if (strchr (fci->flags2, '3') != 0)
2641     y2k_level = 3;
2642   else if (strchr (fci->flags2, '2') != 0)
2643     y2k_level = 2;
2644   if (y2k_level == 3)
2645     warning_at (format_string_loc, OPT_Wformat_y2k,
2646 		"%<%%%c%> yields only last 2 digits of "
2647 		"year in some locales", format_char);
2648   else if (y2k_level == 2)
2649     warning_at (format_string_loc, OPT_Wformat_y2k,
2650 		"%<%%%c%> yields only last 2 digits of year",
2651 		format_char);
2652 }
2653 
2654 /* Parse any "scan sets" enclosed in square brackets, e.g.
2655    for scanf-style calls.  */
2656 
2657 void
parse_any_scan_set(const format_char_info * fci)2658 argument_parser::parse_any_scan_set (const format_char_info *fci)
2659 {
2660   if (strchr (fci->flags2, '[') == NULL)
2661     return;
2662 
2663   /* Skip over scan set, in case it happens to have '%' in it.  */
2664   if (*format_chars == '^')
2665     ++format_chars;
2666   /* Find closing bracket; if one is hit immediately, then
2667      it's part of the scan set rather than a terminator.  */
2668   if (*format_chars == ']')
2669     ++format_chars;
2670   while (*format_chars && *format_chars != ']')
2671     ++format_chars;
2672   if (*format_chars != ']')
2673     /* The end of the format string was reached.  */
2674     format_warning_at_char (format_string_loc, format_string_cst,
2675 			    format_chars - orig_format_chars,
2676 			    OPT_Wformat_,
2677 			    "no closing %<]%> for %<%%[%> format");
2678 }
2679 
2680 /* Return true if this argument is to be continued to be parsed,
2681    false to skip to next argument.  */
2682 
2683 bool
handle_conversions(const format_char_info * fci,const length_modifier & len_modifier,tree & wanted_type,const char * & wanted_type_name,unsigned HOST_WIDE_INT & arg_num,tree & params,char format_char)2684 argument_parser::handle_conversions (const format_char_info *fci,
2685 				     const length_modifier &len_modifier,
2686 				     tree &wanted_type,
2687 				     const char *&wanted_type_name,
2688 				     unsigned HOST_WIDE_INT &arg_num,
2689 				     tree &params,
2690 				     char format_char)
2691 {
2692   enum format_std_version wanted_type_std;
2693 
2694   if (!(fki->flags & (int) FMT_FLAG_ARG_CONVERT))
2695     return true;
2696 
2697   wanted_type = (fci->types[len_modifier.val].type
2698 		 ? *fci->types[len_modifier.val].type : 0);
2699   wanted_type_name = fci->types[len_modifier.val].name;
2700   wanted_type_std = fci->types[len_modifier.val].std;
2701   if (wanted_type == 0)
2702     {
2703       format_warning_at_char (format_string_loc, format_string_cst,
2704 			      format_chars - orig_format_chars,
2705 			      OPT_Wformat_,
2706 			      "use of %qs length modifier with %qc type"
2707 			      " character has either no effect"
2708 			      " or undefined behavior",
2709 			      len_modifier.chars, format_char);
2710       /* Heuristic: skip one argument when an invalid length/type
2711 	 combination is encountered.  */
2712       arg_num++;
2713       if (params != 0)
2714 	params = TREE_CHAIN (params);
2715       return false;
2716     }
2717   else if (pedantic
2718 	   /* Warn if non-standard, provided it is more non-standard
2719 	      than the length and type characters that may already
2720 	      have been warned for.  */
2721 	   && ADJ_STD (wanted_type_std) > ADJ_STD (len_modifier.std)
2722 	   && ADJ_STD (wanted_type_std) > ADJ_STD (fci->std))
2723     {
2724       if (ADJ_STD (wanted_type_std) > C_STD_VER)
2725 	format_warning_at_char (format_string_loc, format_string_cst,
2726 				format_chars - orig_format_chars,
2727 				OPT_Wformat_,
2728 				"%s does not support the %<%%%s%c%> %s format",
2729 				C_STD_NAME (wanted_type_std),
2730 				len_modifier.chars,
2731 				format_char, fki->name);
2732     }
2733 
2734   return true;
2735 }
2736 
2737 /* Check type of argument against desired type.
2738 
2739    Return true if format parsing is to continue, false otherwise.  */
2740 
2741 bool
2742 argument_parser::
check_argument_type(const format_char_info * fci,const length_modifier & len_modifier,tree & wanted_type,const char * & wanted_type_name,const bool suppressed,unsigned HOST_WIDE_INT & arg_num,tree & params,const int alloc_flag,const char * const format_start,const char * const type_start,location_t fmt_param_loc,char conversion_char)2743 check_argument_type (const format_char_info *fci,
2744 		     const length_modifier &len_modifier,
2745 		     tree &wanted_type,
2746 		     const char *&wanted_type_name,
2747 		     const bool suppressed,
2748 		     unsigned HOST_WIDE_INT &arg_num,
2749 		     tree &params,
2750 		     const int alloc_flag,
2751 		     const char * const format_start,
2752 		     const char * const type_start,
2753 		     location_t fmt_param_loc,
2754 		     char conversion_char)
2755 {
2756   if (info->first_arg_num == 0)
2757     return true;
2758 
2759   if ((fci->pointer_count == 0 && wanted_type == void_type_node)
2760       || suppressed)
2761     {
2762       if (main_arg_num != 0)
2763 	{
2764 	  if (suppressed)
2765 	    warning_at (format_string_loc, OPT_Wformat_,
2766 			"operand number specified with "
2767 			"suppressed assignment");
2768 	  else
2769 	    warning_at (format_string_loc, OPT_Wformat_,
2770 			"operand number specified for format "
2771 			"taking no argument");
2772 	}
2773     }
2774   else
2775     {
2776       format_wanted_type *wanted_type_ptr;
2777 
2778       if (main_arg_num != 0)
2779 	{
2780 	  arg_num = main_arg_num;
2781 	  params = main_arg_params;
2782 	}
2783       else
2784 	{
2785 	  ++arg_num;
2786 	  if (has_operand_number > 0)
2787 	    {
2788 	      warning_at (format_string_loc, OPT_Wformat_,
2789 			  "missing $ operand number in format");
2790 	      return false;
2791 	    }
2792 	  else
2793 	    has_operand_number = 0;
2794 	}
2795 
2796       wanted_type_ptr = &main_wanted_type;
2797       while (fci)
2798 	{
2799 	  tree cur_param;
2800 	  if (params == 0)
2801 	    cur_param = NULL;
2802 	  else
2803 	    {
2804 	      cur_param = TREE_VALUE (params);
2805 	      params = TREE_CHAIN (params);
2806 	    }
2807 
2808 	  wanted_type_ptr->wanted_type = wanted_type;
2809 	  wanted_type_ptr->wanted_type_name = wanted_type_name;
2810 	  wanted_type_ptr->pointer_count = fci->pointer_count + alloc_flag;
2811 	  wanted_type_ptr->char_lenient_flag = 0;
2812 	  if (strchr (fci->flags2, 'c') != 0)
2813 	    wanted_type_ptr->char_lenient_flag = 1;
2814 	  wanted_type_ptr->scalar_identity_flag = 0;
2815 	  if (len_modifier.scalar_identity_flag)
2816 	    wanted_type_ptr->scalar_identity_flag = 1;
2817 	  wanted_type_ptr->writing_in_flag = 0;
2818 	  wanted_type_ptr->reading_from_flag = 0;
2819 	  if (alloc_flag)
2820 	    wanted_type_ptr->writing_in_flag = 1;
2821 	  else
2822 	    {
2823 	      if (strchr (fci->flags2, 'W') != 0)
2824 		wanted_type_ptr->writing_in_flag = 1;
2825 	      if (strchr (fci->flags2, 'R') != 0)
2826 		wanted_type_ptr->reading_from_flag = 1;
2827 	    }
2828 	  wanted_type_ptr->kind = CF_KIND_FORMAT;
2829 	  wanted_type_ptr->param = cur_param;
2830 	  wanted_type_ptr->arg_num = arg_num;
2831 	  wanted_type_ptr->format_start = format_start;
2832 	  wanted_type_ptr->format_length = format_chars - format_start;
2833 	  wanted_type_ptr->offset_loc = format_chars - orig_format_chars;
2834 	  wanted_type_ptr->next = NULL;
2835 	  if (last_wanted_type != 0)
2836 	    last_wanted_type->next = wanted_type_ptr;
2837 	  if (first_wanted_type == 0)
2838 	    first_wanted_type = wanted_type_ptr;
2839 	  last_wanted_type = wanted_type_ptr;
2840 
2841 	  fci = fci->chain;
2842 	  if (fci)
2843 	    {
2844 	      wanted_type_ptr = fwt_pool.allocate ();
2845 	      arg_num++;
2846 	      wanted_type = *fci->types[len_modifier.val].type;
2847 	      wanted_type_name = fci->types[len_modifier.val].name;
2848 	    }
2849 	}
2850     }
2851 
2852   if (first_wanted_type != 0)
2853     {
2854       ptrdiff_t offset_to_format_start = (start_of_this_format - 1) - orig_format_chars;
2855       ptrdiff_t offset_to_format_end = (format_chars - 1) - orig_format_chars;
2856       /* By default, use the end of the range for the caret location.  */
2857       substring_loc fmt_loc (fmt_param_loc, TREE_TYPE (format_string_cst),
2858 			     offset_to_format_end,
2859 			     offset_to_format_start, offset_to_format_end);
2860       ptrdiff_t offset_to_type_start = type_start - orig_format_chars;
2861       check_format_types (fmt_loc, first_wanted_type, fki,
2862 			  offset_to_type_start,
2863 			  conversion_char, arglocs);
2864     }
2865 
2866   return true;
2867 }
2868 
2869 /* Describes "paired tokens" within the format string that are
2870    expected to be balanced.  */
2871 
2872 class baltoks_t
2873 {
2874 public:
baltoks_t()2875   baltoks_t (): singlequote (), doublequote () { }
2876 
2877   typedef auto_vec<const char *> balanced_tokens_t;
2878   /* Vectors of pointers to opening brackets ('['), curly brackets ('{'),
2879      quoting directives (like GCC "%<"), parentheses, and angle brackets
2880      ('<').  Used to detect unbalanced tokens.  */
2881   balanced_tokens_t brackets;
2882   balanced_tokens_t curly;
2883   balanced_tokens_t quotdirs;
2884   balanced_tokens_t parens;
2885   balanced_tokens_t pointy;
2886   /* Pointer to the last opening quote.  */
2887   const char *singlequote;
2888   const char *doublequote;
2889 };
2890 
/* One entry of a table of keywords, operators, or other names.  */

struct token_t
{
  /* Keyword/operator name.  */
  const char *name;
  /* Its length.  */
  unsigned char len;
  /* Alternate spelling.  */
  const char *alt;
};
2899 
/* Build a token_t initializer from the string literal NAME: the
   string, its length without the terminating null, and no alternate
   spelling.  Used for the global token_t arrays below.  */
#define NAME(name) { name, sizeof (name) - 1, NULL }
2902 
2903 /* C/C++ operators that are expected to be quoted within the format
2904    string.  */
2905 
2906 static const token_t c_opers[] =
2907   {
2908    NAME ("!="), NAME ("%="),  NAME ("&&"),  NAME ("&="), NAME ("*="),
2909    NAME ("++"), NAME ("+="),  NAME ("--"),  NAME ("-="), NAME ("->"),
2910    NAME ("/="), NAME ("<<"),  NAME ("<<="), NAME ("<="), NAME ("=="),
2911    NAME (">="), NAME (">>="), NAME (">>"),  NAME ("?:"),  NAME ("^="),
2912    NAME ("|="), NAME ("||")
2913   };
2914 
2915 static const token_t cxx_opers[] =
2916   {
2917    NAME ("->*"), NAME (".*"),  NAME ("::"),  NAME ("<=>")
2918   };
2919 
2920 /* Common C/C++ keywords that are expected to be quoted within the format
2921    string.  Keywords like auto, inline, or volatile are exccluded because
2922    they are sometimes used in common terms like /auto variables/, /inline
2923    function/, or /volatile access/ where they should not be quoted.  */
2924 
2925 static const token_t c_keywords[] =
2926   {
2927 #undef NAME
2928 #define NAME(name, alt)  { name, sizeof name - 1, alt }
2929 
2930    NAME ("alignas", NULL),
2931    NAME ("alignof", NULL),
2932    NAME ("asm", NULL),
2933    NAME ("bool", NULL),
2934    NAME ("char", NULL),
2935    NAME ("const %", NULL),
2936    NAME ("const-qualified", "%<const%>-qualified"),
2937    NAME ("float", NULL),
2938    NAME ("ifunc", NULL),
2939    NAME ("int", NULL),
2940    NAME ("long double", NULL),
2941    NAME ("long int", NULL),
2942    NAME ("long long", NULL),
2943    NAME ("malloc", NULL),
2944    NAME ("noclone", NULL),
2945    NAME ("noinline", NULL),
2946    NAME ("nonnull", NULL),
2947    NAME ("noreturn", NULL),
2948    NAME ("offsetof", NULL),
2949    NAME ("readonly", "read-only"),
2950    NAME ("readwrite", "read-write"),
2951    NAME ("restrict %", NULL),
2952    NAME ("restrict-qualified", "%<restrict%>-qualified"),
2953    NAME ("short int", NULL),
2954    NAME ("signed char", NULL),
2955    NAME ("signed int", NULL),
2956    NAME ("signed long", NULL),
2957    NAME ("signed short", NULL),
2958    NAME ("sizeof", NULL),
2959    NAME ("typeof", NULL),
2960    NAME ("unsigned char", NULL),
2961    NAME ("unsigned int", NULL),
2962    NAME ("unsigned long", NULL),
2963    NAME ("unsigned short", NULL),
2964    NAME ("volatile %", NULL),
2965    NAME ("volatile-qualified", "%<volatile%>-qualified"),
2966    NAME ("weakref", NULL),
2967   };
2968 
2969 static const token_t cxx_keywords[] =
2970   {
2971    /* C++ only keywords and operators.  */
2972    NAME ("catch", NULL),
2973    NAME ("constexpr if", NULL),
2974    NAME ("constexpr", NULL),
2975    NAME ("constinit", NULL),
2976    NAME ("consteval", NULL),
2977    NAME ("decltype", NULL),
2978    NAME ("nullptr", NULL),
2979    NAME ("operator delete", NULL),
2980    NAME ("operator new", NULL),
2981    NAME ("typeid", NULL),
2982    NAME ("typeinfo", NULL)
2983   };
2984 
2985 /* Blacklisted words such as misspellings that should be avoided in favor
2986    of the specified alternatives.  */
2987 static const struct
2988 {
2989   const char *name;   /* Bad word.  */
2990   unsigned char len;  /* Its length.  */
2991   const char *alt;    /* Preferred alternative.  */
2992 } badwords[] =
2993   {
2994    NAME ("arg", "argument"),
2995    NAME ("bitfield", "bit-field"),
2996    NAME ("builtin function", "built-in function"),
2997    NAME ("can not", "cannot"),
2998    NAME ("commandline option", "command-line option"),
2999    NAME ("commandline", "command line"),
3000    NAME ("command line option", "command-line option"),
3001    NAME ("decl", "declaration"),
3002    NAME ("enumeral", "enumerated"),
3003    NAME ("floating point", "floating-point"),
3004    NAME ("nonstatic", "non-static"),
3005    NAME ("non-zero", "nonzero"),
3006    NAME ("reg", "register"),
3007    NAME ("stmt", "statement"),
3008   };
3009 
3010 /* Common contractions that should be avoided in favor of the specified
3011    alternatives.  */
3012 
3013 static const struct
3014 {
3015   const char *name;   /* Contraction.  */
3016   unsigned char len;  /* Its length.  */
3017   const char *alt;    /* Preferred alternative.  */
3018 } contrs[] =
3019   {
3020    NAME ("can't", "cannot"),
3021    NAME ("didn't", "did not"),
3022    /* These are commonly abused.  Avoid diagnosing them for now.
3023       NAME ("isn't", "is not"),
3024       NAME ("don't", "is not"),
3025    */
3026    NAME ("mustn't", "must not"),
3027    NAME ("needn't", "need not"),
3028    NAME ("should't", "should not"),
3029    NAME ("that's", "that is"),
3030    NAME ("there's", "there is"),
3031    NAME ("they're", "they are"),
3032    NAME ("what's", "what is"),
3033    NAME ("won't", "will not")
3034   };
3035 
3036 /* Check for unquoted TOKENS.  FORMAT_STRING_LOC is the location of
3037    the format string, FORMAT_STRING_CST the format string itself (as
3038    a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to
3039    the beginning of the format string and the character currently
3040    being processed, and BALTOKS describes paired "tokens" within
3041    the format string that are expected to be balanced.
3042    Returns a pointer to the last processed character or null when
3043    nothing was done.  */
3044 
3045 static const char*
check_tokens(const token_t * tokens,unsigned ntoks,location_t format_string_loc,tree format_string_cst,const char * orig_format_chars,const char * format_chars,baltoks_t & baltoks)3046 check_tokens (const token_t *tokens, unsigned ntoks,
3047 	      location_t format_string_loc, tree format_string_cst,
3048 	      const char *orig_format_chars, const char *format_chars,
3049 	      baltoks_t &baltoks)
3050 {
3051   /* For brevity.  */
3052   const int opt = OPT_Wformat_diag;
3053   /* Zero-based starting position of a problem sequence.  */
3054   int fmtchrpos = format_chars - orig_format_chars;
3055 
3056   /* For identifier-like "words," set to the word length.  */
3057   unsigned wlen = 0;
3058   /* Set for an operator, clear for an identifier/word.  */
3059   bool is_oper = false;
3060   bool underscore = false;
3061 
3062   if (format_chars[0] == '_' || ISALPHA (format_chars[0]))
3063     {
3064       while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen]))
3065 	{
3066 	  underscore |= format_chars[wlen] == '_';
3067 	  ++wlen;
3068 	}
3069     }
3070   else
3071     is_oper = true;
3072 
3073   for (unsigned i = 0; i != ntoks; ++i)
3074     {
3075       unsigned toklen = tokens[i].len;
3076 
3077       if (toklen < wlen
3078 	  || strncmp (format_chars, tokens[i].name, toklen))
3079 	continue;
3080 
3081       if (toklen == 2
3082 	  && format_chars - orig_format_chars > 0
3083 	  && (TOUPPER (format_chars[-1]) == 'C'
3084 	      || TOUPPER (format_chars[-1]) == 'G'))
3085 	return format_chars + toklen - 1;   /* Reference to C++ or G++.  */
3086 
3087       if (ISPUNCT (format_chars[toklen - 1]))
3088 	{
3089 	  if (format_chars[toklen - 1] == format_chars[toklen])
3090 	    return NULL;   /* Operator followed by another punctuator.  */
3091 	}
3092       else if (ISALNUM (format_chars[toklen]))
3093 	return NULL;   /* Keyword prefix for a longer word.  */
3094 
3095       if (toklen == 2
3096 	  && format_chars[0] == '-'
3097 	  && format_chars[1] == '-'
3098 	  && ISALNUM (format_chars[2]))
3099 	return NULL;   /* Probably option like --help.  */
3100 
3101       /* Allow this ugly warning for the time being.  */
3102       if (toklen == 2
3103 	  && format_chars - orig_format_chars > 6
3104 	  && startswith (format_chars - 7, " count >= width of "))
3105 	return format_chars + 10;
3106 
3107       /* The token is a type if it ends in an alphabetic character.  */
3108       bool is_type = (ISALPHA (tokens[i].name[toklen - 1])
3109 		      && strchr (tokens[i].name, ' '));
3110 
3111       /* Backtrack to the last alphabetic character (for tokens whose
3112 	 names end in '%').  */
3113       if (!is_oper)
3114 	while (!ISALPHA (tokens[i].name[toklen - 1]))
3115 	  --toklen;
3116 
3117       if (format_warning_substr (format_string_loc, format_string_cst,
3118 				 fmtchrpos, fmtchrpos + toklen, opt,
3119 				 (is_type
3120 				  ? G_("unquoted type name %<%.*s%> in format")
3121 				  : (is_oper
3122 				     ? G_("unquoted operator %<%.*s%> in format")
3123 				     : G_("unquoted keyword %<%.*s%> in format"))),
3124 				 toklen, format_chars)
3125 	  && tokens[i].alt)
3126 	inform (format_string_loc, "use %qs instead", tokens[i].alt);
3127 
3128       return format_chars + toklen - 1;
3129     }
3130 
3131   /* Diagnose unquoted __attribute__.  Consider any parenthesized
3132      argument to the attribute to avoid redundant warnings for
3133      the double parentheses that might follow.  */
3134   if (startswith (format_chars, "__attribute"))
3135     {
3136       unsigned nchars = sizeof "__attribute" - 1;
3137       while ('_' == format_chars[nchars])
3138 	++nchars;
3139 
3140       for (int i = nchars; format_chars[i]; ++i)
3141 	if (' ' != format_chars[i])
3142 	  {
3143 	    nchars = i;
3144 	    break;
3145 	  }
3146 
3147       if (format_chars[nchars] == '(')
3148 	{
3149 	  baltoks.parens.safe_push (format_chars + nchars);
3150 
3151 	  ++nchars;
3152 	  bool close = false;
3153 	  if (format_chars[nchars] == '(')
3154 	    {
3155 	      baltoks.parens.safe_push (format_chars + nchars);
3156 	      close = true;
3157 	      ++nchars;
3158 	    }
3159 	  for (int i = nchars; format_chars[i]; ++i)
3160 	    if (')' == format_chars[i])
3161 	      {
3162 		if (baltoks.parens.length () > 0)
3163 		  baltoks.parens.pop ();
3164 		nchars = i + 1;
3165 		break;
3166 	      }
3167 
3168 	  if (close && format_chars[nchars] == ')')
3169 	    {
3170 	      if (baltoks.parens.length () > 0)
3171 		baltoks.parens.pop ();
3172 	      ++nchars;
3173 	    }
3174 	}
3175 
3176       format_warning_substr (format_string_loc, format_string_cst,
3177 			     fmtchrpos, fmtchrpos + nchars, opt,
3178 			      "unquoted attribute in format");
3179       return format_chars + nchars - 1;
3180     }
3181 
3182   /* Diagnose unquoted built-ins.  */
3183   if (format_chars[0] == '_'
3184       && format_chars[1] == '_'
3185       && (startswith (format_chars + 2, "atomic")
3186 	  || startswith (format_chars + 2, "builtin")
3187 	  || startswith (format_chars + 2, "sync")))
3188     {
3189       format_warning_substr (format_string_loc, format_string_cst,
3190 			     fmtchrpos, fmtchrpos + wlen, opt,
3191 			     "unquoted name of built-in function %<%.*s%> "
3192 			     "in format",
3193 			     wlen, format_chars);
3194       return format_chars + wlen - 1;
3195     }
3196 
3197   /* Diagnose unquoted substrings of alphanumeric characters containing
3198      underscores.  They most likely refer to identifiers and should be
3199      quoted.  */
3200   if (underscore)
3201     format_warning_substr (format_string_loc, format_string_cst,
3202 			   format_chars - orig_format_chars,
3203 			   format_chars + wlen - orig_format_chars,
3204 			   opt,
3205 			   "unquoted identifier or keyword %<%.*s%> in format",
3206 			   wlen, format_chars);
3207   else
3208     {
3209       /* Diagnose some common misspellings.  */
3210       for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
3211 	{
3212 	  unsigned badwlen = strspn (badwords[i].name, " -");
3213 	  if (wlen >= badwlen
3214 	      && (wlen <= badwords[i].len
3215 		  || (wlen == badwords[i].len + 1U
3216 		      && TOUPPER (format_chars[wlen - 1]) == 'S'))
3217 	      && !strncasecmp (format_chars, badwords[i].name, badwords[i].len))
3218 	    {
3219 	      /* Handle singular as well as plural forms of all bad words
3220 		 even though the latter don't necessarily make sense for
3221 		 all of the former (like "can nots").  */
3222 	      badwlen = badwords[i].len;
3223 	      const char *plural = "";
3224 	      if (TOUPPER (format_chars[badwlen]) == 'S')
3225 		{
3226 		  ++badwlen;
3227 		  plural = "s";
3228 		}
3229 
3230 	      /* As an exception, don't warn about "decl-specifier*" since
3231 		 it's a C++ grammar production.  */
3232 	      if (badwords[i].name[0] == 'd'
3233 		  && startswith (format_chars, "decl-specifier"))
3234 		continue;
3235 
3236 	      format_warning_substr (format_string_loc, format_string_cst,
3237 				     fmtchrpos, fmtchrpos + badwords[i].len,
3238 				     opt,
3239 				     "misspelled term %<%.*s%> in format; "
3240 				     "use %<%s%s%> instead",
3241 				     badwlen, format_chars,
3242 				     badwords[i].alt, plural);
3243 
3244 	      return format_chars + badwords[i].len - 1;
3245 	    }
3246 	}
3247 
3248       /* Skip C++/G++.  */
3249       if (!strncasecmp (format_chars, "c++", 3)
3250 	  || !strncasecmp (format_chars, "g++", 3))
3251 	return format_chars + 2;
3252     }
3253 
3254   return wlen ? format_chars + wlen - 1 : NULL;
3255 }
3256 
3257 /* Check plain text in a format string of a GCC diagnostic function
3258    for common quoting, punctuation, and spelling mistakes, and issue
3259    -Wformat-diag warnings if they are found.   FORMAT_STRING_LOC is
3260    the location of the format string, FORMAT_STRING_CST the format
3261    string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
3262    pointers to the beginning of the format string and the character
3263    currently being processed, and BALTOKS describes paired "tokens"
3264    within the format string that are expected to be balanced.
3265    Returns a pointer to the last processed character.  */
3266 
3267 static const char*
check_plain(location_t format_string_loc,tree format_string_cst,const char * orig_format_chars,const char * format_chars,baltoks_t & baltoks)3268 check_plain (location_t format_string_loc, tree format_string_cst,
3269 	     const char *orig_format_chars, const char *format_chars,
3270 	     baltoks_t &baltoks)
3271 {
3272   /* For brevity.  */
3273   const int opt = OPT_Wformat_diag;
3274   /* Zero-based starting position of a problem sequence.  */
3275   int fmtchrpos = format_chars - orig_format_chars;
3276 
3277   if (*format_chars == '%')
3278     {
3279       /* Diagnose %<%s%> and suggest using %qs instead.  */
3280       if (startswith (format_chars, "%<%s%>"))
3281 	format_warning_substr (format_string_loc, format_string_cst,
3282 			       fmtchrpos, fmtchrpos + 6, opt,
3283 			       "quoted %qs directive in format; "
3284 			       "use %qs instead", "%s", "%qs");
3285       else if (format_chars - orig_format_chars > 2
3286 	       && !strncasecmp (format_chars - 3, "can%'t", 6))
3287 	format_warning_substr (format_string_loc,
3288 			       format_string_cst,
3289 			       fmtchrpos - 3, fmtchrpos + 3, opt,
3290 			       "contraction %<%.*s%> in format; "
3291 			       "use %qs instead",
3292 			       6, format_chars - 3, "cannot");
3293 
3294       return format_chars;
3295     }
3296 
3297   if (baltoks.quotdirs.length ())
3298     {
3299       /* Skip over all plain text within a quoting directive until
3300 	 the next directive.  */
3301       while (*format_chars && '%' != *format_chars)
3302 	++format_chars;
3303 
3304       return format_chars;
3305     }
3306 
3307   /* The length of the problem sequence.  */
3308   int nchars = 0;
3309 
3310   /* Diagnose any whitespace characters other than <space> but only
3311      leading, trailing, and two or more consecutive <space>s.  Do
3312      this before diagnosing control characters because whitespace
3313      is a subset of controls.  */
3314   const char *other_than_space = NULL;
3315   while (ISSPACE (format_chars[nchars]))
3316     {
3317       if (format_chars[nchars] != ' ' && !other_than_space)
3318 	other_than_space = format_chars + nchars;
3319       ++nchars;
3320     }
3321 
3322   if (nchars)
3323     {
3324       /* This is the most common problem: go the extra mile to describe
3325 	 the problem in as much helpful detail as possible.  */
3326       if (other_than_space)
3327 	{
3328 	  format_warning_substr (format_string_loc, format_string_cst,
3329 				 fmtchrpos, fmtchrpos + nchars, opt,
3330 				 "unquoted whitespace character %qc in format",
3331 				 *other_than_space);
3332 	  return format_chars + nchars - 1;
3333 	}
3334 
3335       if (fmtchrpos == 0)
3336 	/* Accept strings of leading spaces with no warning.  */
3337 	return format_chars + nchars - 1;
3338 
3339       if (!format_chars[nchars])
3340 	{
3341 	  format_warning_substr (format_string_loc, format_string_cst,
3342 				 fmtchrpos, fmtchrpos + nchars, opt,
3343 				 "spurious trailing space in format");
3344 	  return format_chars + nchars - 1;
3345 	}
3346 
3347       if (nchars > 1)
3348 	{
3349 	  if (nchars == 2
3350 	      && orig_format_chars < format_chars
3351 	      && format_chars[-1] == '.'
3352 	      && format_chars[0] == ' '
3353 	      && format_chars[1] == ' ')
3354 	    {
3355 	      /* A period followed by two spaces.  */
3356 	      if (ISUPPER (*orig_format_chars))
3357 		{
3358 		  /* If the part before the period is a capitalized
3359 		     sentence check to make sure that what follows
3360 		     is also capitalized.  */
3361 		  if (ISLOWER (format_chars[2]))
3362 		    format_warning_substr (format_string_loc, format_string_cst,
3363 					   fmtchrpos, fmtchrpos + nchars, opt,
3364 					   "inconsistent capitalization in "
3365 					   "format");
3366 		}
3367 	    }
3368 	  else
3369 	    format_warning_substr (format_string_loc, format_string_cst,
3370 				   fmtchrpos, fmtchrpos + nchars, opt,
3371 				   "unquoted sequence of %i consecutive "
3372 				   "space characters in format", nchars);
3373 	  return format_chars + nchars - 1;
3374 	}
3375 
3376       format_chars += nchars;
3377       nchars = 0;
3378     }
3379 
3380   fmtchrpos = format_chars - orig_format_chars;
3381 
3382   /* Diagnose any unquoted control characters other than the terminating
3383      NUL.  */
3384   while (format_chars[nchars] && ISCNTRL (format_chars[nchars]))
3385     ++nchars;
3386 
3387   if (nchars > 1)
3388     {
3389       format_warning_substr (format_string_loc, format_string_cst,
3390 			     fmtchrpos, fmtchrpos + nchars, opt,
3391 			     "unquoted control characters in format");
3392       return format_chars + nchars - 1;
3393     }
3394   if (nchars)
3395     {
3396       format_warning_substr (format_string_loc, format_string_cst,
3397 			     fmtchrpos, fmtchrpos + nchars, opt,
3398 			     "unquoted control character %qc in format",
3399 			     *format_chars);
3400       return format_chars + nchars - 1;
3401     }
3402 
3403   if (ISPUNCT (format_chars[0]))
3404     {
3405       size_t nelts = sizeof c_opers / sizeof *c_opers;
3406       if (const char *ret = check_tokens (c_opers, nelts,
3407 					  format_string_loc, format_string_cst,
3408 					  orig_format_chars, format_chars,
3409 					  baltoks))
3410 	return ret;
3411 
3412       nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0;
3413       if (const char *ret = check_tokens (cxx_opers, nelts,
3414 					  format_string_loc, format_string_cst,
3415 					  orig_format_chars, format_chars,
3416 					  baltoks))
3417 	return ret;
3418     }
3419 
3420   if (ISALPHA (format_chars[0]))
3421     {
3422       size_t nelts = sizeof c_keywords / sizeof *c_keywords;
3423       if (const char *ret = check_tokens (c_keywords, nelts,
3424 					  format_string_loc, format_string_cst,
3425 					  orig_format_chars, format_chars,
3426 					  baltoks))
3427 	return ret;
3428 
3429       nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0;
3430       if (const char *ret = check_tokens (cxx_keywords, nelts,
3431 					  format_string_loc, format_string_cst,
3432 					  orig_format_chars, format_chars,
3433 					  baltoks))
3434 	return ret;
3435     }
3436 
3437   nchars = 0;
3438 
3439   /* Diagnose unquoted options.  */
3440   if  ((format_chars == orig_format_chars
3441 	|| format_chars[-1] == ' ')
3442        && format_chars[0] == '-'
3443        && ((format_chars[1] == '-'
3444 	    && ISALPHA (format_chars[2]))
3445 	   || ISALPHA (format_chars[1])))
3446     {
3447       nchars = 1;
3448       while (ISALNUM (format_chars[nchars])
3449 	     || '_' == format_chars[nchars]
3450 	     || '-' == format_chars[nchars]
3451 	     || '+' == format_chars[nchars])
3452 	++nchars;
3453 
3454       format_warning_substr (format_string_loc, format_string_cst,
3455 			     fmtchrpos, fmtchrpos + nchars, opt,
3456 			     "unquoted option name %<%.*s%> in format",
3457 			     nchars, format_chars);
3458       return format_chars + nchars - 1;
3459     }
3460 
3461   /* Diagnose leading, trailing, and two or more consecutive punctuation
3462      characters.  */
3463   const char *unbalanced = NULL;
3464   while ('%' != format_chars[nchars]
3465 	 && ISPUNCT (format_chars[nchars])
3466 	 && !unbalanced)
3467     {
3468       switch (format_chars[nchars])
3469 	{
3470 	case '[':
3471 	  baltoks.brackets.safe_push (format_chars + nchars);
3472 	  break;
3473 	case '{':
3474 	  baltoks.curly.safe_push (format_chars + nchars);
3475 	  break;
3476 	case '(':
3477 	  baltoks.parens.safe_push (format_chars + nchars);
3478 	  break;
3479 	case '<':
3480 	  baltoks.pointy.safe_push (format_chars + nchars);
3481 	  break;
3482 
3483 	case ']':
3484 	  if (baltoks.brackets.length () > 0)
3485 	    baltoks.brackets.pop ();
3486 	  else
3487 	    unbalanced = format_chars + nchars;
3488 	  break;
3489 	case '}':
3490 	  if (baltoks.curly.length () > 0)
3491 	    baltoks.curly.pop ();
3492 	  else
3493 	    unbalanced = format_chars + nchars;
3494 	  break;
3495 	case ')':
3496 	  if (baltoks.parens.length () > 0)
3497 	    baltoks.parens.pop ();
3498 	  else
3499 	    unbalanced = format_chars + nchars;
3500 	  break;
3501 	case '>':
3502 	  if (baltoks.pointy.length () > 0)
3503 	    baltoks.pointy.pop ();
3504 	  else
3505 	    unbalanced = format_chars + nchars;
3506 	  break;
3507 	}
3508 
3509       ++nchars;
3510     }
3511 
3512   if (unbalanced)
3513     {
3514       format_warning_substr (format_string_loc, format_string_cst,
3515 			     fmtchrpos, fmtchrpos + nchars, opt,
3516 			     "unbalanced punctuation character %qc in format",
3517 			     *unbalanced);
3518       return format_chars + nchars - 1;
3519     }
3520 
3521   if (nchars)
3522     {
3523       /* Consider any identifier that follows the pound ('#') sign
3524 	 a preprocessing directive.  */
3525       if (nchars == 1
3526 	  && format_chars[0] == '#'
3527 	  && ISALPHA (format_chars[1]))
3528 	{
3529 	  while (ISALNUM (format_chars[nchars])
3530 		 || format_chars[nchars] == '_')
3531 	    ++nchars;
3532 
3533 	  format_warning_substr (format_string_loc, format_string_cst,
3534 				 fmtchrpos, fmtchrpos + nchars, opt,
3535 				 "unquoted preprocessing directive %<%.*s%> "
3536 				 "in format", nchars, format_chars);
3537 	  return format_chars + nchars - 1;
3538 	}
3539 
3540       /* Diagnose a bare single quote.  */
3541       if (nchars == 1
3542 	  && format_chars[0] == '\''
3543 	  && format_chars - orig_format_chars
3544 	  && ISALPHA (format_chars[-1])
3545 	  && ISALPHA (format_chars[1]))
3546 	{
3547 	  /* Diagnose a subset of contractions that are best avoided.  */
3548 	  for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i)
3549 	    {
3550 	      const char *apos = strchr (contrs[i].name, '\'');
3551 	      gcc_assert (apos != NULL);
3552 	      int off = apos - contrs[i].name;
3553 
3554 	      if (format_chars - orig_format_chars >= off
3555 		  && !strncmp (format_chars - off,
3556 			       contrs[i].name, contrs[i].len))
3557 		{
3558 		  format_warning_substr (format_string_loc,
3559 					 format_string_cst,
3560 					 fmtchrpos, fmtchrpos + nchars, opt,
3561 					 "contraction %<%.*s%> in format; "
3562 					 "use %qs instead",
3563 					 contrs[i].len, contrs[i].name,
3564 					 contrs[i].alt);
3565 		  return format_chars + nchars - 1;
3566 		}
3567 	    }
3568 
3569 	  if (format_warning_substr (format_string_loc, format_string_cst,
3570 				     fmtchrpos, fmtchrpos + nchars, opt,
3571 				     "bare apostrophe %<'%> in format"))
3572 	    inform (format_string_loc,
3573 		    "if avoiding the apostrophe is not feasible, enclose "
3574 		    "it in a pair of %qs and %qs directives instead",
3575 		    "%<", "%>");
3576 	  return format_chars + nchars - 1;
3577 	}
3578 
3579       /* Diagnose a backtick (grave accent).  */
3580       if (nchars == 1
3581 	  && format_chars[0] == '`')
3582 	{
3583 	  if (format_warning_substr (format_string_loc, format_string_cst,
3584 				     fmtchrpos, fmtchrpos + nchars, opt,
3585 				     "grave accent %<`%> in format"))
3586 	    inform (format_string_loc,
3587 		    "use the apostrophe directive %qs instead", "%'");
3588 	  return format_chars + nchars - 1;
3589 	}
3590 
3591       /* Diagnose a punctuation character after a space.  */
3592       if (nchars == 1
3593 	  && format_chars - orig_format_chars
3594 	  && format_chars[-1] == ' '
3595 	  && strspn (format_chars, "!?:;.,") == 1)
3596 	{
3597 	  format_warning_substr (format_string_loc, format_string_cst,
3598 				 fmtchrpos - 1, fmtchrpos, opt,
3599 				 "space followed by punctuation character "
3600 				 "%<%c%>", format_chars[0]);
3601 	  return format_chars;
3602 	}
3603 
3604       if (nchars == 1)
3605 	{
3606 	  if (startswith (format_chars, "\"%s\""))
3607 	    {
3608 	      if (format_warning_substr (format_string_loc, format_string_cst,
3609 					 fmtchrpos, fmtchrpos + 4, opt,
3610 					 "quoted %qs directive in format",
3611 					 "%s"))
3612 		inform (format_string_loc, "if using %qs is not feasible, "
3613 			"use %qs instead", "%qs", "\"%-s\"");
3614 	    }
3615 
3616 	  if (format_chars[0] == '"')
3617 	    {
3618 	      baltoks.doublequote = baltoks.doublequote ? NULL : format_chars;
3619 	      return format_chars + nchars - 1;
3620 	    }
3621 	  if (format_chars[0] == '\'')
3622 	    {
3623 	      baltoks.singlequote = baltoks.singlequote ? NULL : format_chars;
3624 	      return format_chars + nchars - 1;
3625 	    }
3626 	}
3627 
3628       if (fmtchrpos == 0)
3629 	{
3630 	  if (nchars == 1
3631 	      && format_chars[0] == '(')
3632 	    ;   /* Text beginning in an open parenthesis.  */
3633 	  else if (nchars == 3
3634 	      && startswith (format_chars, "...")
3635 	      && format_chars[3])
3636 	    ;   /* Text beginning in an ellipsis.  */
3637 	  else
3638 	    {
3639 	      format_warning_substr (format_string_loc, format_string_cst,
3640 				     fmtchrpos, fmtchrpos + nchars, opt,
3641 				     "spurious leading punctuation sequence "
3642 				     "%<%.*s%> in format",
3643 				     nchars, format_chars);
3644 	      return format_chars + nchars - 1;
3645 	    }
3646 	}
3647       else if (!format_chars[nchars])
3648 	{
3649 	  if (nchars == 1
3650 	      && (format_chars[nchars - 1] == ':'
3651 		  || format_chars[nchars - 1] == ')'))
3652 	    ;   /* Text ending in a colon or a closing parenthesis.  */
3653 	  else if (nchars == 1
3654 		   && ((ISUPPER (*orig_format_chars)
3655 			&& format_chars[nchars - 1] == '.')
3656 		       || strspn (format_chars + nchars - 1, "?])") == 1))
3657 		  ;   /* Capitalized sentence terminated by a single period,
3658 			 or text ending in a question mark, closing bracket,
3659 			 or parenthesis.  */
3660 	  else if (nchars == 2
3661 		   && format_chars[0] == '?'
3662 		   && format_chars[1] == ')')
3663 	    ;   /* A question mark after a closing parenthetical note.  */
3664 	  else if (nchars == 2
3665 		   && format_chars[0] == ')'
3666 		   && (format_chars[1] == '?'
3667 		       || format_chars[1] == ';'
3668 		       || format_chars[1] == ':'
3669 		       || (ISUPPER (*orig_format_chars)
3670 			   && format_chars[1] == '.')))
3671 	    ;   /* Closing parenthetical note followed by a question mark,
3672 		   semicolon, or colon at the end of the string, or by
3673 		   a period at the end of a capitalized sentence.  */
3674 	  else if (nchars == 3
3675 		   && format_chars - orig_format_chars > 0
3676 		   && startswith (format_chars, "..."))
3677 	    ;   /* Text ending in the ellipsis.  */
3678 	  else
3679 	    format_warning_substr (format_string_loc, format_string_cst,
3680 				   fmtchrpos, fmtchrpos + nchars, opt,
3681 				   "spurious trailing punctuation sequence "
3682 				   "%<%.*s%> in format",
3683 				   nchars, format_chars);
3684 
3685 	  return format_chars + nchars - 1;
3686 	}
3687       else if (nchars == 2
3688 	       && format_chars[0] == ')'
3689 	       && (format_chars[1] == ':'
3690 		   || format_chars[1] == ';'
3691 		   || format_chars[1] == ',')
3692 	       && format_chars[2] == ' ')
3693 	;   /* Closing parenthetical note followed by a colon, semicolon
3694 	       or a comma followed by a space in the middle of the string.  */
3695       else if (nchars > 1)
3696 	format_warning_substr (format_string_loc, format_string_cst,
3697 			       fmtchrpos, fmtchrpos + nchars, opt,
3698 			       "unquoted sequence of %i consecutive "
3699 			       "punctuation characters %q.*s in format",
3700 			       nchars, nchars, format_chars);
3701       return format_chars + nchars - 1;
3702     }
3703 
3704   nchars = 0;
3705 
3706   /* Finally, diagnose any unquoted non-graph, non-punctuation characters
3707      other than the terminating NUL.  */
3708   while (format_chars[nchars]
3709 	 && '%' != format_chars[nchars]
3710 	 && !ISPUNCT (format_chars[nchars])
3711 	 && !ISGRAPH (format_chars[nchars]))
3712     ++nchars;
3713 
3714   if (nchars > 1)
3715     {
3716       format_warning_substr (format_string_loc, format_string_cst,
3717 			     fmtchrpos, fmtchrpos + nchars, opt,
3718 			     "unquoted non-graph characters in format");
3719       return format_chars + nchars - 1;
3720     }
3721   if (nchars)
3722     {
3723       format_warning_substr (format_string_loc, format_string_cst,
3724 			     fmtchrpos, fmtchrpos + nchars, opt,
3725 			     "unquoted non-graph character %qc in format",
3726 			     *format_chars);
3727       return format_chars + nchars - 1;
3728     }
3729 
3730   return format_chars;
3731 }
3732 
3733 /* Diagnose unbalanced tokens described by BALTOKS in format string
3734    ORIG_FORMAT_CHARS and the corresponding FORMAT_STRING_CST.  */
3735 
3736 static void
maybe_diag_unbalanced_tokens(location_t format_string_loc,const char * orig_format_chars,tree format_string_cst,baltoks_t & baltoks)3737 maybe_diag_unbalanced_tokens (location_t format_string_loc,
3738 			      const char *orig_format_chars,
3739 			      tree format_string_cst,
3740 			      baltoks_t &baltoks)
3741 {
3742   const char *unbalanced = NULL;
3743 
3744   if (baltoks.brackets.length ())
3745     unbalanced = baltoks.brackets.pop ();
3746   else if (baltoks.curly.length ())
3747     unbalanced = baltoks.curly.pop ();
3748   else if (baltoks.parens.length ())
3749     unbalanced = baltoks.parens.pop ();
3750   else if (baltoks.pointy.length ())
3751     unbalanced = baltoks.pointy.pop ();
3752 
3753   if (unbalanced)
3754     format_warning_at_char (format_string_loc, format_string_cst,
3755 			    unbalanced - orig_format_chars + 1,
3756 			    OPT_Wformat_diag,
3757 			    "unbalanced punctuation character %<%c%> in format",
3758 			    *unbalanced);
3759 
3760   if (baltoks.quotdirs.length ())
3761     format_warning_at_char (format_string_loc, format_string_cst,
3762 			    baltoks.quotdirs.pop () - orig_format_chars,
3763 			    OPT_Wformat_,
3764 			    "unterminated quoting directive");
3765 
3766   const char *quote
3767     = baltoks.singlequote ? baltoks.singlequote : baltoks.doublequote;
3768 
3769   if (quote)
3770     format_warning_at_char (format_string_loc, format_string_cst,
3771   			    quote - orig_format_chars + 1,
3772 			    OPT_Wformat_diag,
3773   			    "unterminated quote character %<%c%> in format",
3774   			    *quote);
3775 }
3776 
3777 /* Do the main part of checking a call to a format function.  FORMAT_CHARS
3778    is the NUL-terminated format string (which at this point may contain
3779    internal NUL characters); FORMAT_LENGTH is its length (excluding the
3780    terminating NUL character).  ARG_NUM is one less than the number of
3781    the first format argument to check; PARAMS points to that format
3782    argument in the list of arguments.  */
3783 
3784 static void
check_format_info_main(format_check_results * res,function_format_info * info,const char * format_chars,location_t fmt_param_loc,tree format_string_cst,int format_length,tree params,unsigned HOST_WIDE_INT arg_num,object_allocator<format_wanted_type> & fwt_pool,vec<location_t> * arglocs)3785 check_format_info_main (format_check_results *res,
3786 			function_format_info *info, const char *format_chars,
3787 			location_t fmt_param_loc, tree format_string_cst,
3788 			int format_length, tree params,
3789 			unsigned HOST_WIDE_INT arg_num,
3790 			object_allocator <format_wanted_type> &fwt_pool,
3791 			vec<location_t> *arglocs)
3792 {
3793   const char * const orig_format_chars = format_chars;
3794   const tree first_fillin_param = params;
3795 
3796   const format_kind_info * const fki = &format_types[info->format_type];
3797   const format_flag_spec * const flag_specs = fki->flag_specs;
3798   const location_t format_string_loc = res->format_string_loc;
3799 
3800   /* -1 if no conversions taking an operand have been found; 0 if one has
3801      and it didn't use $; 1 if $ formats are in use.  */
3802   int has_operand_number = -1;
3803 
3804   /* Vectors of pointers to opening quoting directives (like GCC "%<"),
3805      opening braces, brackets, and parentheses.  Used to detect unbalanced
3806      tokens.  */
3807   baltoks_t baltoks;
3808 
3809   /* Pointers to the most recent color directives (like GCC's "%r or %R").
3810      A starting color directive much be terminated before the end of
3811      the format string.  A terminating directive makes no sense without
3812      a prior starting directive.  */
3813   const char *color_begin = NULL;
3814   const char *color_end = NULL;
3815 
3816   init_dollar_format_checking (info->first_arg_num, first_fillin_param);
3817 
3818   /* In GCC diagnostic functions check plain directives (substrings within
3819      the format string that don't start with %) for quoting and punctuations
3820      problems.  */
3821   bool ck_plain = (!info->is_raw
3822 		   && (info->format_type == gcc_diag_format_type
3823 		       || info->format_type == gcc_tdiag_format_type
3824 		       || info->format_type == gcc_cdiag_format_type
3825 		       || info->format_type == gcc_cxxdiag_format_type));
3826 
3827   while (*format_chars != 0)
3828     {
3829       if (ck_plain)
3830 	format_chars = check_plain (format_string_loc,
3831 				    format_string_cst,
3832 				    orig_format_chars, format_chars,
3833 				    baltoks);
3834 
3835       if (*format_chars == 0 || *format_chars++ != '%')
3836 	continue;
3837 
3838       if (*format_chars == 0)
3839 	{
3840 	  format_warning_at_char (format_string_loc, format_string_cst,
3841 				  format_chars - orig_format_chars,
3842 				  OPT_Wformat_,
3843 				  "spurious trailing %<%%%> in format");
3844 	  continue;
3845 	}
3846       if (*format_chars == '%')
3847 	{
3848 	  ++format_chars;
3849 	  continue;
3850 	}
3851 
3852       /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls
3853 	 to ARG_PARSER members may modify the variable.  */
3854       flag_chars_t flag_chars;
3855       argument_parser arg_parser (info, format_chars, format_string_cst,
3856 				  orig_format_chars, format_string_loc,
3857 				  flag_chars, has_operand_number,
3858 				  first_fillin_param, fwt_pool, arglocs);
3859 
3860       if (!arg_parser.read_any_dollar ())
3861 	return;
3862 
3863       if (!arg_parser.read_format_flags ())
3864 	return;
3865 
3866       /* Read any format width, possibly * or *m$.  */
3867       if (!arg_parser.read_any_format_width (params, arg_num))
3868 	return;
3869 
3870       /* Read any format left precision (must be a number, not *).  */
3871       arg_parser.read_any_format_left_precision ();
3872 
3873       /* Read any format precision, possibly * or *m$.  */
3874       if (!arg_parser.read_any_format_precision (params, arg_num))
3875 	return;
3876 
3877       const char *format_start = format_chars;
3878 
3879       arg_parser.handle_alloc_chars ();
3880 
3881       /* The rest of the conversion specification is the length modifier
3882 	 (if any), and the conversion specifier, so this is where the
3883 	 type information starts.  If we need to issue a suggestion
3884 	 about a type mismatch, then we should preserve everything up
3885 	 to here. */
3886       const char *type_start = format_chars;
3887 
3888       /* Read any length modifier, if this kind of format has them.  */
3889       const length_modifier len_modifier
3890 	= arg_parser.read_any_length_modifier ();
3891 
3892       /* Read any modifier (strftime E/O).  */
3893       arg_parser.read_any_other_modifier ();
3894 
3895       char format_char = *format_chars;
3896       if (format_char == 0
3897 	  || (!(fki->flags & (int) FMT_FLAG_FANCY_PERCENT_OK)
3898 	      && format_char == '%'))
3899 	{
3900 	  format_warning_at_char (format_string_loc, format_string_cst,
3901 			     format_chars - orig_format_chars,
3902 			     OPT_Wformat_,
3903 			     "conversion lacks type at end of format");
3904 	  continue;
3905 	}
3906 
3907       if (format_char == 'm' && !(fki->flags & FMT_FLAG_M_OK))
3908         {
3909 	  warning (OPT_Wformat_,
3910 	      "%%m is only allowed in syslog(3) like functions");
3911 	  continue;
3912 	}
3913 
3914       format_chars++;
3915 
3916       const format_char_info * const fci
3917 	= arg_parser.find_format_char_info (format_char);
3918       if (!fci)
3919 	continue;
3920 
3921       flag_chars.validate (fki, fci, flag_specs, format_chars,
3922 			   format_string_cst,
3923 			   format_string_loc, orig_format_chars, format_char,
3924 			   baltoks.quotdirs.length () > 0);
3925 
3926       const int alloc_flag = flag_chars.get_alloc_flag (fki);
3927       const bool suppressed = flag_chars.assignment_suppression_p (fki);
3928 
3929       /* Diagnose nested or unmatched quoting directives such as GCC's
3930 	 "%<...%<" and "%>...%>".  */
3931       bool quot_begin_p = strchr (fci->flags2, '<');
3932       bool quot_end_p = strchr (fci->flags2, '>');
3933 
3934       if (quot_begin_p && !quot_end_p)
3935 	{
3936 	  if (baltoks.quotdirs.length ())
3937 	    format_warning_at_char (format_string_loc, format_string_cst,
3938 				    format_chars - orig_format_chars,
3939 				    OPT_Wformat_,
3940 				    "nested quoting directive");
3941 	  baltoks.quotdirs.safe_push (format_chars);
3942 	}
3943       else if (!quot_begin_p && quot_end_p)
3944 	{
3945 	  if (baltoks.quotdirs.length ())
3946 	    baltoks.quotdirs.pop ();
3947 	  else
3948 	    format_warning_at_char (format_string_loc, format_string_cst,
3949 				    format_chars - orig_format_chars,
3950 				    OPT_Wformat_,
3951 				    "unmatched quoting directive");
3952 	}
3953 
3954       bool color_begin_p = strchr (fci->flags2, '/');
3955       if (color_begin_p)
3956 	{
3957 	  color_begin = format_chars;
3958 	  color_end = NULL;
3959 	}
3960       else if (strchr (fci->flags2, '\\'))
3961 	{
3962 	  if (color_end)
3963 	    format_warning_at_char (format_string_loc, format_string_cst,
3964 				    format_chars - orig_format_chars,
3965 				    OPT_Wformat_,
3966 				    "%qc directive redundant after prior "
3967 				    "occurence of the same", format_char);
3968 	  else if (!color_begin)
3969 	    format_warning_at_char (format_string_loc, format_string_cst,
3970 				    format_chars - orig_format_chars,
3971 				    OPT_Wformat_,
3972 				    "unmatched color reset directive");
3973 	  color_end = format_chars;
3974 	}
3975 
3976       /* Diagnose directives that shouldn't appear in a quoted sequence.
3977 	 (They are denoted by a double quote in FLAGS2.)  */
3978       if (baltoks.quotdirs.length ())
3979 	{
3980 	  if (strchr (fci->flags2, '"'))
3981 	    format_warning_at_char (format_string_loc, format_string_cst,
3982 				    format_chars - orig_format_chars,
3983 				    OPT_Wformat_,
3984 				    "%qc conversion used within a quoted "
3985 				    "sequence",
3986 				    format_char);
3987 	}
3988 
3989       /* Validate the pairs of flags used.  */
3990       arg_parser.validate_flag_pairs (fci, format_char);
3991 
3992       arg_parser.give_y2k_warnings (fci, format_char);
3993 
3994       arg_parser.parse_any_scan_set (fci);
3995 
3996       tree wanted_type = NULL;
3997       const char *wanted_type_name = NULL;
3998 
3999       if (!arg_parser.handle_conversions (fci, len_modifier,
4000 					  wanted_type, wanted_type_name,
4001 					  arg_num,
4002 					  params,
4003 					  format_char))
4004 	continue;
4005 
4006       arg_parser.main_wanted_type.next = NULL;
4007 
4008       /* Finally. . .check type of argument against desired type!  */
4009       if (!arg_parser.check_argument_type (fci, len_modifier,
4010 					   wanted_type, wanted_type_name,
4011 					   suppressed,
4012 					   arg_num, params,
4013 					   alloc_flag,
4014 					   format_start, type_start,
4015 					   fmt_param_loc,
4016 					   format_char))
4017 	return;
4018     }
4019 
4020   if (format_chars - orig_format_chars != format_length)
4021     format_warning_at_char (format_string_loc, format_string_cst,
4022 			    format_chars + 1 - orig_format_chars,
4023 			    OPT_Wformat_contains_nul,
4024 			    "embedded %<\\0%> in format");
4025   if (info->first_arg_num != 0 && params != 0
4026       && has_operand_number <= 0)
4027     {
4028       res->number_other--;
4029       res->number_extra_args++;
4030     }
4031   if (has_operand_number > 0)
4032     finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK);
4033 
4034   maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars,
4035 				format_string_cst, baltoks);
4036 
4037   if (color_begin && !color_end)
4038     format_warning_at_char (format_string_loc, format_string_cst,
4039 			    color_begin - orig_format_chars,
4040 			    OPT_Wformat_, "unterminated color directive");
4041 }
4042 
4043 /* Check the argument types from a single format conversion (possibly
4044    including width and precision arguments).
4045 
4046    FMT_LOC is the location of the format conversion.
4047 
4048    TYPES is a singly-linked list expressing the parts of the format
4049    conversion that expect argument types, and the arguments they
4050    correspond to.
4051 
4052    OFFSET_TO_TYPE_START is the offset within the execution-charset encoded
4053    format string to where type information begins for the conversion
4054    (the length modifier and conversion specifier).
4055 
4056    CONVERSION_CHAR is the user-provided conversion specifier.
4057 
4058    For example, given:
4059 
4060      sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4061 
4062    then FMT_LOC covers this range:
4063 
4064      sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4065                          ^^^^^^^^^
4066 
4067    and TYPES in this case is a three-entry singly-linked list consisting of:
4068    (1) the check for the field width here:
4069          sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4070                                 ^              ^^^^
4071        against arg3, and
4072    (2) the check for the field precision here:
4073          sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4074                                  ^^                  ^^^^
4075        against arg4, and
4076    (3) the check for the length modifier and conversion char here:
4077          sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4078                                    ^^^                     ^^^^
4079        against arg5.
4080 
4081    OFFSET_TO_TYPE_START is 13, the offset to the "lld" within the
4082    STRING_CST:
4083 
4084                   0000000000111111111122
4085                   0123456789012345678901
4086      sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4087                                ^ ^
4088                                | ` CONVERSION_CHAR: 'd'
4089                                type starts here.  */
4090 
4091 static void
check_format_types(const substring_loc & fmt_loc,format_wanted_type * types,const format_kind_info * fki,int offset_to_type_start,char conversion_char,vec<location_t> * arglocs)4092 check_format_types (const substring_loc &fmt_loc,
4093 		    format_wanted_type *types, const format_kind_info *fki,
4094 		    int offset_to_type_start,
4095 		    char conversion_char,
4096 		    vec<location_t> *arglocs)
4097 {
4098   for (; types != 0; types = types->next)
4099     {
4100       tree cur_param;
4101       tree cur_type;
4102       tree orig_cur_type;
4103       tree wanted_type;
4104       int arg_num;
4105       int i;
4106       int char_type_flag;
4107 
4108       wanted_type = types->wanted_type;
4109       arg_num = types->arg_num;
4110 
4111       /* The following should not occur here.  */
4112       gcc_assert (wanted_type);
4113       gcc_assert (wanted_type != void_type_node || types->pointer_count);
4114 
4115       if (types->pointer_count == 0)
4116 	wanted_type = lang_hooks.types.type_promotes_to (wanted_type);
4117 
4118       wanted_type = TYPE_MAIN_VARIANT (wanted_type);
4119 
4120       cur_param = types->param;
4121       if (!cur_param)
4122         {
4123 	  format_type_warning (fmt_loc, UNKNOWN_LOCATION, types, wanted_type,
4124 			       NULL, fki, offset_to_type_start,
4125 			       conversion_char);
4126           continue;
4127         }
4128 
4129       cur_type = TREE_TYPE (cur_param);
4130       if (cur_type == error_mark_node)
4131 	continue;
4132       orig_cur_type = cur_type;
4133       char_type_flag = 0;
4134 
4135       location_t param_loc = UNKNOWN_LOCATION;
4136       if (EXPR_HAS_LOCATION (cur_param))
4137 	param_loc = EXPR_LOCATION (cur_param);
4138       else if (arglocs)
4139 	{
4140 	  /* arg_num is 1-based.  */
4141 	  gcc_assert (types->arg_num > 0);
4142 	  param_loc = (*arglocs)[types->arg_num - 1];
4143 	}
4144 
4145       STRIP_NOPS (cur_param);
4146 
4147       /* Check the types of any additional pointer arguments
4148 	 that precede the "real" argument.  */
4149       for (i = 0; i < types->pointer_count; ++i)
4150 	{
4151 	  if (TREE_CODE (cur_type) == POINTER_TYPE)
4152 	    {
4153 	      cur_type = TREE_TYPE (cur_type);
4154 	      if (cur_type == error_mark_node)
4155 		break;
4156 
4157 	      /* Check for writing through a NULL pointer.  */
4158 	      if (types->writing_in_flag
4159 		  && i == 0
4160 		  && cur_param != 0
4161 		  && integer_zerop (cur_param))
4162 		warning (OPT_Wformat_, "writing through null pointer "
4163 			 "(argument %d)", arg_num);
4164 
4165 	      /* Check for reading through a NULL pointer.  Ignore
4166 		 printf-family of functions as they are checked for
4167 		 null arguments by the middle-end.  */
4168 	      if (fki->conversion_specs != print_char_table
4169 		  && types->reading_from_flag
4170 		  && i == 0
4171 		  && cur_param != 0
4172 		  && integer_zerop (cur_param))
4173 		warning (OPT_Wformat_, "reading through null pointer "
4174 			 "(argument %d)", arg_num);
4175 
4176 	      if (cur_param != 0 && TREE_CODE (cur_param) == ADDR_EXPR)
4177 		cur_param = TREE_OPERAND (cur_param, 0);
4178 	      else
4179 		cur_param = 0;
4180 
4181 	      /* See if this is an attempt to write into a const type with
4182 		 scanf or with printf "%n".  Note: the writing in happens
4183 		 at the first indirection only, if for example
4184 		 void * const * is passed to scanf %p; passing
4185 		 const void ** is simply passing an incompatible type.  */
4186 	      if (types->writing_in_flag
4187 		  && i == 0
4188 		  && (TYPE_READONLY (cur_type)
4189 		      || (cur_param != 0
4190 			  && (CONSTANT_CLASS_P (cur_param)
4191 			      || (DECL_P (cur_param)
4192 				  && TREE_READONLY (cur_param))))))
4193 		warning (OPT_Wformat_, "writing into constant object "
4194 			 "(argument %d)", arg_num);
4195 
4196 	      /* If there are extra type qualifiers beyond the first
4197 		 indirection, then this makes the types technically
4198 		 incompatible.  */
4199 	      if (i > 0
4200 		  && pedantic
4201 		  && (TYPE_READONLY (cur_type)
4202 		      || TYPE_VOLATILE (cur_type)
4203 		      || TYPE_ATOMIC (cur_type)
4204 		      || TYPE_RESTRICT (cur_type)))
4205 		warning (OPT_Wformat_, "extra type qualifiers in format "
4206 			 "argument (argument %d)",
4207 			 arg_num);
4208 
4209 	    }
4210 	  else
4211 	    {
4212 	      format_type_warning (fmt_loc, param_loc,
4213 				   types, wanted_type, orig_cur_type, fki,
4214 				   offset_to_type_start, conversion_char);
4215 	      break;
4216 	    }
4217 	}
4218 
4219       if (i < types->pointer_count)
4220 	continue;
4221 
4222       cur_type = TYPE_MAIN_VARIANT (cur_type);
4223 
4224       /* Check whether the argument type is a character type.  This leniency
4225 	 only applies to certain formats, flagged with 'c'.  */
4226       if (types->char_lenient_flag)
4227 	char_type_flag = (cur_type == char_type_node
4228 			  || cur_type == signed_char_type_node
4229 			  || cur_type == unsigned_char_type_node);
4230 
4231       /* Check the type of the "real" argument, if there's a type we want.  */
4232       if (lang_hooks.types_compatible_p (wanted_type, cur_type))
4233 	continue;
4234       /* If we want 'void *', allow any pointer type.
4235 	 (Anything else would already have got a warning.)
4236 	 With -Wpedantic, only allow pointers to void and to character
4237 	 types.  */
4238       if (wanted_type == void_type_node
4239 	  && (!pedantic || (i == 1 && char_type_flag)))
4240 	continue;
4241       /* Don't warn about differences merely in signedness, unless
4242 	 -Wpedantic.  With -Wpedantic, warn if the type is a pointer
4243 	 target and not a character type, and for character types at
4244 	 a second level of indirection.  */
4245       if (TREE_CODE (wanted_type) == INTEGER_TYPE
4246 	  && TREE_CODE (cur_type) == INTEGER_TYPE
4247 	  && ((!pedantic && !warn_format_signedness)
4248 	      || (i == 0 && !warn_format_signedness)
4249 	      || (i == 1 && char_type_flag))
4250 	  && (TYPE_UNSIGNED (wanted_type)
4251 	      ? wanted_type == c_common_unsigned_type (cur_type)
4252 	      : wanted_type == c_common_signed_type (cur_type)))
4253 	continue;
4254       /* Don't warn about differences merely in signedness if we know
4255 	 that the current type is integer-promoted and its original type
4256 	 was unsigned such as that it is in the range of WANTED_TYPE.  */
4257       if (TREE_CODE (wanted_type) == INTEGER_TYPE
4258 	  && TREE_CODE (cur_type) == INTEGER_TYPE
4259 	  && warn_format_signedness
4260 	  && TYPE_UNSIGNED (wanted_type)
4261 	  && cur_param != NULL_TREE
4262 	  && TREE_CODE (cur_param) == NOP_EXPR)
4263 	{
4264 	  tree t = TREE_TYPE (TREE_OPERAND (cur_param, 0));
4265 	  if (TYPE_UNSIGNED (t)
4266 	      && cur_type == lang_hooks.types.type_promotes_to (t))
4267 	    continue;
4268 	}
4269       /* Likewise, "signed char", "unsigned char" and "char" are
4270 	 equivalent but the above test won't consider them equivalent.  */
4271       if (wanted_type == char_type_node
4272 	  && (!pedantic || i < 2)
4273 	  && char_type_flag)
4274 	continue;
4275       if (types->scalar_identity_flag
4276 	  && (TREE_CODE (cur_type) == TREE_CODE (wanted_type)
4277 	      || (INTEGRAL_TYPE_P (cur_type)
4278 		  && INTEGRAL_TYPE_P (wanted_type)))
4279 	  && TYPE_PRECISION (cur_type) == TYPE_PRECISION (wanted_type))
4280 	continue;
4281       /* Now we have a type mismatch.  */
4282       format_type_warning (fmt_loc, param_loc, types,
4283 			   wanted_type, orig_cur_type, fki,
4284 			   offset_to_type_start, conversion_char);
4285     }
4286 }
4287 
4288 /* Given type TYPE, attempt to dereference the type N times
4289    (e.g. from ("int ***", 2) to "int *")
4290 
4291    Return the derefenced type, with any qualifiers
4292    such as "const" stripped from the result, or
4293    NULL if unsuccessful (e.g. TYPE is not a pointer type).  */
4294 
4295 static tree
deref_n_times(tree type,int n)4296 deref_n_times (tree type, int n)
4297 {
4298   gcc_assert (type);
4299 
4300   for (int i = n; i > 0; i--)
4301     {
4302       if (TREE_CODE (type) != POINTER_TYPE)
4303 	return NULL_TREE;
4304       type = TREE_TYPE (type);
4305     }
4306   /* Strip off any "const" etc.  */
4307   return build_qualified_type (type, 0);
4308 }
4309 
4310 /* Lookup the format code for FORMAT_LEN within FLI,
4311    returning the string code for expressing it, or NULL
4312    if it is not found.  */
4313 
4314 static const char *
get_modifier_for_format_len(const format_length_info * fli,enum format_lengths format_len)4315 get_modifier_for_format_len (const format_length_info *fli,
4316 			     enum format_lengths format_len)
4317 {
4318   for (; fli->name; fli++)
4319     {
4320       if (fli->index == format_len)
4321 	return fli->name;
4322       if (fli->double_index == format_len)
4323 	return fli->double_name;
4324     }
4325   return NULL;
4326 }
4327 
#if CHECKING_P

namespace selftest {

/* Verify that get_modifier_for_format_len maps length codes back to
   their spelling using the printf length table: a plain modifier
   ("h", "L"), a doubled modifier ("hh"), and FMT_LEN_none, for which
   the lookup is expected to yield NULL.  */

static void
test_get_modifier_for_format_len ()
{
  ASSERT_STREQ
    ("h", get_modifier_for_format_len (printf_length_specs, FMT_LEN_h));
  ASSERT_STREQ
    ("hh", get_modifier_for_format_len (printf_length_specs, FMT_LEN_hh));
  ASSERT_STREQ
    ("L", get_modifier_for_format_len (printf_length_specs, FMT_LEN_L));
  ASSERT_EQ
    (NULL, get_modifier_for_format_len (printf_length_specs, FMT_LEN_none));
}

} // namespace selftest

#endif /* CHECKING_P */
4348 
4349 /* Determine if SPEC_TYPE and ARG_TYPE are sufficiently similar for a
4350    format_type_detail using SPEC_TYPE to be offered as a suggestion for
4351    Wformat type errors where the argument has type ARG_TYPE.  */
4352 
4353 static bool
matching_type_p(tree spec_type,tree arg_type)4354 matching_type_p (tree spec_type, tree arg_type)
4355 {
4356   gcc_assert (spec_type);
4357   gcc_assert (arg_type);
4358 
4359   /* If any of the types requires structural equality, we can't compare
4360      their canonical types.  */
4361   if (TYPE_STRUCTURAL_EQUALITY_P (spec_type)
4362       || TYPE_STRUCTURAL_EQUALITY_P (arg_type))
4363     return false;
4364 
4365   spec_type = TYPE_CANONICAL (spec_type);
4366   arg_type = TYPE_CANONICAL (arg_type);
4367 
4368   if (TREE_CODE (spec_type) == INTEGER_TYPE
4369       && TREE_CODE (arg_type) == INTEGER_TYPE
4370       && (TYPE_UNSIGNED (spec_type)
4371 	  ? spec_type == c_common_unsigned_type (arg_type)
4372 	  : spec_type == c_common_signed_type (arg_type)))
4373     return true;
4374 
4375   return spec_type == arg_type;
4376 }
4377 
4378 /* Subroutine of get_format_for_type.
4379 
4380    Generate a string containing the length modifier and conversion specifier
4381    that should be used to format arguments of type ARG_TYPE within FKI
4382    (effectively the inverse of the checking code).
4383 
4384    If CONVERSION_CHAR is not zero (the first pass), the resulting suggestion
4385    is required to use it, for correcting bogus length modifiers.
4386    If CONVERSION_CHAR is zero (the second pass), then allow any suggestion
4387    that matches ARG_TYPE.
4388 
4389    If successful, returns a non-NULL string which should be freed
4390    by the caller.
4391    Otherwise, returns NULL.  */
4392 
4393 static char *
get_format_for_type_1(const format_kind_info * fki,tree arg_type,char conversion_char)4394 get_format_for_type_1 (const format_kind_info *fki, tree arg_type,
4395 		       char conversion_char)
4396 {
4397   gcc_assert (arg_type);
4398 
4399   const format_char_info *spec;
4400   for (spec = &fki->conversion_specs[0];
4401        spec->format_chars;
4402        spec++)
4403     {
4404       if (conversion_char)
4405 	if (!strchr (spec->format_chars, conversion_char))
4406 	  continue;
4407 
4408       tree effective_arg_type = deref_n_times (arg_type,
4409 					       spec->pointer_count);
4410       if (!effective_arg_type)
4411 	continue;
4412       for (int i = 0; i < FMT_LEN_MAX; i++)
4413 	{
4414 	  const format_type_detail *ftd = &spec->types[i];
4415 	  if (!ftd->type || *ftd->type == NULL_TREE)
4416 	    continue;
4417 	  if (matching_type_p (*ftd->type, effective_arg_type))
4418 	    {
4419 	      const char *len_modifier
4420 		= get_modifier_for_format_len (fki->length_char_specs,
4421 					       (enum format_lengths)i);
4422 	      if (!len_modifier)
4423 		len_modifier = "";
4424 
4425 	      if (conversion_char)
4426 		/* We found a match, using the given conversion char - the
4427 		   length modifier was incorrect (or absent).
4428 		   Provide a suggestion using the conversion char with the
4429 		   correct length modifier for the type.  */
4430 		return xasprintf ("%s%c", len_modifier, conversion_char);
4431 	      else
4432 		/* 2nd pass: no match was possible using the user-provided
4433 		   conversion char, but we do have a match without using it.
4434 		   Provide a suggestion using the first conversion char
4435 		   listed for the given type.  */
4436 		return xasprintf ("%s%c", len_modifier, spec->format_chars[0]);
4437 	    }
4438 	}
4439    }
4440 
4441   return NULL;
4442 }
4443 
4444 /* Generate a string containing the length modifier and conversion specifier
4445    that should be used to format arguments of type ARG_TYPE within FKI
4446    (effectively the inverse of the checking code).
4447 
4448    If successful, returns a non-NULL string which should be freed
4449    by the caller.
4450    Otherwise, returns NULL.  */
4451 
4452 static char *
get_format_for_type(const format_kind_info * fki,tree arg_type,char conversion_char)4453 get_format_for_type (const format_kind_info *fki, tree arg_type,
4454 		     char conversion_char)
4455 {
4456   gcc_assert (arg_type);
4457   gcc_assert (conversion_char);
4458 
4459   /* First pass: look for a format_char_info containing CONVERSION_CHAR
4460      If we find one, then presumably the length modifier was incorrect
4461      (or absent).  */
4462   char *result = get_format_for_type_1 (fki, arg_type, conversion_char);
4463   if (result)
4464     return result;
4465 
4466   /* Second pass: we didn't find a match for CONVERSION_CHAR, so try
4467      matching just on the type. */
4468   return get_format_for_type_1 (fki, arg_type, '\0');
4469 }
4470 
4471 /* Attempt to get a string for use as a replacement fix-it hint for the
4472    source range in FMT_LOC.
4473 
4474    Preserve all of the text within the range of FMT_LOC up to
4475    OFFSET_TO_TYPE_START, replacing the rest with an appropriate
4476    length modifier and conversion specifier for ARG_TYPE, attempting
4477    to keep the user-provided CONVERSION_CHAR if possible.
4478 
4479    For example, given a long vs long long mismatch for arg5 here:
4480 
4481     000000000111111111122222222223333333333|
4482     123456789012345678901234567890123456789` column numbers
4483                    0000000000111111111122|
4484                    0123456789012345678901` string offsets
4485                           V~~~~~~~~ : range of FMT_LOC, from cols 23-31
4486       sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4487                                 ^ ^
4488                                 | ` CONVERSION_CHAR: 'd'
4489                                 type starts here
4490 
4491    where OFFSET_TO_TYPE_START is 13 (the offset to the "lld" within the
4492    STRING_CST), where the user provided:
4493      %-+*.*lld
4494    the result (assuming "long" argument 5) should be:
4495      %-+*.*ld
4496 
4497    If successful, returns a non-NULL string which should be freed
4498    by the caller.
4499    Otherwise, returns NULL.  */
4500 
4501 static char *
get_corrected_substring(const substring_loc & fmt_loc,format_wanted_type * type,tree arg_type,const format_kind_info * fki,int offset_to_type_start,char conversion_char)4502 get_corrected_substring (const substring_loc &fmt_loc,
4503 			 format_wanted_type *type, tree arg_type,
4504 			 const format_kind_info *fki,
4505 			 int offset_to_type_start, char conversion_char)
4506 {
4507   /* Attempt to provide hints for argument types, but not for field widths
4508      and precisions.  */
4509   if (!arg_type)
4510     return NULL;
4511   if (type->kind != CF_KIND_FORMAT)
4512     return NULL;
4513 
4514   /* Locate the current code within the source range, rejecting
4515      any awkward cases where the format string occupies more than
4516      one line.
4517      Lookup the place where the type starts (including any length
4518      modifiers), getting it as the caret location.  */
4519   substring_loc type_loc (fmt_loc);
4520   type_loc.set_caret_index (offset_to_type_start);
4521 
4522   location_t fmt_substring_loc;
4523   const char *err = type_loc.get_location (&fmt_substring_loc);
4524   if (err)
4525     return NULL;
4526 
4527   source_range fmt_substring_range
4528     = get_range_from_loc (line_table, fmt_substring_loc);
4529 
4530   expanded_location caret
4531     = expand_location_to_spelling_point (fmt_substring_loc);
4532   expanded_location start
4533     = expand_location_to_spelling_point (fmt_substring_range.m_start);
4534   expanded_location finish
4535     = expand_location_to_spelling_point (fmt_substring_range.m_finish);
4536   if (caret.file != start.file)
4537     return NULL;
4538   if (start.file != finish.file)
4539     return NULL;
4540   if (caret.line != start.line)
4541     return NULL;
4542   if (start.line != finish.line)
4543     return NULL;
4544   if (start.column > caret.column)
4545     return NULL;
4546   if (start.column > finish.column)
4547     return NULL;
4548   if (caret.column > finish.column)
4549     return NULL;
4550 
4551   char_span line = location_get_source_line (start.file, start.line);
4552   if (!line)
4553     return NULL;
4554 
4555   /* If we got this far, then we have the line containing the
4556      existing conversion specification.
4557 
4558      Generate a trimmed copy, containing the prefix part of the conversion
4559      specification, up to the (but not including) the length modifier.
4560      In the above example, this would be "%-+*.*".  */
4561   int length_up_to_type = caret.column - start.column;
4562   char_span prefix_span = line.subspan (start.column - 1, length_up_to_type);
4563   char *prefix = prefix_span.xstrdup ();
4564 
4565   /* Now attempt to generate a suggestion for the rest of the specification
4566      (length modifier and conversion char), based on ARG_TYPE and
4567      CONVERSION_CHAR.
4568      In the above example, this would be "ld".  */
4569   char *format_for_type = get_format_for_type (fki, arg_type, conversion_char);
4570   if (!format_for_type)
4571     {
4572       free (prefix);
4573       return NULL;
4574     }
4575 
4576   /* Success.  Generate the resulting suggestion for the whole range of
4577      FMT_LOC by concatenating the two strings.
4578      In the above example, this would be "%-+*.*ld".  */
4579   char *result = concat (prefix, format_for_type, NULL);
4580   free (format_for_type);
4581   free (prefix);
4582   return result;
4583 }
4584 
4585 /* Helper class for adding zero or more trailing '*' to types.
4586 
4587    The format type and name exclude any '*' for pointers, so those
4588    must be formatted manually.  For all the types we currently have,
4589    this is adequate, but formats taking pointers to functions or
4590    arrays would require the full type to be built up in order to
4591    print it with %T.  */
4592 
4593 class indirection_suffix
4594 {
4595  public:
indirection_suffix(int pointer_count)4596   indirection_suffix (int pointer_count) : m_pointer_count (pointer_count) {}
4597 
4598   /* Determine the size of the buffer (including NUL-terminator).  */
4599 
get_buffer_size() const4600   size_t get_buffer_size () const
4601   {
4602     return m_pointer_count + 2;
4603   }
4604 
4605   /* Write the '*' to DST and add a NUL-terminator.  */
4606 
fill_buffer(char * dst) const4607   void fill_buffer (char *dst) const
4608   {
4609     if (m_pointer_count == 0)
4610       dst[0] = 0;
4611     else if (c_dialect_cxx ())
4612       {
4613 	memset (dst, '*', m_pointer_count);
4614 	dst[m_pointer_count] = 0;
4615       }
4616     else
4617       {
4618 	dst[0] = ' ';
4619 	memset (dst + 1, '*', m_pointer_count);
4620 	dst[m_pointer_count + 1] = 0;
4621       }
4622   }
4623 
4624  private:
4625   int m_pointer_count;
4626 };
4627 
4628 /* Subclass of range_label for labelling the range in the format string
4629    with the type in question, adding trailing '*' for pointer_count.  */
4630 
4631 class range_label_for_format_type_mismatch
4632   : public range_label_for_type_mismatch
4633 {
4634  public:
range_label_for_format_type_mismatch(tree labelled_type,tree other_type,int pointer_count)4635   range_label_for_format_type_mismatch (tree labelled_type, tree other_type,
4636 					int pointer_count)
4637   : range_label_for_type_mismatch (labelled_type, other_type),
4638     m_pointer_count (pointer_count)
4639   {
4640   }
4641 
get_text(unsigned range_idx) const4642   label_text get_text (unsigned range_idx) const FINAL OVERRIDE
4643   {
4644     label_text text = range_label_for_type_mismatch::get_text (range_idx);
4645     if (text.m_buffer == NULL)
4646       return text;
4647 
4648     indirection_suffix suffix (m_pointer_count);
4649     char *p = (char *) alloca (suffix.get_buffer_size ());
4650     suffix.fill_buffer (p);
4651 
4652     char *result = concat (text.m_buffer, p, NULL);
4653     text.maybe_free ();
4654     return label_text::take (result);
4655   }
4656 
4657  private:
4658   int m_pointer_count;
4659 };
4660 
4661 /* Give a warning about a format argument of different type from that expected.
4662    The range of the diagnostic is taken from WHOLE_FMT_LOC; the caret location
4663    is based on the location of the char at TYPE->offset_loc.
4664    PARAM_LOC is the location of the relevant argument, or UNKNOWN_LOCATION
4665    if this is unavailable.
4666    WANTED_TYPE is the type the argument should have,
4667    possibly stripped of pointer dereferences.  The description (such as "field
4668    precision"), the placement in the format string, a possibly more
4669    friendly name of WANTED_TYPE, and the number of pointer dereferences
4670    are taken from TYPE.  ARG_TYPE is the type of the actual argument,
4671    or NULL if it is missing.
4672 
4673    OFFSET_TO_TYPE_START is the offset within the execution-charset encoded
4674    format string to where type information begins for the conversion
4675    (the length modifier and conversion specifier).
4676    CONVERSION_CHAR is the user-provided conversion specifier.
4677 
4678    For example, given a type mismatch for argument 5 here:
4679 
4680     00000000011111111112222222222333333333344444444445555555555|
4681     12345678901234567890123456789012345678901234567890123456789` column numbers
4682                    0000000000111111111122|
4683                    0123456789012345678901` offsets within STRING_CST
4684                           V~~~~~~~~ : range of WHOLE_FMT_LOC, from cols 23-31
4685       sprintf (d, "before %-+*.*lld after", int_expr, int_expr, long_expr);
4686                                 ^ ^                             ^~~~~~~~~
4687                                 | ` CONVERSION_CHAR: 'd'        PARAM_LOC
4688                                 type starts here
4689 
4690    OFFSET_TO_TYPE_START is 13, the offset to the "lld" within the
4691    STRING_CST.  */
4692 
4693 static void
format_type_warning(const substring_loc & whole_fmt_loc,location_t param_loc,format_wanted_type * type,tree wanted_type,tree arg_type,const format_kind_info * fki,int offset_to_type_start,char conversion_char)4694 format_type_warning (const substring_loc &whole_fmt_loc,
4695 		     location_t param_loc,
4696 		     format_wanted_type *type,
4697 		     tree wanted_type, tree arg_type,
4698 		     const format_kind_info *fki,
4699 		     int offset_to_type_start,
4700 		     char conversion_char)
4701 {
4702   enum format_specifier_kind kind = type->kind;
4703   const char *wanted_type_name = type->wanted_type_name;
4704   const char *format_start = type->format_start;
4705   int format_length = type->format_length;
4706   int pointer_count = type->pointer_count;
4707   int arg_num = type->arg_num;
4708 
4709   /* If ARG_TYPE is a typedef with a misleading name (for example,
4710      size_t but not the standard size_t expected by printf %zu), avoid
4711      printing the typedef name.  */
4712   if (wanted_type_name
4713       && arg_type
4714       && TYPE_NAME (arg_type)
4715       && TREE_CODE (TYPE_NAME (arg_type)) == TYPE_DECL
4716       && DECL_NAME (TYPE_NAME (arg_type))
4717       && !strcmp (wanted_type_name,
4718 		  lang_hooks.decl_printable_name (TYPE_NAME (arg_type), 2)))
4719     arg_type = TYPE_MAIN_VARIANT (arg_type);
4720 
4721   indirection_suffix suffix (pointer_count);
4722   char *p = (char *) alloca (suffix.get_buffer_size ());
4723   suffix.fill_buffer (p);
4724 
4725   /* WHOLE_FMT_LOC has the caret at the end of the range.
4726      Set the caret to be at the offset from TYPE.  Subtract one
4727      from the offset for the same reason as in format_warning_at_char.  */
4728   substring_loc fmt_loc (whole_fmt_loc);
4729   fmt_loc.set_caret_index (type->offset_loc - 1);
4730 
4731   range_label_for_format_type_mismatch fmt_label (wanted_type, arg_type,
4732 						  pointer_count);
4733   range_label_for_type_mismatch param_label (arg_type, wanted_type);
4734 
4735   /* Get a string for use as a replacement fix-it hint for the range in
4736      fmt_loc, or NULL.  */
4737   char *corrected_substring
4738     = get_corrected_substring (fmt_loc, type, arg_type, fki,
4739 			       offset_to_type_start, conversion_char);
4740   format_string_diagnostic_t diag (fmt_loc, &fmt_label, param_loc, &param_label,
4741 				   corrected_substring);
4742   if (wanted_type_name)
4743     {
4744       if (arg_type)
4745 	diag.emit_warning
4746 	  (OPT_Wformat_,
4747 	   "%s %<%s%.*s%> expects argument of type %<%s%s%>, "
4748 	   "but argument %d has type %qT",
4749 	   gettext (kind_descriptions[kind]),
4750 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4751 	   format_length, format_start,
4752 	   wanted_type_name, p, arg_num, arg_type);
4753       else
4754 	diag.emit_warning
4755 	  (OPT_Wformat_,
4756 	   "%s %<%s%.*s%> expects a matching %<%s%s%> argument",
4757 	   gettext (kind_descriptions[kind]),
4758 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4759 	   format_length, format_start, wanted_type_name, p);
4760     }
4761   else
4762     {
4763       if (arg_type)
4764 	diag.emit_warning
4765 	  (OPT_Wformat_,
4766 	   "%s %<%s%.*s%> expects argument of type %<%T%s%>, "
4767 	   "but argument %d has type %qT",
4768 	   gettext (kind_descriptions[kind]),
4769 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4770 	   format_length, format_start,
4771 	   wanted_type, p, arg_num, arg_type);
4772       else
4773 	diag.emit_warning
4774 	  (OPT_Wformat_,
4775 	   "%s %<%s%.*s%> expects a matching %<%T%s%> argument",
4776 	   gettext (kind_descriptions[kind]),
4777 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4778 	   format_length, format_start, wanted_type, p);
4779     }
4780 
4781   free (corrected_substring);
4782 }
4783 
4784 
4785 /* Given a format_char_info array FCI, and a character C, this function
4786    returns the index into the conversion_specs where that specifier's
4787    data is located.  The character must exist.  */
4788 static unsigned int
find_char_info_specifier_index(const format_char_info * fci,int c)4789 find_char_info_specifier_index (const format_char_info *fci, int c)
4790 {
4791   unsigned i;
4792 
4793   for (i = 0; fci->format_chars; i++, fci++)
4794     if (strchr (fci->format_chars, c))
4795       return i;
4796 
4797   /* We shouldn't be looking for a non-existent specifier.  */
4798   gcc_unreachable ();
4799 }
4800 
4801 /* Given a format_length_info array FLI, and a character C, this
4802    function returns the index into the conversion_specs where that
4803    modifier's data is located.  The character must exist.  */
4804 static unsigned int
find_length_info_modifier_index(const format_length_info * fli,int c)4805 find_length_info_modifier_index (const format_length_info *fli, int c)
4806 {
4807   unsigned i;
4808 
4809   for (i = 0; fli->name; i++, fli++)
4810     if (strchr (fli->name, c))
4811       return i;
4812 
4813   /* We shouldn't be looking for a non-existent modifier.  */
4814   gcc_unreachable ();
4815 }
4816 
4817 /* Determine the type of HOST_WIDE_INT in the code being compiled for
4818    use in GCC's __asm_fprintf__ custom format attribute.  You must
4819    have set dynamic_format_types before calling this function.  */
4820 static void
init_dynamic_asm_fprintf_info(void)4821 init_dynamic_asm_fprintf_info (void)
4822 {
4823   static tree hwi;
4824 
4825   if (!hwi)
4826     {
4827       format_length_info *new_asm_fprintf_length_specs;
4828       unsigned int i;
4829 
4830       /* Find the underlying type for HOST_WIDE_INT.  For the %w
4831 	 length modifier to work, one must have issued: "typedef
4832 	 HOST_WIDE_INT __gcc_host_wide_int__;" in one's source code
4833 	 prior to using that modifier.  */
4834       hwi = maybe_get_identifier ("__gcc_host_wide_int__");
4835       if (!hwi)
4836 	{
4837 	  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4838 	  return;
4839 	}
4840       hwi = identifier_global_value (hwi);
4841       if (!hwi || TREE_CODE (hwi) != TYPE_DECL)
4842 	{
4843 	  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4844 	  return;
4845 	}
4846       hwi = DECL_ORIGINAL_TYPE (hwi);
4847       gcc_assert (hwi);
4848       if (hwi != long_integer_type_node && hwi != long_long_integer_type_node)
4849 	{
4850 	  error ("%<__gcc_host_wide_int__%> is not defined as %<long%>"
4851 		 " or %<long long%>");
4852 	  return;
4853 	}
4854 
4855       /* Create a new (writable) copy of asm_fprintf_length_specs.  */
4856       new_asm_fprintf_length_specs = (format_length_info *)
4857 				     xmemdup (asm_fprintf_length_specs,
4858 					      sizeof (asm_fprintf_length_specs),
4859 					      sizeof (asm_fprintf_length_specs));
4860 
4861       /* HOST_WIDE_INT must be one of 'long' or 'long long'.  */
4862       i = find_length_info_modifier_index (new_asm_fprintf_length_specs, 'w');
4863       if (hwi == long_integer_type_node)
4864 	new_asm_fprintf_length_specs[i].index = FMT_LEN_l;
4865       else if (hwi == long_long_integer_type_node)
4866 	new_asm_fprintf_length_specs[i].index = FMT_LEN_ll;
4867       else
4868 	gcc_unreachable ();
4869 
4870       /* Assign the new data for use.  */
4871       dynamic_format_types[asm_fprintf_format_type].length_char_specs =
4872 	new_asm_fprintf_length_specs;
4873     }
4874 }
4875 
4876 static const format_length_info*
get_init_dynamic_hwi(void)4877 get_init_dynamic_hwi (void)
4878 {
4879   static tree hwi;
4880   static format_length_info *diag_ls;
4881 
4882   if (!hwi)
4883     {
4884       unsigned int i;
4885 
4886       /* Find the underlying type for HOST_WIDE_INT.  For the 'w'
4887 	 length modifier to work, one must have issued: "typedef
4888 	 HOST_WIDE_INT __gcc_host_wide_int__;" in one's source code
4889 	 prior to using that modifier.  */
4890       if ((hwi = maybe_get_identifier ("__gcc_host_wide_int__")))
4891 	{
4892 	  hwi = identifier_global_value (hwi);
4893 	  if (hwi)
4894 	    {
4895 	      if (TREE_CODE (hwi) != TYPE_DECL)
4896 		{
4897 		  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4898 		  hwi = 0;
4899 		}
4900 	      else
4901 		{
4902 		  hwi = DECL_ORIGINAL_TYPE (hwi);
4903 		  gcc_assert (hwi);
4904 		  if (hwi != long_integer_type_node
4905 		      && hwi != long_long_integer_type_node)
4906 		    {
4907 		      error ("%<__gcc_host_wide_int__%> is not defined"
4908 			     " as %<long%> or %<long long%>");
4909 		      hwi = 0;
4910 		    }
4911 		}
4912 	    }
4913 	}
4914       if (!diag_ls)
4915 	diag_ls = (format_length_info *)
4916 		  xmemdup (gcc_diag_length_specs,
4917 			   sizeof (gcc_diag_length_specs),
4918 			   sizeof (gcc_diag_length_specs));
4919       if (hwi)
4920 	{
4921 	  /* HOST_WIDE_INT must be one of 'long' or 'long long'.  */
4922 	  i = find_length_info_modifier_index (diag_ls, 'w');
4923 	  if (hwi == long_integer_type_node)
4924 	    diag_ls[i].index = FMT_LEN_l;
4925 	  else if (hwi == long_long_integer_type_node)
4926 	    diag_ls[i].index = FMT_LEN_ll;
4927 	  else
4928 	    gcc_unreachable ();
4929 	}
4930     }
4931   return diag_ls;
4932 }
4933 
4934 /* Determine the type of a "locus" in the code being compiled for use
4935    in GCC's __gcc_gfc__ custom format attribute.  You must have set
4936    dynamic_format_types before calling this function.  */
4937 static void
init_dynamic_gfc_info(void)4938 init_dynamic_gfc_info (void)
4939 {
4940   dynamic_format_types[gcc_gfc_format_type].length_char_specs
4941     = get_init_dynamic_hwi ();
4942 
4943   if (!locus)
4944     {
4945       static format_char_info *gfc_fci;
4946 
4947       /* For the GCC __gcc_gfc__ custom format specifier to work, one
4948 	 must have declared 'locus' prior to using this attribute.  If
4949 	 we haven't seen this declarations then you shouldn't use the
4950 	 specifier requiring that type.  */
4951       if ((locus = maybe_get_identifier ("locus")))
4952 	{
4953 	  locus = identifier_global_value (locus);
4954 	  if (locus)
4955 	    {
4956 	      if (TREE_CODE (locus) != TYPE_DECL
4957 		  || TREE_TYPE (locus) == error_mark_node)
4958 		{
4959 		  error ("%<locus%> is not defined as a type");
4960 		  locus = 0;
4961 		}
4962 	      else
4963 		locus = TREE_TYPE (locus);
4964 	    }
4965 	}
4966 
4967       /* Assign the new data for use.  */
4968 
4969       /* Handle the __gcc_gfc__ format specifics.  */
4970       if (!gfc_fci)
4971 	dynamic_format_types[gcc_gfc_format_type].conversion_specs =
4972 	  gfc_fci = (format_char_info *)
4973 		     xmemdup (gcc_gfc_char_table,
4974 			      sizeof (gcc_gfc_char_table),
4975 			      sizeof (gcc_gfc_char_table));
4976       if (locus)
4977 	{
4978 	  const unsigned i = find_char_info_specifier_index (gfc_fci, 'L');
4979 	  gfc_fci[i].types[0].type = &locus;
4980 	  gfc_fci[i].pointer_count = 1;
4981 	}
4982     }
4983 }
4984 
4985 /* Lookup the type named NAME and return a NAME type if found.
4986    Otherwise, return void_type_node if NAME has not been used yet,
4987    or NULL_TREE if NAME is not a type (issuing an error).  */
4988 
4989 static tree
get_named_type(const char * name)4990 get_named_type (const char *name)
4991 {
4992   if (tree result = maybe_get_identifier (name))
4993     {
4994       result = identifier_global_tag (result);
4995       if (result)
4996 	{
4997 	  if (TYPE_P (result))
4998 	    ;
4999 	  else if (TREE_CODE (result) == TYPE_DECL)
5000 	    result = TREE_TYPE (result);
5001 	  else
5002 	    {
5003 	      error ("%qs is not defined as a type", name);
5004 	      result = NULL_TREE;
5005 	    }
5006 	}
5007       return result;
5008     }
5009   else
5010     return void_type_node;
5011 }
5012 
5013 /* Determine the types of "tree" and "location_t" in the code being
5014    compiled for use in GCC's diagnostic custom format attributes.  You
5015    must have set dynamic_format_types before calling this function.  */
5016 static void
init_dynamic_diag_info(void)5017 init_dynamic_diag_info (void)
5018 {
5019   /* For the GCC-diagnostics custom format specifiers to work, one
5020      must have declared 'tree' and 'location_t' prior to using those
5021      attributes.  If we haven't seen these declarations then
5022      the specifiers requiring these types shouldn't be used.
5023      However we don't force a hard ICE because we may see only one
5024      or the other type.  */
5025   if (tree loc = maybe_get_identifier ("location_t"))
5026     {
5027       loc = identifier_global_value (loc);
5028       if (loc && TREE_CODE (loc) != TYPE_DECL)
5029 	error ("%<location_t%> is not defined as a type");
5030     }
5031 
5032   /* Initialize the global tree node type local to this file.  */
5033   if (!local_tree_type_node
5034       || local_tree_type_node == void_type_node)
5035     {
5036       /* We need to grab the underlying 'union tree_node' so peek into
5037 	 an extra type level.  */
5038       if ((local_tree_type_node = maybe_get_identifier ("tree")))
5039 	{
5040 	  local_tree_type_node
5041 	    = identifier_global_value (local_tree_type_node);
5042 	  if (local_tree_type_node)
5043 	    {
5044 	      if (TREE_CODE (local_tree_type_node) != TYPE_DECL)
5045 		{
5046 		  error ("%<tree%> is not defined as a type");
5047 		  local_tree_type_node = NULL_TREE;
5048 		}
5049 	      else if (TREE_CODE (TREE_TYPE (local_tree_type_node))
5050 		       != POINTER_TYPE)
5051 		{
5052 		  error ("%<tree%> is not defined as a pointer type");
5053 		  local_tree_type_node = NULL_TREE;
5054 		}
5055 	      else
5056 		local_tree_type_node
5057 		  = TREE_TYPE (TREE_TYPE (local_tree_type_node));
5058 	    }
5059 	}
5060       else
5061 	local_tree_type_node = void_type_node;
5062     }
5063 
5064   /* Similar to the above but for gimple*.  */
5065   if (!local_gimple_ptr_node
5066       || local_gimple_ptr_node == void_type_node)
5067     local_gimple_ptr_node = get_named_type ("gimple");
5068 
5069   /* Similar to the above but for cgraph_node*.  */
5070   if (!local_cgraph_node_ptr_node
5071       || local_cgraph_node_ptr_node == void_type_node)
5072     local_cgraph_node_ptr_node = get_named_type ("cgraph_node");
5073 
5074   /* Similar to the above but for diagnostic_event_id_t*.  */
5075   if (!local_event_ptr_node
5076       || local_event_ptr_node == void_type_node)
5077     local_event_ptr_node = get_named_type ("diagnostic_event_id_t");
5078 
5079   /* All the GCC diag formats use the same length specs.  */
5080   dynamic_format_types[gcc_diag_format_type].length_char_specs =
5081     dynamic_format_types[gcc_tdiag_format_type].length_char_specs =
5082     dynamic_format_types[gcc_cdiag_format_type].length_char_specs =
5083     dynamic_format_types[gcc_cxxdiag_format_type].length_char_specs =
5084     dynamic_format_types[gcc_dump_printf_format_type].length_char_specs
5085     = get_init_dynamic_hwi ();
5086 
5087   /* It's safe to "re-initialize these to the same values.  */
5088   dynamic_format_types[gcc_diag_format_type].conversion_specs =
5089     gcc_diag_char_table;
5090   dynamic_format_types[gcc_tdiag_format_type].conversion_specs =
5091     gcc_tdiag_char_table;
5092   dynamic_format_types[gcc_cdiag_format_type].conversion_specs =
5093     gcc_cdiag_char_table;
5094   dynamic_format_types[gcc_cxxdiag_format_type].conversion_specs =
5095     gcc_cxxdiag_char_table;
5096   dynamic_format_types[gcc_dump_printf_format_type].conversion_specs =
5097     gcc_dump_printf_char_table;
5098 }
5099 
/* Optional target hooks for format checking.  Each declaration is
   only visible when the target defines the corresponding macro.  */

#ifdef TARGET_FORMAT_TYPES
/* Additional format kinds; handle_format_attribute appends them to
   the generic table on first use.  */
extern const format_kind_info TARGET_FORMAT_TYPES[];
#endif

#ifdef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
/* Attribute-name overrides, searched by
   convert_format_name_to_system_name before the GNU defaults.  */
extern const target_ovr_attr TARGET_OVERRIDES_FORMAT_ATTRIBUTES[];
#endif

#ifdef TARGET_OVERRIDES_FORMAT_INIT
/* Called by convert_format_name_to_system_name before the override
   table is consulted.  */
extern void TARGET_OVERRIDES_FORMAT_INIT (void);
#endif
5110 
5111 /* Attributes such as "printf" are equivalent to those such as
5112    "gnu_printf" unless this is overridden by a target.  */
5113 static const target_ovr_attr gnu_target_overrides_format_attributes[] =
5114 {
5115   { "gnu_printf",   "printf" },
5116   { "gnu_syslog",   "syslog" },
5117   { "gnu_scanf",    "scanf" },
5118   { "gnu_strftime", "strftime" },
5119   { "gnu_strfmon",  "strfmon" },
5120   { NULL,           NULL }
5121 };
5122 
5123 /* Translate to unified attribute name. This is used in decode_format_type and
5124    decode_format_attr. In attr_name the user specified argument is passed. It
5125    returns the unified format name from TARGET_OVERRIDES_FORMAT_ATTRIBUTES
5126    or the attr_name passed to this function, if there is no matching entry.  */
5127 static const char *
convert_format_name_to_system_name(const char * attr_name)5128 convert_format_name_to_system_name (const char *attr_name)
5129 {
5130   int i;
5131 
5132   if (attr_name == NULL || *attr_name == 0
5133       || startswith (attr_name, "gcc_"))
5134     return attr_name;
5135 #ifdef TARGET_OVERRIDES_FORMAT_INIT
5136   TARGET_OVERRIDES_FORMAT_INIT ();
5137 #endif
5138 
5139 #ifdef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
5140   /* Check if format attribute is overridden by target.  */
5141   if (TARGET_OVERRIDES_FORMAT_ATTRIBUTES != NULL
5142       && TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT > 0)
5143     {
5144       for (i = 0; i < TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT; ++i)
5145         {
5146           if (cmp_attribs (TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_src,
5147 			   attr_name))
5148             return attr_name;
5149           if (cmp_attribs (TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_dst,
5150 			   attr_name))
5151             return TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_src;
5152         }
5153     }
5154 #endif
5155   /* Otherwise default to gnu format.  */
5156   for (i = 0;
5157        gnu_target_overrides_format_attributes[i].named_attr_src != NULL;
5158        ++i)
5159     {
5160       if (cmp_attribs (gnu_target_overrides_format_attributes[i].named_attr_src,
5161 		       attr_name))
5162         return attr_name;
5163       if (cmp_attribs (gnu_target_overrides_format_attributes[i].named_attr_dst,
5164 		       attr_name))
5165         return gnu_target_overrides_format_attributes[i].named_attr_src;
5166     }
5167 
5168   return attr_name;
5169 }
5170 
5171 /* Handle a "format" attribute; arguments as in
5172    struct attribute_spec.handler.  */
5173 tree
handle_format_attribute(tree * node,tree atname,tree args,int flags,bool * no_add_attrs)5174 handle_format_attribute (tree *node, tree atname, tree args,
5175 			 int flags, bool *no_add_attrs)
5176 {
5177   const_tree type = *node;
5178   function_format_info info;
5179 
5180 #ifdef TARGET_FORMAT_TYPES
5181   /* If the target provides additional format types, we need to
5182      add them to FORMAT_TYPES at first use.  */
5183   if (!dynamic_format_types)
5184     {
5185       dynamic_format_types = XNEWVEC (format_kind_info,
5186 				      n_format_types + TARGET_N_FORMAT_TYPES);
5187       memcpy (dynamic_format_types, format_types_orig,
5188 	      sizeof (format_types_orig));
5189       memcpy (&dynamic_format_types[n_format_types], TARGET_FORMAT_TYPES,
5190 	      TARGET_N_FORMAT_TYPES * sizeof (dynamic_format_types[0]));
5191 
5192       format_types = dynamic_format_types;
5193       /* Provide a reference for the first potential external type.  */
5194       first_target_format_type = n_format_types;
5195       n_format_types += TARGET_N_FORMAT_TYPES;
5196     }
5197 #endif
5198 
5199   /* Canonicalize name of format function.  */
5200   if (TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
5201     TREE_VALUE (args) = canonicalize_attr_name (TREE_VALUE (args));
5202 
5203   if (!decode_format_attr (type, atname, args, &info, /* validated_p = */false))
5204     {
5205       *no_add_attrs = true;
5206       return NULL_TREE;
5207     }
5208 
5209   if (prototype_p (type))
5210     {
5211       if (!check_format_string (type, info.format_num, flags,
5212 				no_add_attrs, info.format_type))
5213 	return NULL_TREE;
5214 
5215       if (info.first_arg_num != 0)
5216 	{
5217 	  unsigned HOST_WIDE_INT arg_num = 1;
5218 	  function_args_iterator iter;
5219 	  tree arg_type;
5220 
5221 	  /* Verify that first_arg_num points to the last arg,
5222 	     the ...  */
5223 	  FOREACH_FUNCTION_ARGS (type, arg_type, iter)
5224 	    arg_num++;
5225 
5226 	  if (arg_num != info.first_arg_num)
5227 	    {
5228 	      if (!(flags & (int) ATTR_FLAG_BUILT_IN))
5229 		error ("argument to be formatted is not %<...%>");
5230 	      *no_add_attrs = true;
5231 	      return NULL_TREE;
5232 	    }
5233 	}
5234     }
5235 
5236   /* Check if this is a strftime variant. Just for this variant
5237      FMT_FLAG_ARG_CONVERT is not set.  */
5238   if ((format_types[info.format_type].flags & (int) FMT_FLAG_ARG_CONVERT) == 0
5239       && info.first_arg_num != 0)
5240     {
5241       error ("strftime formats cannot format arguments");
5242       *no_add_attrs = true;
5243       return NULL_TREE;
5244     }
5245 
5246   /* If this is a custom GCC-internal format type, we have to
5247      initialize certain bits at runtime.  */
5248   if (info.format_type == asm_fprintf_format_type
5249       || info.format_type == gcc_gfc_format_type
5250       || info.format_type == gcc_diag_format_type
5251       || info.format_type == gcc_tdiag_format_type
5252       || info.format_type == gcc_cdiag_format_type
5253       || info.format_type == gcc_cxxdiag_format_type
5254       || info.format_type == gcc_dump_printf_format_type)
5255     {
5256       /* Our first time through, we have to make sure that our
5257 	 format_type data is allocated dynamically and is modifiable.  */
5258       if (!dynamic_format_types)
5259 	format_types = dynamic_format_types = (format_kind_info *)
5260 	  xmemdup (format_types_orig, sizeof (format_types_orig),
5261 		   sizeof (format_types_orig));
5262 
5263       /* If this is format __asm_fprintf__, we have to initialize
5264 	 GCC's notion of HOST_WIDE_INT for checking %wd.  */
5265       if (info.format_type == asm_fprintf_format_type)
5266 	init_dynamic_asm_fprintf_info ();
5267       /* If this is format __gcc_gfc__, we have to initialize GCC's
5268 	 notion of 'locus' at runtime for %L.  */
5269       else if (info.format_type == gcc_gfc_format_type)
5270 	init_dynamic_gfc_info ();
5271       /* If this is one of the diagnostic attributes, then we have to
5272 	 initialize 'location_t' and 'tree' at runtime.  */
5273       else if (info.format_type == gcc_diag_format_type
5274 	       || info.format_type == gcc_tdiag_format_type
5275 	       || info.format_type == gcc_cdiag_format_type
5276 	       || info.format_type == gcc_cxxdiag_format_type
5277 	       || info.format_type == gcc_dump_printf_format_type)
5278 	init_dynamic_diag_info ();
5279       else
5280 	gcc_unreachable ();
5281     }
5282 
5283   return NULL_TREE;
5284 }
5285 
5286 #if CHECKING_P
5287 
5288 namespace selftest {
5289 
5290 /* Selftests of location handling.  */
5291 
5292 /* Get the format_kind_info with the given name.  */
5293 
5294 static const format_kind_info *
get_info(const char * name)5295 get_info (const char *name)
5296 {
5297   int idx = decode_format_type (name);
5298   const format_kind_info *fki = &format_types[idx];
5299   ASSERT_STREQ (fki->name, name);
5300   return fki;
5301 }
5302 
5303 /* Verify that get_format_for_type (FKI, TYPE, CONVERSION_CHAR)
5304    is EXPECTED_FORMAT.  */
5305 
5306 static void
assert_format_for_type_streq(const location & loc,const format_kind_info * fki,const char * expected_format,tree type,char conversion_char)5307 assert_format_for_type_streq (const location &loc, const format_kind_info *fki,
5308 			      const char *expected_format, tree type,
5309 			      char conversion_char)
5310 {
5311   gcc_assert (fki);
5312   gcc_assert (expected_format);
5313   gcc_assert (type);
5314 
5315   char *actual_format = get_format_for_type (fki, type, conversion_char);
5316   ASSERT_STREQ_AT (loc, expected_format, actual_format);
5317   free (actual_format);
5318 }
5319 
/* Selftests for get_format_for_type.  This wrapper supplies the
   location of the use site and picks up a variable named 'fki' from
   the scope in which it is expanded.  */

#define ASSERT_FORMAT_FOR_TYPE_STREQ(EXPECTED_FORMAT, TYPE, CONVERSION_CHAR) \
  assert_format_for_type_streq (SELFTEST_LOCATION, (fki), (EXPECTED_FORMAT), \
                                (TYPE), (CONVERSION_CHAR))
5325 
5326 /* Selftest for get_format_for_type for "printf"-style functions.  */
5327 
5328 static void
test_get_format_for_type_printf()5329 test_get_format_for_type_printf ()
5330 {
5331   const format_kind_info *fki = get_info ("gnu_printf");
5332   ASSERT_NE (fki, NULL);
5333 
5334   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'i');
5335   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'i');
5336   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'o');
5337   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'o');
5338   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'x');
5339   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'x');
5340   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'X');
5341   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'X');
5342   ASSERT_FORMAT_FOR_TYPE_STREQ ("d", integer_type_node, 'd');
5343   ASSERT_FORMAT_FOR_TYPE_STREQ ("i", integer_type_node, 'i');
5344   ASSERT_FORMAT_FOR_TYPE_STREQ ("o", integer_type_node, 'o');
5345   ASSERT_FORMAT_FOR_TYPE_STREQ ("x", integer_type_node, 'x');
5346   ASSERT_FORMAT_FOR_TYPE_STREQ ("X", integer_type_node, 'X');
5347   ASSERT_FORMAT_FOR_TYPE_STREQ ("d", unsigned_type_node, 'd');
5348   ASSERT_FORMAT_FOR_TYPE_STREQ ("i", unsigned_type_node, 'i');
5349   ASSERT_FORMAT_FOR_TYPE_STREQ ("o", unsigned_type_node, 'o');
5350   ASSERT_FORMAT_FOR_TYPE_STREQ ("x", unsigned_type_node, 'x');
5351   ASSERT_FORMAT_FOR_TYPE_STREQ ("X", unsigned_type_node, 'X');
5352   ASSERT_FORMAT_FOR_TYPE_STREQ ("ld", long_integer_type_node, 'd');
5353   ASSERT_FORMAT_FOR_TYPE_STREQ ("li", long_integer_type_node, 'i');
5354   ASSERT_FORMAT_FOR_TYPE_STREQ ("lx", long_integer_type_node, 'x');
5355   ASSERT_FORMAT_FOR_TYPE_STREQ ("lo", long_unsigned_type_node, 'o');
5356   ASSERT_FORMAT_FOR_TYPE_STREQ ("lx", long_unsigned_type_node, 'x');
5357   ASSERT_FORMAT_FOR_TYPE_STREQ ("lld", long_long_integer_type_node, 'd');
5358   ASSERT_FORMAT_FOR_TYPE_STREQ ("lli", long_long_integer_type_node, 'i');
5359   ASSERT_FORMAT_FOR_TYPE_STREQ ("llo", long_long_unsigned_type_node, 'o');
5360   ASSERT_FORMAT_FOR_TYPE_STREQ ("llx", long_long_unsigned_type_node, 'x');
5361   ASSERT_FORMAT_FOR_TYPE_STREQ ("s", build_pointer_type (char_type_node), 'i');
5362 }
5363 
5364 /* Selftest for get_format_for_type for "scanf"-style functions.  */
5365 
5366 static void
test_get_format_for_type_scanf()5367 test_get_format_for_type_scanf ()
5368 {
5369   const format_kind_info *fki = get_info ("gnu_scanf");
5370   ASSERT_NE (fki, NULL);
5371   ASSERT_FORMAT_FOR_TYPE_STREQ ("d", build_pointer_type (integer_type_node), 'd');
5372   ASSERT_FORMAT_FOR_TYPE_STREQ ("u", build_pointer_type (unsigned_type_node), 'u');
5373   ASSERT_FORMAT_FOR_TYPE_STREQ ("ld",
5374 				build_pointer_type (long_integer_type_node), 'd');
5375   ASSERT_FORMAT_FOR_TYPE_STREQ ("lu",
5376 				build_pointer_type (long_unsigned_type_node), 'u');
5377   ASSERT_FORMAT_FOR_TYPE_STREQ
5378     ("lld", build_pointer_type (long_long_integer_type_node), 'd');
5379   ASSERT_FORMAT_FOR_TYPE_STREQ
5380     ("llu", build_pointer_type (long_long_unsigned_type_node), 'u');
5381   ASSERT_FORMAT_FOR_TYPE_STREQ ("e", build_pointer_type (float_type_node), 'e');
5382   ASSERT_FORMAT_FOR_TYPE_STREQ ("le", build_pointer_type (double_type_node), 'e');
5383 }
5384 
5385 #undef ASSERT_FORMAT_FOR_TYPE_STREQ
5386 
5387 /* Exercise the type-printing label code, to give some coverage
5388    under "make selftest-valgrind" (in particular, to ensure that
5389    the label-printing machinery doesn't leak).  */
5390 
5391 static void
test_type_mismatch_range_labels()5392 test_type_mismatch_range_labels ()
5393 {
5394   /* Create a tempfile and write some text to it.
5395      ....................0000000001 11111111 12 22222222
5396      ....................1234567890 12345678 90 12345678.  */
5397   const char *content = "  printf (\"msg: %i\\n\", msg);\n";
5398   temp_source_file tmp (SELFTEST_LOCATION, ".c", content);
5399   line_table_test ltt;
5400 
5401   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
5402 
5403   location_t c17 = linemap_position_for_column (line_table, 17);
5404   ASSERT_EQ (LOCATION_COLUMN (c17), 17);
5405   location_t c18 = linemap_position_for_column (line_table, 18);
5406   location_t c24 = linemap_position_for_column (line_table, 24);
5407   location_t c26 = linemap_position_for_column (line_table, 26);
5408 
5409   /* Don't attempt to run the tests if column data might be unavailable.  */
5410   if (c26 > LINE_MAP_MAX_LOCATION_WITH_COLS)
5411     return;
5412 
5413   location_t fmt = make_location (c18, c17, c18);
5414   ASSERT_EQ (LOCATION_COLUMN (fmt), 18);
5415 
5416   location_t param = make_location (c24, c24, c26);
5417   ASSERT_EQ (LOCATION_COLUMN (param), 24);
5418 
5419   range_label_for_format_type_mismatch fmt_label (char_type_node,
5420 						  integer_type_node, 1);
5421   range_label_for_type_mismatch param_label (integer_type_node,
5422 					     char_type_node);
5423   gcc_rich_location richloc (fmt, &fmt_label);
5424   richloc.add_range (param, SHOW_RANGE_WITHOUT_CARET, &param_label);
5425 
5426   test_diagnostic_context dc;
5427   diagnostic_show_locus (&dc, &richloc, DK_ERROR);
5428   if (c_dialect_cxx ())
5429     /* "char*", without a space.  */
5430     ASSERT_STREQ ("   printf (\"msg: %i\\n\", msg);\n"
5431 		  "                 ~^     ~~~\n"
5432 		  "                  |     |\n"
5433 		  "                  char* int\n",
5434 		  pp_formatted_text (dc.printer));
5435   else
5436     /* "char *", with a space.  */
5437     ASSERT_STREQ ("   printf (\"msg: %i\\n\", msg);\n"
5438 		  "                 ~^     ~~~\n"
5439 		  "                  |     |\n"
5440 		  "                  |     int\n"
5441 		  "                  char *\n",
5442 		  pp_formatted_text (dc.printer));
5443 }
5444 
5445 /* Run all of the selftests within this file.  */
5446 
5447 void
c_format_cc_tests()5448 c_format_cc_tests ()
5449 {
5450   test_get_modifier_for_format_len ();
5451   test_get_format_for_type_printf ();
5452   test_get_format_for_type_scanf ();
5453   test_type_mismatch_range_labels ();
5454 }
5455 
5456 } // namespace selftest
5457 
5458 #endif /* CHECKING_P */
5459 
5460 #include "gt-c-family-c-format.h"
5461