xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/c-family/c-format.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Check calls to formatted I/O functions (-Wformat).
2    Copyright (C) 1992-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "c-target.h"
25 #include "c-common.h"
26 #include "alloc-pool.h"
27 #include "stringpool.h"
28 #include "c-objc.h"
29 #include "intl.h"
30 #include "langhooks.h"
31 #include "c-format.h"
32 #include "diagnostic.h"
33 #include "substring-locations.h"
34 #include "selftest.h"
35 #include "selftest-diagnostic.h"
36 #include "builtins.h"
37 #include "attribs.h"
38 #include "gcc-rich-location.h"
39 
40 /* Handle attributes associated with format checking.  */
41 
/* Kinds of format string known to the checker.  Apart from
   format_type_error, the enumerators must appear in the same order as
   the entries of format_types.  Target-specific format types have no
   enumerator of their own.  */
enum format_type
{
  printf_format_type,
  asm_fprintf_format_type,
  gcc_diag_format_type,
  gcc_tdiag_format_type,
  gcc_cdiag_format_type,
  gcc_cxxdiag_format_type,
  gcc_gfc_format_type,
  gcc_dump_printf_format_type,
  gcc_objc_string_format_type,
  /* Marks a format type that could not be decoded.  */
  format_type_error = -1
};
52 
53 struct function_format_info
54 {
55   enum format_type format_type;		/* type of format (printf, scanf, etc.) */
56   /* IS_RAW is relevant only for GCC diagnostic format functions.
57      It is set for "raw" formatting functions like pp_printf that
58      are not intended to produce complete diagnostics according to
59      GCC guidelines, and clear for others like error and warning
60      whose format string is checked for proper quoting and spelling.  */
61   bool is_raw;
62   unsigned HOST_WIDE_INT format_num;	/* number of format argument */
63   unsigned HOST_WIDE_INT first_arg_num;	/* number of first arg (zero for varargs) */
64 };
65 
66 /* File-scope tree nodes, each declared with a GTY marker.  According
   to the original note they are initialized in init_dynamic_diag_info,
   which is not part of this excerpt.  */
67 static GTY(()) tree local_tree_type_node;
68 static GTY(()) tree local_event_ptr_node;
69 static GTY(()) tree local_gimple_ptr_node;
70 static GTY(()) tree local_cgraph_node_ptr_node;
71 static GTY(()) tree locus;
72 
73 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
74 				bool);
75 static format_type decode_format_type (const char *, bool * = NULL);
76 
77 static bool check_format_string (const_tree argument,
78 				 unsigned HOST_WIDE_INT format_num,
79 				 int flags, bool *no_add_attrs,
80 				 int expected_format_type);
81 static tree get_constant (const_tree fntype, const_tree atname, tree expr,
82 			  int argno, unsigned HOST_WIDE_INT *value,
83 			  int flags, bool validated_p);
84 static const char *convert_format_name_to_system_name (const char *attr_name);
85 
86 static int first_target_format_type;
87 static const char *format_name (int format_num);
88 static int format_flags (int format_num);
89 
90 /* Emit a warning as per format_warning_va, but construct the substring_loc
91    for the character at offset (CHAR_IDX - 1) within a string constant
92    FORMAT_STRING_CST at FMT_STRING_LOC.  */
93 
94 ATTRIBUTE_GCC_DIAG (5,6)
95 static bool
format_warning_at_char(location_t fmt_string_loc,tree format_string_cst,int char_idx,int opt,const char * gmsgid,...)96 format_warning_at_char (location_t fmt_string_loc, tree format_string_cst,
97 			int char_idx, int opt, const char *gmsgid, ...)
98 {
99   va_list ap;
100   va_start (ap, gmsgid);
101   tree string_type = TREE_TYPE (format_string_cst);
102 
103   /* The callers are of the form:
104        format_warning (format_string_loc, format_string_cst,
105 		       format_chars - orig_format_chars,
106       where format_chars has already been incremented, so that
107       CHAR_IDX is one character beyond where the warning should
108       be emitted.  Fix it.  */
109   char_idx -= 1;
110 
111   substring_loc fmt_loc (fmt_string_loc, string_type, char_idx, char_idx,
112 			 char_idx);
113   format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
114 				   NULL);
115   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
116   va_end (ap);
117 
118   return warned;
119 }
120 
121 
122 /* Emit a warning as per format_warning_va, but construct the substring_loc
123    for the substring at offset (POS1, POS2 - 1) within a string constant
124    FORMAT_STRING_CST at FMT_STRING_LOC.  */
125 
126 ATTRIBUTE_GCC_DIAG (6,7)
127 static bool
format_warning_substr(location_t fmt_string_loc,tree format_string_cst,int pos1,int pos2,int opt,const char * gmsgid,...)128 format_warning_substr (location_t fmt_string_loc, tree format_string_cst,
129 		       int pos1, int pos2, int opt, const char *gmsgid, ...)
130 {
131   va_list ap;
132   va_start (ap, gmsgid);
133   tree string_type = TREE_TYPE (format_string_cst);
134 
135   pos2 -= 1;
136 
137   substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2);
138   format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
139 				   NULL);
140   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
141   va_end (ap);
142 
143   return warned;
144 }
145 
146 
147 /* Check that we have a pointer to a string suitable for use as a format.
148    The default is to check for a char type.
149    For objective-c dialects, this is extended to include references to string
150    objects validated by objc_string_ref_type_p ().
151    Targets may also provide a string object type that can be used within c and
152    c++ and shared with their respective objective-c dialects. In this case the
153    reference to a format string is checked for validity via a hook.
154 
155    The function returns true if strref points to any string type valid for the
156    language dialect and target.  */
157 
158 bool
valid_format_string_type_p(tree strref)159 valid_format_string_type_p (tree strref)
160 {
161   return (strref != NULL
162 	  && TREE_CODE (strref) == POINTER_TYPE
163 	  && (TYPE_MAIN_VARIANT (TREE_TYPE (strref)) == char_type_node
164 	      || objc_string_ref_type_p (strref)
165 	      || (*targetcm.string_object_ref_type_p) ((const_tree) strref)));
166 }
167 
168 /* Handle a "format_arg" attribute; arguments as in
169    struct attribute_spec.handler.  */
170 tree
handle_format_arg_attribute(tree * node,tree atname,tree args,int flags,bool * no_add_attrs)171 handle_format_arg_attribute (tree *node, tree atname,
172 			     tree args, int flags, bool *no_add_attrs)
173 {
174   tree type = *node;
175   /* Note that TREE_VALUE (args) is changed in place below.  */
176   tree *format_num_expr = &TREE_VALUE (args);
177   unsigned HOST_WIDE_INT format_num = 0;
178 
179   if (tree val = get_constant (type, atname, *format_num_expr, 0, &format_num,
180 			       0, false))
181     *format_num_expr = val;
182   else
183     {
184       *no_add_attrs = true;
185       return NULL_TREE;
186     }
187 
188   if (prototype_p (type))
189     {
190       /* The format arg can be any string reference valid for the language and
191 	target.  We cannot be more specific in this case.  */
192       if (!check_format_string (type, format_num, flags, no_add_attrs, -1))
193 	return NULL_TREE;
194     }
195 
196   if (!valid_format_string_type_p (TREE_TYPE (type)))
197     {
198       if (!(flags & (int) ATTR_FLAG_BUILT_IN))
199 	error ("function does not return string type");
200       *no_add_attrs = true;
201       return NULL_TREE;
202     }
203 
204   return NULL_TREE;
205 }
206 
207 /* Verify that the format_num argument is actually a string reference suitable,
208    for the language dialect and target (in case the format attribute is in
209    error).  When we know the specific reference type expected, this is also
210    checked.  */
211 static bool
check_format_string(const_tree fntype,unsigned HOST_WIDE_INT format_num,int flags,bool * no_add_attrs,int expected_format_type)212 check_format_string (const_tree fntype, unsigned HOST_WIDE_INT format_num,
213 		     int flags, bool *no_add_attrs, int expected_format_type)
214 {
215   unsigned HOST_WIDE_INT i;
216   bool is_objc_sref, is_target_sref, is_char_ref;
217   tree ref;
218   int fmt_flags;
219   function_args_iterator iter;
220 
221   i = 1;
222   FOREACH_FUNCTION_ARGS (fntype, ref, iter)
223     {
224       if (i == format_num)
225 	break;
226       i++;
227     }
228 
229   if (!ref
230       || !valid_format_string_type_p (ref))
231     {
232       if (!(flags & (int) ATTR_FLAG_BUILT_IN))
233 	error ("format string argument is not a string type");
234       *no_add_attrs = true;
235       return false;
236     }
237 
238   /* We only know that we want a suitable string reference.  */
239   if (expected_format_type < 0)
240     return true;
241 
242   /* Now check that the arg matches the expected type.  */
243   is_char_ref =
244     (TYPE_MAIN_VARIANT (TREE_TYPE (ref)) == char_type_node);
245 
246   fmt_flags = format_flags (expected_format_type);
247   is_objc_sref = is_target_sref = false;
248   if (!is_char_ref)
249     is_objc_sref = objc_string_ref_type_p (ref);
250 
251   if (!(fmt_flags & FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL))
252     {
253       if (is_char_ref)
254 	return true; /* OK, we expected a char and found one.  */
255       else
256 	{
257 	  /* We expected a char but found an extended string type.  */
258 	  if (is_objc_sref)
259 	    error ("found a %qs reference but the format argument should"
260 		   " be a string", format_name (gcc_objc_string_format_type));
261 	  else
262 	    error ("found a %qT but the format argument should be a string",
263 		   ref);
264 	  *no_add_attrs = true;
265 	  return false;
266 	}
267     }
268 
269   /* We expect a string object type as the format arg.  */
270   if (is_char_ref)
271     {
272       error ("format argument should be a %qs reference but"
273 	     " a string was found", format_name (expected_format_type));
274       *no_add_attrs = true;
275       return false;
276     }
277 
278   /* We will assert that objective-c will support either its own string type
279      or the target-supplied variant.  */
280   if (!is_objc_sref)
281     is_target_sref = (*targetcm.string_object_ref_type_p) ((const_tree) ref);
282 
283   if (expected_format_type == (int) gcc_objc_string_format_type
284       && (is_objc_sref || is_target_sref))
285     return true;
286 
287   /* We will allow a target string ref to match only itself.  */
288   if (first_target_format_type
289       && expected_format_type >= first_target_format_type
290       && is_target_sref)
291     return true;
292   else
293     {
294       error ("format argument should be a %qs reference",
295 	      format_name (expected_format_type));
296       *no_add_attrs = true;
297       return false;
298     }
299 
300   gcc_unreachable ();
301 }
302 
303 /* Under the control of FLAGS, verify EXPR is a valid constant that
304    refers to a positional argument ARGNO having a string type (char*
305    or, for targets like Darwin, a pointer to struct CFString) to
306    a function type FNTYPE declared with attribute ATNAME.
307    If valid, store the constant's integer value in *VALUE and return
308    the value.
309    If VALIDATED_P is true assert the validation is successful.
310    Returns the converted constant value on success, null otherwise.  */
311 
312 static tree
get_constant(const_tree fntype,const_tree atname,tree expr,int argno,unsigned HOST_WIDE_INT * value,int flags,bool validated_p)313 get_constant (const_tree fntype, const_tree atname, tree expr, int argno,
314 	      unsigned HOST_WIDE_INT *value, int flags, bool validated_p)
315 {
316   /* Require the referenced argument to have a string type.  For targets
317      like Darwin, also accept pointers to struct CFString.  */
318   if (tree val = positional_argument (fntype, atname, expr, STRING_CST,
319 				      argno, flags))
320     {
321       *value = TREE_INT_CST_LOW (val);
322       return val;
323     }
324 
325   gcc_assert (!validated_p);
326   return NULL_TREE;
327 }
328 
329 /* Decode the arguments to a "format" attribute into a
330    function_format_info structure.  It is already known that the list
331    is of the right length.  If VALIDATED_P is true, then these
332    attributes have already been validated and must not be erroneous;
333    if false, it will give an error message.  Returns true if the
334    attributes are successfully decoded, false otherwise.  */
335 
336 static bool
decode_format_attr(const_tree fntype,tree atname,tree args,function_format_info * info,bool validated_p)337 decode_format_attr (const_tree fntype, tree atname, tree args,
338 		    function_format_info *info, bool validated_p)
339 {
340   tree format_type_id = TREE_VALUE (args);
341   /* Note that TREE_VALUE (args) is changed in place below.  Ditto
342      for the value of the next element on the list.  */
343   tree *format_num_expr = &TREE_VALUE (TREE_CHAIN (args));
344   tree *first_arg_num_expr = &TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
345 
346   if (TREE_CODE (format_type_id) != IDENTIFIER_NODE)
347     {
348       gcc_assert (!validated_p);
349       error ("unrecognized format specifier");
350       return false;
351     }
352   else
353     {
354       const char *p = IDENTIFIER_POINTER (format_type_id);
355 
356       info->format_type = decode_format_type (p, &info->is_raw);
357 
358       if (!c_dialect_objc ()
359 	   && info->format_type == gcc_objc_string_format_type)
360 	{
361 	  gcc_assert (!validated_p);
362 	  warning (OPT_Wformat_, "%qE is only allowed in Objective-C dialects",
363 		   format_type_id);
364 	  info->format_type = format_type_error;
365 	  return false;
366 	}
367 
368       if (info->format_type == format_type_error)
369 	{
370 	  gcc_assert (!validated_p);
371 	  warning (OPT_Wformat_, "%qE is an unrecognized format function type",
372 		   format_type_id);
373 	  return false;
374 	}
375     }
376 
377   if (tree val = get_constant (fntype, atname, *format_num_expr,
378 			       2, &info->format_num, 0, validated_p))
379     *format_num_expr = val;
380   else
381     return false;
382 
383   if (tree val = get_constant (fntype, atname, *first_arg_num_expr,
384 			       3, &info->first_arg_num,
385 			       (POSARG_ZERO | POSARG_ELLIPSIS), validated_p))
386     *first_arg_num_expr = val;
387   else
388     return false;
389 
390   if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num)
391     {
392       gcc_assert (!validated_p);
393       error ("format string argument follows the arguments to be formatted");
394       return false;
395     }
396 
397   return true;
398 }
399 
400 /* Check a call to a format function against a parameter list.  */
401 
/* The C standard version that C++ is treated as equivalent to, or as
   inheriting from, as far as supported format features go: STD_C94
   before C++11, STD_C99 from C++11 on.  */
#define CPLUSPLUS_STD_VER       (cxx_dialect >= cxx11 ? STD_C99 : STD_C94)
/* The C standard version formats are checked against when pedantic.
   C++ uses CPLUSPLUS_STD_VER; for C the newest enabled standard wins,
   falling back to STD_C89.  */
#define C_STD_VER                                               \
  ((int) (c_dialect_cxx () ? CPLUSPLUS_STD_VER                  \
          : flag_isoc2x ? STD_C2X                               \
          : flag_isoc99 ? STD_C99                               \
          : flag_isoc94 ? STD_C94                               \
          : STD_C89))
/* The name to give to the standard version being warned about when
   pedantic.  FEATURE_VER is the version in which the feature warned
   about appeared, which is higher than C_STD_VER.  */
#define C_STD_NAME(FEATURE_VER)                                 \
  (c_dialect_cxx ()                                             \
   ? (cxx_dialect < cxx11 ? "ISO C++98" : "ISO C++11")          \
   : (FEATURE_VER) == STD_EXT ? "ISO C"                         \
   : (FEATURE_VER) == STD_C2X ? "ISO C17"                       \
   : "ISO C90")
/* Adjust the C standard version VER to account for -Wno-long-long:
   STD_C9L becomes STD_C99 when warn_long_long is set and STD_C89
   otherwise.  Every other version is returned unchanged.  */
#define ADJ_STD(VER)                                            \
  ((int) ((VER) != STD_C9L                                      \
          ? (VER)                                               \
          : (warn_long_long ? STD_C99 : STD_C89)))
429 
/* The kinds of specifier that can occur in a format and that require
   an argument.  */
enum format_specifier_kind
{
  CF_KIND_FORMAT,           /* The conversion itself.  */
  CF_KIND_FIELD_WIDTH,      /* A field width.  */
  CF_KIND_FIELD_PRECISION   /* A field precision.  */
};
437 
438 static const char *kind_descriptions[] = {
439   N_("format"),
440   N_("field width specifier"),
441   N_("field precision specifier")
442 };
443 
444 /* Structure describing details of a type expected in format checking,
445    and the type to check against it.  */
446 struct format_wanted_type
447 {
448   /* The type wanted.  */
449   tree wanted_type;
450   /* The name of this type to use in diagnostics.  */
451   const char *wanted_type_name;
452   /* Should be type checked just for scalar width identity.  */
453   int scalar_identity_flag;
454   /* The level of indirection through pointers at which this type occurs.  */
455   int pointer_count;
456   /* Whether, when pointer_count is 1, to allow any character type when
457      pedantic, rather than just the character or void type specified.  */
458   int char_lenient_flag;
459   /* Whether the argument, dereferenced once, is written into and so the
460      argument must not be a pointer to a const-qualified type.  */
461   int writing_in_flag;
462   /* Whether the argument, dereferenced once, is read from and so
463      must not be a NULL pointer.  */
464   int reading_from_flag;
465   /* The kind of specifier that this type is used for.  */
466   enum format_specifier_kind kind;
467   /* The starting character of the specifier.  This never includes the
468      initial percent sign.  */
469   const char *format_start;
470   /* The length of the specifier.  */
471   int format_length;
472   /* The actual parameter to check against the wanted type.  */
473   tree param;
474   /* The argument number of that parameter.  */
475   int arg_num;
476   /* The offset location of this argument with respect to the format
477      string location.  */
478   unsigned int offset_loc;
479   /* The next type to check for this format conversion, or NULL if none.  */
480   struct format_wanted_type *next;
481 };
482 
/* Expands to the three initializers of an unused format_length_info
   slot: no name, FMT_LEN_none and STD_C89.  */
#define NO_FMT NULL, FMT_LEN_none, STD_C89
485 
/* Length modifiers accepted in printf formats.  Each row gives a
   modifier spelling with its FMT_LEN_* code and standard version,
   then the same three items for the doubled spelling (NO_FMT when
   there is none), then a trailing 0 whose field is declared in
   c-format.h and is not visible here.  The all-NO_FMT row ends the
   table.  */
486 static const format_length_info printf_length_specs[] =
487 {
488   { "h", FMT_LEN_h, STD_C89, "hh", FMT_LEN_hh, STD_C99, 0 },
489   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C9L, 0 },
490   { "q", FMT_LEN_ll, STD_EXT, NO_FMT, 0 },
491   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
492   { "z", FMT_LEN_z, STD_C99, NO_FMT, 0 },
493   { "Z", FMT_LEN_z, STD_EXT, NO_FMT, 0 },
494   { "t", FMT_LEN_t, STD_C99, NO_FMT, 0 },
495   { "j", FMT_LEN_j, STD_C99, NO_FMT, 0 },
496   { "H", FMT_LEN_H, STD_EXT, NO_FMT, 0 },
497   { "D", FMT_LEN_D, STD_EXT, "DD", FMT_LEN_DD, STD_EXT, 0 },
498   { NO_FMT, NO_FMT, 0 }
499 };
500 
501 /* Length specifiers valid for asm_fprintf: "l" with its doubled form
   "ll", and "w".  Every entry is tagged STD_C89.  The all-NO_FMT row
   ends the table.  */
502 static const format_length_info asm_fprintf_length_specs[] =
503 {
504   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
505   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
506   { NO_FMT, NO_FMT, 0 }
507 };
508 
509 /* Length specifiers valid for GCC diagnostics.  The rows are the same
   as those of asm_fprintf_length_specs: "l"/"ll" and "w", all tagged
   STD_C89, followed by the terminating all-NO_FMT row.  */
510 static const format_length_info gcc_diag_length_specs[] =
511 {
512   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
513   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
514   { NO_FMT, NO_FMT, 0 }
515 };
516 
517 /* The custom diagnostics all accept the same length specifiers, so
   the tdiag, cdiag, cxxdiag and dump_printf names are plain aliases
   for gcc_diag_length_specs.  */
518 #define gcc_tdiag_length_specs gcc_diag_length_specs
519 #define gcc_cdiag_length_specs gcc_diag_length_specs
520 #define gcc_cxxdiag_length_specs gcc_diag_length_specs
521 #define gcc_dump_printf_length_specs gcc_diag_length_specs
522 
523 /* Length modifiers accepted in scanf formats.  This differs from
   printf_length_specs only in that "Z" is not accepted; the row layout
   and the terminating all-NO_FMT row are the same.  */
524 static const format_length_info scanf_length_specs[] =
525 {
526   { "h", FMT_LEN_h, STD_C89, "hh", FMT_LEN_hh, STD_C99, 0 },
527   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C9L, 0 },
528   { "q", FMT_LEN_ll, STD_EXT, NO_FMT, 0 },
529   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
530   { "z", FMT_LEN_z, STD_C99, NO_FMT, 0 },
531   { "t", FMT_LEN_t, STD_C99, NO_FMT, 0 },
532   { "j", FMT_LEN_j, STD_C99, NO_FMT, 0 },
533   { "H", FMT_LEN_H, STD_EXT, NO_FMT, 0 },
534   { "D", FMT_LEN_D, STD_EXT, "DD", FMT_LEN_DD, STD_EXT, 0 },
535   { NO_FMT, NO_FMT, 0 }
536 };
537 
538 
539 /* Length modifiers accepted in strfmon formats: only "L".
   All tables for strfmon use STD_C89 everywhere, since -pedantic
   warnings make no sense for a format type not part of any C standard
   version.  */
541 static const format_length_info strfmon_length_specs[] =
542 {
543   /* A GNU extension.  */
544   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
545   { NO_FMT, NO_FMT, 0 }
546 };
547 
548 
549 /* Length modifiers for the Fortran front-end format type.  For now,
   the Fortran front-end routines only use l as length modifier.  */
550 static const format_length_info gcc_gfc_length_specs[] =
551 {
552   { "l", FMT_LEN_l, STD_C89, NO_FMT, 0 },
553   { NO_FMT, NO_FMT, 0 }
554 };
555 
556 
/* Flags recognized in printf formats.  Each row starts with the flag
   character, followed by three small fields whose meaning is declared
   in c-format.h (not visible here; all zero in this table), a short
   and a long description wrapped in N_, and the standard version of
   the flag.  The rows keyed 'w', 'p' and 'L' describe field width,
   precision and length modifier rather than literal flag characters.
   The all-zero row ends the table.  */
557 static const format_flag_spec printf_flag_specs[] =
558 {
559   { ' ',  0, 0, 0, N_("' ' flag"),        N_("the ' ' printf flag"),              STD_C89 },
560   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
561   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
562   { '0',  0, 0, 0, N_("'0' flag"),        N_("the '0' printf flag"),              STD_C89 },
563   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' printf flag"),              STD_C89 },
564   { '\'', 0, 0, 0, N_("''' flag"),        N_("the ''' printf flag"),              STD_EXT },
565   { 'I',  0, 0, 0, N_("'I' flag"),        N_("the 'I' printf flag"),              STD_EXT },
566   { 'w',  0, 0, 0, N_("field width"),     N_("field width in printf format"),     STD_C89 },
567   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
568   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
569   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
570 };
571 
572 
/* Pairs of printf flags that interact: ' ' with '+', '0' with '-', and
   '0' with precision ('p').  The third and fourth fields of each row
   are declared in c-format.h and are not visible here.  The all-zero
   row ends the table.  */
573 static const format_flag_pair printf_flag_pairs[] =
574 {
575   { ' ', '+', 1, 0   },
576   { '0', '-', 1, 0   },
577   { '0', 'p', 1, 'i' },
578   { 0, 0, 0, 0 }
579 };
580 
/* Flags recognized in asm_fprintf formats.  The rows match those of
   printf_flag_specs, minus the ''' and 'I' extension flags, and reuse
   the printf wording in their descriptions.  The all-zero row ends the
   table.  */
581 static const format_flag_spec asm_fprintf_flag_specs[] =
582 {
583   { ' ',  0, 0, 0, N_("' ' flag"),        N_("the ' ' printf flag"),              STD_C89 },
584   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
585   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
586   { '0',  0, 0, 0, N_("'0' flag"),        N_("the '0' printf flag"),              STD_C89 },
587   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' printf flag"),              STD_C89 },
588   { 'w',  0, 0, 0, N_("field width"),     N_("field width in printf format"),     STD_C89 },
589   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
590   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
591   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
592 };
593 
/* Interacting flag pairs for asm_fprintf formats; the rows are the
   same as those of printf_flag_pairs.  The all-zero row ends the
   table.  */
594 static const format_flag_pair asm_fprintf_flag_pairs[] =
595 {
596   { ' ', '+', 1, 0   },
597   { '0', '-', 1, 0   },
598   { '0', 'p', 1, 'i' },
599   { 0, 0, 0, 0 }
600 };
601 
/* Interacting flag pairs for GCC diagnostic formats: none, so the
   table holds only the terminating all-zero row.  */
602 static const format_flag_pair gcc_diag_flag_pairs[] =
603 {
604   { 0, 0, 0, 0 }
605 };
606 
/* The tdiag, cdiag, cxxdiag, gfc and dump_printf format types share
   the (empty) flag-pair table of gcc_diag.  */
607 #define gcc_tdiag_flag_pairs gcc_diag_flag_pairs
608 #define gcc_cdiag_flag_pairs gcc_diag_flag_pairs
609 #define gcc_cxxdiag_flag_pairs gcc_diag_flag_pairs
610 #define gcc_gfc_flag_pairs gcc_diag_flag_pairs
611 #define gcc_dump_printf_flag_pairs gcc_diag_flag_pairs
612 
/* Flags recognized in GCC diagnostic formats: '+', '#', the 'q'
   diagnostic flag, precision ('p') and length modifier ('L').  The 'q'
   row is the only one with a nonzero fourth field; that field is
   declared in c-format.h and is not visible here.  The all-zero row
   ends the table.  */
613 static const format_flag_spec gcc_diag_flag_specs[] =
614 {
615   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
616   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
617   { 'q',  0, 0, 1, N_("'q' flag"),        N_("the 'q' diagnostic flag"),          STD_C89 },
618   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
619   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
620   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
621 };
622 
/* The tdiag, cdiag, cxxdiag, gfc and dump_printf format types share
   the flag table of gcc_diag.  */
623 #define gcc_tdiag_flag_specs gcc_diag_flag_specs
624 #define gcc_cdiag_flag_specs gcc_diag_flag_specs
625 #define gcc_cxxdiag_flag_specs gcc_diag_flag_specs
626 #define gcc_gfc_flag_specs gcc_diag_flag_specs
627 #define gcc_dump_printf_flag_specs gcc_diag_flag_specs
628 
/* Flags recognized in scanf formats: assignment suppression ('*'), the
   'a', 'm', ''' and 'I' extension flags (STD_EXT), field width ('w')
   and length modifier ('L').  The all-zero row ends the table.  */
629 static const format_flag_spec scanf_flag_specs[] =
630 {
631   { '*',  0, 0, 0, N_("assignment suppression"), N_("the assignment suppression scanf feature"), STD_C89 },
632   { 'a',  0, 0, 0, N_("'a' flag"),               N_("the 'a' scanf flag"),                       STD_EXT },
633   { 'm',  0, 0, 0, N_("'m' flag"),               N_("the 'm' scanf flag"),                       STD_EXT },
634   { 'w',  0, 0, 0, N_("field width"),            N_("field width in scanf format"),              STD_C89 },
635   { 'L',  0, 0, 0, N_("length modifier"),        N_("length modifier in scanf format"),          STD_C89 },
636   { '\'', 0, 0, 0, N_("''' flag"),               N_("the ''' scanf flag"),                       STD_EXT },
637   { 'I',  0, 0, 0, N_("'I' flag"),               N_("the 'I' scanf flag"),                       STD_EXT },
638   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
639 };
640 
641 
/* Interacting flag pairs for scanf formats: assignment suppression
   with a length modifier, and 'a' with 'm'.  The all-zero row ends the
   table.  */
642 static const format_flag_pair scanf_flag_pairs[] =
643 {
644   { '*', 'L', 0, 0 },
645   { 'a', 'm', 0, 0 },
646   { 0, 0, 0, 0 }
647 };
648 
649 
/* Flags and modifiers recognized in strftime formats.  The flags and
   the field width are tagged STD_EXT; the 'E' and 'O' modifiers are
   tagged STD_C99.  The last two 'O' rows carry a second character ('o'
   and 'p') and no short name; how that second field is interpreted is
   declared in c-format.h and is not visible here.  The all-zero row
   ends the table.  */
650 static const format_flag_spec strftime_flag_specs[] =
651 {
652   { '_', 0,   0, 0, N_("'_' flag"),     N_("the '_' strftime flag"),          STD_EXT },
653   { '-', 0,   0, 0, N_("'-' flag"),     N_("the '-' strftime flag"),          STD_EXT },
654   { '0', 0,   0, 0, N_("'0' flag"),     N_("the '0' strftime flag"),          STD_EXT },
655   { '^', 0,   0, 0, N_("'^' flag"),     N_("the '^' strftime flag"),          STD_EXT },
656   { '#', 0,   0, 0, N_("'#' flag"),     N_("the '#' strftime flag"),          STD_EXT },
657   { 'w', 0,   0, 0, N_("field width"),  N_("field width in strftime format"), STD_EXT },
658   { 'E', 0,   0, 0, N_("'E' modifier"), N_("the 'E' strftime modifier"),      STD_C99 },
659   { 'O', 0,   0, 0, N_("'O' modifier"), N_("the 'O' strftime modifier"),      STD_C99 },
660   { 'O', 'o', 0, 0, NULL,               N_("the 'O' modifier"),               STD_EXT },
661   { 'O', 'p', 0, 0, NULL,               N_("the 'O' modifier"),               STD_C2X },
662   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
663 };
664 
665 
/* Interacting flag pairs for strftime formats: 'E' with 'O', the
   padding flags '_', '-' and '0' with one another, and '^' with '#'.
   The all-zero row ends the table.  */
666 static const format_flag_pair strftime_flag_pairs[] =
667 {
668   { 'E', 'O', 0, 0 },
669   { '_', '-', 0, 0 },
670   { '_', '0', 0, 0 },
671   { '-', '0', 0, 0 },
672   { '^', '#', 0, 0 },
673   { 0, 0, 0, 0 }
674 };
675 
676 
/* Flags recognized in strfmon formats.  Everything is tagged STD_C89.
   The fill-character row ('=') is the only one with a nonzero third
   field; that field is declared in c-format.h and is not visible here.
   '#' and 'p' stand for left and right precision, 'w' for field width
   and 'L' for the length modifier.  The all-zero row ends the
   table.  */
677 static const format_flag_spec strfmon_flag_specs[] =
678 {
679   { '=',  0, 1, 0, N_("fill character"),  N_("fill character in strfmon format"),  STD_C89 },
680   { '^',  0, 0, 0, N_("'^' flag"),        N_("the '^' strfmon flag"),              STD_C89 },
681   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' strfmon flag"),              STD_C89 },
682   { '(',  0, 0, 0, N_("'(' flag"),        N_("the '(' strfmon flag"),              STD_C89 },
683   { '!',  0, 0, 0, N_("'!' flag"),        N_("the '!' strfmon flag"),              STD_C89 },
684   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' strfmon flag"),              STD_C89 },
685   { 'w',  0, 0, 0, N_("field width"),     N_("field width in strfmon format"),     STD_C89 },
686   { '#',  0, 0, 0, N_("left precision"),  N_("left precision in strfmon format"),  STD_C89 },
687   { 'p',  0, 0, 0, N_("right precision"), N_("right precision in strfmon format"), STD_C89 },
688   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in strfmon format"), STD_C89 },
689   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
690 };
691 
/* Interacting flag pairs for strfmon formats: '+' with '('.  The
   all-zero row ends the table.  */
692 static const format_flag_pair strfmon_flag_pairs[] =
693 {
694   { '+', '(', 0, 0 },
695   { 0, 0, 0, 0 }
696 };
697 
698 
/* Conversion specifiers accepted in printf formats.  Each row lists
   the conversion characters it covers, an integer that is 1 for the
   s, p, n and S rows and 0 elsewhere (presumably the pointer
   indirection level -- confirm against c-format.h), the standard
   version, a braced list with one expected-type entry per length
   modifier (BADLEN marks a rejected combination), a string of flag
   characters (these use the codes of printf_flag_specs), a second
   flags string, and a final pointer that is NULL in every row.  The
   exact field layout is declared in c-format.h and is not visible
   here.  The row with a NULL character string ends the table.  */
699 static const format_char_info print_char_table[] =
700 {
701   /* C89 conversion specifiers.  */
702   { "di",  0, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  TEX_LL,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN  }, "-wp0 +'I",  "i",  NULL },
703   { "oxX", 0, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0#",     "i",  NULL },
704   { "u",   0, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0'I",    "i",  NULL },
705   { "fgG", 0, STD_C89, { T89_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "-wp0 +#'I", "",   NULL },
706   { "eE",  0, STD_C89, { T89_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "-wp0 +#I",  "",   NULL },
707   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T94_WI,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "",   NULL },
708   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "cR", NULL },
709   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "c",  NULL },
710   { "n",   1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  BADLEN,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "",          "W",  NULL },
711   /* C99 conversion specifiers.  */
712   { "F",   0, STD_C99, { T99_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "-wp0 +#'I", "",   NULL },
713   { "aA",  0, STD_C99, { T99_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64,  TEX_D128 }, "-wp0 +#",   "",   NULL },
714   /* X/Open conversion specifiers.  */
715   { "C",   0, STD_EXT, { TEX_WI,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "",   NULL },
716   { "S",   1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "R",  NULL },
717   /* GNU conversion specifiers.  */
718   { "m",   0, STD_EXT, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "",   NULL },
719   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
720 };
721 
/* Conversion specifiers accepted in asm_fprintf formats.  The rows
   have the same shape as those of print_char_table: conversion
   characters, an integer that is 1 only for "s", the standard version,
   the per-length-modifier type list, a flag-character string, a second
   flags string and a NULL pointer.  The asm_fprintf-specific
   conversions other than "r" use NOARGUMENTS in place of a type list
   and allow no flags.  The row with a NULL character string ends the
   table.  */
722 static const format_char_info asm_fprintf_char_table[] =
723 {
724   /* C89 conversion specifiers.  */
725   { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0 +",  "i", NULL },
726   { "oxX", 0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0#",   "i", NULL },
727   { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0",    "i", NULL },
728   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-w",       "", NULL },
729   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp",    "cR", NULL },
730 
731   /* asm_fprintf conversion specifiers.  */
732   { "O",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
733   { "R",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
734   { "I",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
735   { "L",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
736   { "U",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
737   { "r",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",  "", NULL },
738   { "z",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
739   { "@",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
740   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
741 };
742 
743 /* GCC-specific format_char_info arrays.  */
744 
745 /* The conversion specifiers implemented within pp_format, and thus supported
746    by all pretty_printer instances within GCC.  */
747 
/* Expands to a comma-separated run of format_char_info initializers with
   no trailing comma, so each user writes "PP_FORMAT_CHAR_TABLE," at the
   head of its own table.  The "Z" entry refers to &gcc_diag_char_table[0],
   which is defined only after this macro.
   NOTE(review): the "@" entry spells out 9 length slots where its
   neighbours spell out 12; the remaining slots are left to implicit
   initialization -- confirm that this is equivalent to BADLEN.  */
748 #define PP_FORMAT_CHAR_TABLE \
749   { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
750   { "ox",  0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
751   { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
752   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
753   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "pq", "cR", NULL }, \
754   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "c",  NULL }, \
755   { "r",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "//cR",   NULL }, \
756   { "@",   1, STD_C89, { T_EVENT_PTR,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL }, \
757   { "<",   0, STD_C89, NOARGUMENTS, "",      "<",   NULL }, \
758   { ">",   0, STD_C89, NOARGUMENTS, "",      ">",   NULL }, \
759   { "'" ,  0, STD_C89, NOARGUMENTS, "",      "",    NULL }, \
760   { "{",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "cR", NULL }, \
761   { "}",   0, STD_C89, NOARGUMENTS, "",      "",    NULL }, \
762   { "R",   0, STD_C89, NOARGUMENTS, "",     "\\",   NULL }, \
763   { "m",   0, STD_C89, NOARGUMENTS, "q",     "",   NULL }, \
764   { "Z",   1, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "", &gcc_diag_char_table[0] }
765 
/* Conversion specifier table used by the "gcc_diag" entry of
   format_types_orig.  It holds only the shared PP_FORMAT_CHAR_TABLE
   entries plus the closing all-NULL entry; the "Z" entry of
   PP_FORMAT_CHAR_TABLE also points back at element 0 of this table.  */
766 static const format_char_info gcc_diag_char_table[] =
767 {
768   /* The conversion specifiers implemented within pp_format.  */
769   PP_FORMAT_CHAR_TABLE,
770 
771   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
772 };
773 
/* Conversion specifier table used by the "gcc_tdiag" entry of
   format_types_orig: the shared PP_FORMAT_CHAR_TABLE entries followed by
   the tree and gimple specifiers listed below.
   NOTE(review): the entries here spell out 9 length slots, whereas the
   otherwise parallel entries in gcc_cdiag_char_table spell out 12.  */
774 static const format_char_info gcc_tdiag_char_table[] =
775 {
776   /* The conversion specifiers implemented within pp_format.  */
777   PP_FORMAT_CHAR_TABLE,
778 
779   /* Custom conversion specifiers implemented by default_tree_printer.  */
780 
781   /* These will require a "tree" at runtime.  */
782   { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
783   { "E", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
784   { "K", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
785 
786   /* G requires a "gimple*" argument at runtime.  */
787   { "G", 1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
788 
789   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
790 };
791 
/* Conversion specifier table used by the "gcc_cdiag" entry of
   format_types_orig: the shared PP_FORMAT_CHAR_TABLE entries, the tree and
   gimple specifiers, and a "v" specifier that takes an int.  */
792 static const format_char_info gcc_cdiag_char_table[] =
793 {
794   /* The conversion specifiers implemented within pp_format.  */
795   PP_FORMAT_CHAR_TABLE,
796 
797   /* Custom conversion specifiers implemented by c_tree_printer.  */
798 
799   /* These will require a "tree" at runtime.  */
800   { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
801   { "E",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
802   { "K",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
803 
804   /* G requires a "gimple*" argument at runtime.  */
805   { "G",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
806 
807   { "v",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
808 
809   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
810 };
811 
/* Conversion specifier table used by the "gcc_cxxdiag" entry of
   format_types_orig: the shared PP_FORMAT_CHAR_TABLE entries, the tree and
   gimple specifiers, and the int-taking "CLOPQ" group.  */
812 static const format_char_info gcc_cxxdiag_char_table[] =
813 {
814   /* The conversion specifiers implemented within pp_format.  */
815   PP_FORMAT_CHAR_TABLE,
816 
817   /* Custom conversion specifiers implemented by cp_printer.  */
818 
819   /* These will require a "tree" at runtime.  */
820   { "ADFHISTVX",1,STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "'",   NULL },
821   { "E", 1,STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "",   NULL },
822   { "K", 1, STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "\"",   NULL },
823 
824   /* G requires a "gimple*" argument at runtime.  */
825   { "G", 1, STD_C89,{ T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "\"",   NULL },
826 
827   /* These accept either an 'int' or an 'enum tree_code' (which is handled as an 'int'.)  */
828   { "CLOPQ",0,STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL },
829 
830   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
831 };
832 
/* Conversion specifier table used by the "gcc_gfc" entry of
   format_types_orig.  Unlike the other GCC diagnostic tables it does not
   start from PP_FORMAT_CHAR_TABLE; it lists its own C89 subset and then
   the gfc-specific specifiers.  */
833 static const format_char_info gcc_gfc_char_table[] =
834 {
835   /* C89 conversion specifiers.  */
836   { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "", NULL },
837   { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "", NULL },
838   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "", NULL },
839   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "cR", NULL },
840 
841   /* gfc conversion specifiers.  */
842 
843   { "C",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
844 
845   /* This will require a "locus" at runtime.  */
846   { "L",   0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "R", NULL },
847 
848   /* These will require nothing.  */
849   { "<>",0, STD_C89, NOARGUMENTS, "",      "",   NULL },
850   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
851 };
852 
/* Conversion specifier table used by the "gcc_dump_printf" entry of
   format_types_orig: the shared PP_FORMAT_CHAR_TABLE entries followed by
   the dump-specific specifiers described entry by entry below.  */
853 static const format_char_info gcc_dump_printf_char_table[] =
854 {
855   /* The conversion specifiers implemented within pp_format.  */
856   PP_FORMAT_CHAR_TABLE,
857 
858   /* Custom conversion specifiers implemented by dump_pretty_printer.  */
859 
860   /* E and G require a "gimple *" argument at runtime.  */
861   { "EG",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
862 
863   /* C requires a "cgraph_node *" argument at runtime.  */
864   { "C",   1, STD_C89, { T_CGRAPH_NODE,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
865 
866   /* T requires a "tree" at runtime.  */
867   { "T",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
868 
869   /* %f requires a "double"; it doesn't support modifiers.  */
870   { "f",   0, STD_C89, { T89_D,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
871 
872   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
873 };
874 
/* Conversion specifier table used by the "gnu_scanf" entry of
   format_types_orig, grouped by the standard that introduced each
   specifier (C89, C99, X/Open).  The all-NULL entry closes the table.  */
875 static const format_char_info scan_char_table[] =
876 {
877   /* C89 conversion specifiers.  */
878   { "di",    1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  TEX_LL,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "*w'I", "W",   NULL },
879   { "u",     1, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "*w'I", "W",   NULL },
880   { "oxX",   1, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
881   { "efgEG", 1, STD_C89, { T89_F,   BADLEN,  BADLEN,  T89_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "*w'",  "W",   NULL },
882   { "c",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*mw",   "cW",  NULL },
883   { "s",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "cW",  NULL },
884   { "[",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "cW[", NULL },
885   { "p",     2, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
886   { "n",     1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  BADLEN,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "",     "W",   NULL },
887   /* C99 conversion specifiers.  */
888   { "F",   1, STD_C99, { T99_F,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "*w'",  "W",   NULL },
889   { "aA",   1, STD_C99, { T99_F,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32,  TEX_D64,  TEX_D128 }, "*w'",  "W",   NULL },
890   /* X/Open conversion specifiers.  */
891   { "C",     1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*mw",   "W",   NULL },
892   { "S",     1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "W",   NULL },
893   { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
894 };
895 
/* Conversion specifier table used by the "gnu_strftime" entry of
   format_types_orig.  Every entry uses NOLENGTHS; the entries differ only
   in the specifier letters, the standard, and the two flag strings.  */
896 static const format_char_info time_char_table[] =
897 {
898   /* C89 conversion specifiers.  */
899   { "AZa",		0, STD_C89, NOLENGTHS, "^#",     "",   NULL },
900   { "Bb",		0, STD_C89, NOLENGTHS, "O^#",    "p",  NULL },
901   { "cx",		0, STD_C89, NOLENGTHS, "E",      "3",  NULL },
902   { "HIMSUWdmw",	0, STD_C89, NOLENGTHS, "-_0Ow",  "",   NULL },
903   { "j",		0, STD_C89, NOLENGTHS, "-_0Ow",  "o",  NULL },
904   { "p",		0, STD_C89, NOLENGTHS, "#",      "",   NULL },
905   { "X",		0, STD_C89, NOLENGTHS, "E",      "",   NULL },
906   { "y",		0, STD_C89, NOLENGTHS, "EO-_0w", "4",  NULL },
907   { "Y",		0, STD_C89, NOLENGTHS, "-_0EOw", "o",  NULL },
908   { "%",		0, STD_C89, NOLENGTHS, "",       "",   NULL },
909   /* C99 conversion specifiers.  */
910   { "C",		0, STD_C99, NOLENGTHS, "-_0EOw", "o",  NULL },
911   { "D",		0, STD_C99, NOLENGTHS, "",       "2",  NULL },
912   { "eVu",		0, STD_C99, NOLENGTHS, "-_0Ow",  "",   NULL },
913   { "FRTnrt",		0, STD_C99, NOLENGTHS, "",       "",   NULL },
914   { "g",		0, STD_C99, NOLENGTHS, "O-_0w",  "2o", NULL },
915   { "G",		0, STD_C99, NOLENGTHS, "-_0Ow",  "o",  NULL },
916   { "h",		0, STD_C99, NOLENGTHS, "^#",     "",   NULL },
917   { "z",		0, STD_C99, NOLENGTHS, "O",      "o",  NULL },
918   /* GNU conversion specifiers.  */
919   { "kls",		0, STD_EXT, NOLENGTHS, "-_0Ow",  "",   NULL },
920   { "P",		0, STD_EXT, NOLENGTHS, "",       "",   NULL },
921   { NULL,		0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
922 };
923 
/* Conversion specifier table used by the "gnu_strfmon" entry of
   format_types_orig.  It has a single real entry, covering the "i" and
   "n" specifiers, followed by the closing all-NULL entry.  */
924 static const format_char_info monetary_char_table[] =
925 {
926   { "in", 0, STD_C89, { T89_D, BADLEN, BADLEN, BADLEN, BADLEN, T89_LD, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "=^+(!-w#p", "", NULL },
927   { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
928 };
929 
930 /* This must be in the same order as enum format_type.  */
931 static const format_kind_info format_types_orig[] =
932 {
933   { "gnu_printf",   printf_length_specs,  print_char_table, " +#0-'I", NULL,
934     printf_flag_specs, printf_flag_pairs,
935     FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK,
936     'w', 0, 'p', 0, 'L', 0,
937     &integer_type_node, &integer_type_node, format_type_error
938   },
939   { "asm_fprintf",   asm_fprintf_length_specs,  asm_fprintf_char_table, " +#0-", NULL,
940     asm_fprintf_flag_specs, asm_fprintf_flag_pairs,
941     FMT_FLAG_ARG_CONVERT|FMT_FLAG_EMPTY_PREC_OK,
942     'w', 0, 'p', 0, 'L', 0,
943     NULL, NULL, format_type_error
944   },
945   { "gcc_diag",   gcc_diag_length_specs,  gcc_diag_char_table, "q+#", NULL,
946     gcc_diag_flag_specs, gcc_diag_flag_pairs,
947     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
948     0, 0, 'p', 0, 'L', 0,
949     NULL, &integer_type_node, format_type_error
950   },
951   { "gcc_tdiag",   gcc_tdiag_length_specs,  gcc_tdiag_char_table, "q+#", NULL,
952     gcc_tdiag_flag_specs, gcc_tdiag_flag_pairs,
953     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
954     0, 0, 'p', 0, 'L', 0,
955     NULL, &integer_type_node, format_type_error
956   },
957   { "gcc_cdiag",   gcc_cdiag_length_specs,  gcc_cdiag_char_table, "q+#", NULL,
958     gcc_cdiag_flag_specs, gcc_cdiag_flag_pairs,
959     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
960     0, 0, 'p', 0, 'L', 0,
961     NULL, &integer_type_node, format_type_error
962   },
963   { "gcc_cxxdiag",   gcc_cxxdiag_length_specs,  gcc_cxxdiag_char_table, "q+#", NULL,
964     gcc_cxxdiag_flag_specs, gcc_cxxdiag_flag_pairs,
965     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
966     0, 0, 'p', 0, 'L', 0,
967     NULL, &integer_type_node, format_type_error
968   },
969   { "gcc_gfc", gcc_gfc_length_specs, gcc_gfc_char_table, "q+#", NULL,
970     gcc_gfc_flag_specs, gcc_gfc_flag_pairs,
971     FMT_FLAG_ARG_CONVERT|FMT_FLAG_M_OK,
972     0, 0, 0, 0, 0, 0,
973     NULL, NULL, format_type_error
974   },
975   { "gcc_dump_printf",   gcc_dump_printf_length_specs,
976     gcc_dump_printf_char_table, "q+#", NULL,
977     gcc_dump_printf_flag_specs, gcc_dump_printf_flag_pairs,
978     FMT_FLAG_ARG_CONVERT,
979     0, 0, 'p', 0, 'L', 0,
980     NULL, &integer_type_node
981   },
982   { "NSString",   NULL,  NULL, NULL, NULL,
983     NULL, NULL,
984     FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0,
985     NULL, NULL, format_type_error
986   },
987   { "gnu_scanf",    scanf_length_specs,   scan_char_table,  "*'I", NULL,
988     scanf_flag_specs, scanf_flag_pairs,
989     FMT_FLAG_ARG_CONVERT|FMT_FLAG_SCANF_A_KLUDGE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_ZERO_WIDTH_BAD|FMT_FLAG_DOLLAR_GAP_POINTER_OK,
990     'w', 0, 0, '*', 'L', 'm',
991     NULL, NULL, format_type_error
992   },
993   { "gnu_strftime", NULL,                 time_char_table,  "_-0^#", "EO",
994     strftime_flag_specs, strftime_flag_pairs,
995     FMT_FLAG_FANCY_PERCENT_OK|FMT_FLAG_M_OK, 'w', 0, 0, 0, 0, 0,
996     NULL, NULL, format_type_error
997   },
998   { "gnu_strfmon",  strfmon_length_specs, monetary_char_table, "=^+(!-", NULL,
999     strfmon_flag_specs, strfmon_flag_pairs,
1000     FMT_FLAG_ARG_CONVERT, 'w', '#', 'p', 0, 'L', 0,
1001     NULL, NULL, format_type_error
1002   },
1003   { "gnu_syslog",   printf_length_specs,  print_char_table, " +#0-'I", NULL,
1004     printf_flag_specs, printf_flag_pairs,
1005     FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK|FMT_FLAG_M_OK,
1006     'w', 0, 'p', 0, 'L', 0,
1007     &integer_type_node, &integer_type_node, printf_format_type
1008   },
1009 };
1010 
1011 /* This layer of indirection allows GCC to reassign format_types with
1012    new data if necessary, while still allowing the original data to be
1013    const.  */
/* The table that lookups such as format_name and format_flags index;
   it starts out pointing at the constant built-in table.  */
1014 static const format_kind_info *format_types = format_types_orig;
1015 /* We can modify this one.  We also add target-specific format types
1016    to the end of the array.  */
1017 static format_kind_info *dynamic_format_types;
1018 
/* Number of entries reachable through format_types; initially the
   number of entries in format_types_orig.  */
1019 static int n_format_types = ARRAY_SIZE (format_types_orig);
1020 
1021 /* Structure detailing the results of checking a format function call
1022    where the format expression may be a conditional expression with
1023    many leaves resulting from nested conditional expressions.  */
1024 struct format_check_results
1025 {
1026   /* Number of leaves of the format argument that could not be checked
1027      as they were not string literals.  */
1028   int number_non_literal;
1029   /* Number of leaves of the format argument that were null pointers or
1030      string literals, but had extra format arguments.  */
1031   int number_extra_args;
  /* NOTE(review): presumably the location of an extra argument counted
     in number_extra_args -- confirm against the code that sets it.  */
1032   location_t extra_arg_loc;
1033   /* Number of leaves of the format argument that were null pointers or
1034      string literals, but had extra format arguments and used $ operand
1035      numbers.  */
1036   int number_dollar_extra_args;
1037   /* Number of leaves of the format argument that were wide string
1038      literals.  */
1039   int number_wide;
1040   /* Number of leaves of the format argument that are not array of "char".  */
1041   int number_non_char;
1042   /* Number of leaves of the format argument that were empty strings.  */
1043   int number_empty;
1044   /* Number of leaves of the format argument that were unterminated
1045      strings.  */
1046   int number_unterminated;
1047   /* Number of leaves of the format argument that were not counted above.  */
1048   int number_other;
1049   /* Location of the format string.  */
1050   location_t format_string_loc;
1051 };
1052 
/* Bundle of the state needed while checking one format argument.
   NOTE(review): presumably this is what travels through the "void *"
   parameter of check_format_arg -- confirm at the call site.  */
1053 struct format_check_context
1054 {
  /* Where the per-leaf counts are accumulated.  */
1055   format_check_results *res;
  /* The decoded format attribute being checked.  */
1056   function_format_info *info;
  /* NOTE(review): presumably the TREE_LIST of call arguments that
     check_function_format builds -- confirm.  */
1057   tree params;
  /* NOTE(review): presumably the locations of the call arguments, as
     received by check_function_format -- confirm.  */
1058   vec<location_t> *arglocs;
1059 };
1060 
1061 /* Return the format name (as specified in the original table) for the format
1062    type indicated by format_num.  */
1063 static const char *
format_name(int format_num)1064 format_name (int format_num)
1065 {
1066   if (format_num >= 0 && format_num < n_format_types)
1067     return format_types[format_num].name;
1068   gcc_unreachable ();
1069 }
1070 
1071 /* Return the format flags (as specified in the original table) for the format
1072    type indicated by format_num.  */
1073 static int
format_flags(int format_num)1074 format_flags (int format_num)
1075 {
1076   if (format_num >= 0 && format_num < n_format_types)
1077     return format_types[format_num].flags;
1078   gcc_unreachable ();
1079 }
1080 
/* Forward declarations of file-local helpers, grouped as: the format
   checking entry points, the $ operand number helpers, flag lookup, and
   argument type checking and its diagnostics.  */
1081 static void check_format_info (function_format_info *, tree,
1082 			       vec<location_t> *);
1083 static void check_format_arg (void *, tree, unsigned HOST_WIDE_INT);
1084 static void check_format_info_main (format_check_results *,
1085 				    function_format_info *, const char *,
1086 				    location_t, tree,
1087 				    int, tree,
1088 				    unsigned HOST_WIDE_INT,
1089 				    object_allocator<format_wanted_type> &,
1090 				    vec<location_t> *);
1091 
1092 static void init_dollar_format_checking (int, tree);
1093 static int maybe_read_dollar_number (const char **, int,
1094 				     tree, tree *, const format_kind_info *);
1095 static bool avoid_dollar_number (const char *);
1096 static void finish_dollar_format_checking (format_check_results *, int);
1097 
1098 static const format_flag_spec *get_flag_spec (const format_flag_spec *,
1099 					      int, const char *);
1100 
1101 static void check_format_types (const substring_loc &fmt_loc,
1102 				format_wanted_type *,
1103 				const format_kind_info *fki,
1104 				int offset_to_type_start,
1105 				char conversion_char,
1106 				vec<location_t> *arglocs);
1107 static void format_type_warning (const substring_loc &fmt_loc,
1108 				 location_t param_loc,
1109 				 format_wanted_type *, tree,
1110 				 tree,
1111 				 const format_kind_info *fki,
1112 				 int offset_to_type_start,
1113 				 char conversion_char);
1114 
1115 /* Decode a format type from a string, returning the type, or
1116    format_type_error if not valid, in which case the caller should
1117    print an error message.  On success, when IS_RAW is non-null, set
1118    *IS_RAW when the format type corresponds to a GCC "raw" diagnostic
1119    formatting function and clear it otherwise.  */
1120 static format_type
decode_format_type(const char * s,bool * is_raw)1121 decode_format_type (const char *s, bool *is_raw /* = NULL */)
1122 {
1123   bool is_raw_buf;
1124 
1125   if (!is_raw)
1126     is_raw = &is_raw_buf;
1127 
1128   *is_raw = false;
1129 
1130   s = convert_format_name_to_system_name (s);
1131 
1132   size_t slen = strlen (s);
1133   for (int i = 0; i < n_format_types; i++)
1134     {
1135       /* Check for a match with no underscores.  */
1136       if (!strcmp (s, format_types[i].name))
1137 	return static_cast<format_type> (i);
1138 
1139       /* Check for leading and trailing underscores.  */
1140       size_t alen = strlen (format_types[i].name);
1141       if (slen == alen + 4 && s[0] == '_' && s[1] == '_'
1142 	  && s[slen - 1] == '_' && s[slen - 2] == '_'
1143 	  && !strncmp (s + 2, format_types[i].name, alen))
1144 	return static_cast<format_type>(i);
1145 
1146       /* Check for the "_raw" suffix and no leading underscores.  */
1147       if (slen == alen + 4
1148 	  && !strncmp (s, format_types[i].name, alen)
1149 	  && !strcmp (s + alen, "_raw"))
1150 	{
1151 	  *is_raw = true;
1152 	  return static_cast<format_type>(i);
1153 	}
1154 
1155       /* Check for the "_raw__" suffix and leading underscores.  */
1156       if (slen == alen + 8 && s[0] == '_' && s[1] == '_'
1157 	  && !strncmp (s + 2, format_types[i].name, alen)
1158 	  && !strcmp (s + 2 + alen, "_raw__"))
1159 	{
1160 	  *is_raw = true;
1161 	  return static_cast<format_type>(i);
1162 	}
1163     }
1164 
1165   return format_type_error;
1166 }
1167 
1168 
1169 /* Check the argument list of a call to printf, scanf, etc.
1170    ATTRS are the attributes on the function type.  There are NARGS argument
1171    values in the array ARGARRAY.
1172    Also, if -Wsuggest-attribute=format,
1173    warn for calls to vprintf or vscanf in functions with no such format
1174    attribute themselves.  */
1175 
1176 void
check_function_format(const_tree fntype,tree attrs,int nargs,tree * argarray,vec<location_t> * arglocs)1177 check_function_format (const_tree fntype, tree attrs, int nargs,
1178 		       tree *argarray, vec<location_t> *arglocs)
1179 {
1180   tree a;
1181 
1182   tree atname = get_identifier ("format");
1183 
1184   /* See if this function has any format attributes.  */
1185   for (a = attrs; a; a = TREE_CHAIN (a))
1186     {
1187       if (is_attribute_p ("format", get_attribute_name (a)))
1188 	{
1189 	  /* Yup; check it.  */
1190 	  function_format_info info;
1191 	  decode_format_attr (fntype, atname, TREE_VALUE (a), &info,
1192 			      /*validated=*/true);
1193 	  if (warn_format)
1194 	    {
1195 	      /* FIXME: Rewrite all the internal functions in this file
1196 		 to use the ARGARRAY directly instead of constructing this
1197 		 temporary list.  */
1198 	      tree params = NULL_TREE;
1199 	      int i;
1200 	      for (i = nargs - 1; i >= 0; i--)
1201 		params = tree_cons (NULL_TREE, argarray[i], params);
1202 	      check_format_info (&info, params, arglocs);
1203 	    }
1204 	  const format_kind_info *fi = &format_types[info.format_type];
1205 
1206 	  /* Attempt to detect whether the current function might benefit
1207 	     from the format attribute if the called function is decorated
1208 	     with it.  Avoid using calls with string literal formats for
1209 	     guidance since those are unlikely to be viable candidates.  */
1210 	  if (warn_suggest_attribute_format
1211 	      && current_function_decl != NULL_TREE
1212 	      && info.first_arg_num == 0
1213 	      && (fi->flags & (int) FMT_FLAG_ARG_CONVERT)
1214 	      /* c_strlen will fail for a function parameter but succeed
1215 		 for a literal or constant array.  */
1216 	      && !c_strlen (argarray[info.format_num - 1], 1))
1217 	    {
1218 	      tree c;
1219 	      for (c = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
1220 		   c;
1221 		   c = TREE_CHAIN (c))
1222 		{
1223 		  if (!is_attribute_p ("format", TREE_PURPOSE (c)))
1224 		     continue;
1225 		  int format_type = decode_format_type (
1226 		      IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (c))));
1227 		  if (format_type == format_type_error)
1228 		     continue;
1229 		  if (format_type == info.format_type ||
1230 		      format_type == fi->parent_format_type)
1231 		    break;
1232 		}
1233 	      if (c == NULL_TREE)
1234 		{
1235 		  /* Check if the current function has a parameter to which
1236 		     the format attribute could be attached; if not, it
1237 		     can't be a candidate for a format attribute, despite
1238 		     the vprintf-like or vscanf-like call.  */
1239 		  tree args;
1240 		  for (args = DECL_ARGUMENTS (current_function_decl);
1241 		       args != 0;
1242 		       args = DECL_CHAIN (args))
1243 		    {
1244 		      if (TREE_CODE (TREE_TYPE (args)) == POINTER_TYPE
1245 			  && (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (args)))
1246 			      == char_type_node))
1247 			break;
1248 		    }
1249 		  if (args != 0)
1250 		    warning (OPT_Wsuggest_attribute_format, "function %qD "
1251 			     "might be a candidate for %qs format attribute",
1252 			     current_function_decl,
1253 			     format_types[info.format_type].name);
1254 		}
1255 	    }
1256 	}
1257     }
1258 }
1259 
1260 
1261 /* Variables used by the checking of $ operand number formats.  */
/* Per-operand use marker, indexed by operand number minus one: 0 when
   not yet referenced, 1 once referenced, 2 after the "used more than
   once" warning has been given for it.  */
1262 static char *dollar_arguments_used = NULL;
/* Per-argument flag recording whether the argument has pointer type;
   only filled in when the argument list is known.  */
1263 static char *dollar_arguments_pointer_p = NULL;
/* Number of elements currently allocated in each of the two arrays.  */
1264 static int dollar_arguments_alloc = 0;
/* Number of arguments counted by init_dollar_format_checking; stays zero
   for a vprintf-style function.  */
1265 static int dollar_arguments_count;
/* The FIRST_ARG_NUM given to init_dollar_format_checking.  */
1266 static int dollar_first_arg_num;
/* Highest operand number accepted so far in the current format.  */
1267 static int dollar_max_arg_used;
/* Nonzero once the pedantic "%n$ operand number formats" warning has
   been given for the current format.  */
1268 static int dollar_format_warned;
1269 
1270 /* Initialize the checking for a format string that may contain $
1271    parameter number specifications; we will need to keep track of whether
1272    each parameter has been used.  FIRST_ARG_NUM is the number of the first
1273    argument that is a parameter to the format, or 0 for a vprintf-style
1274    function; PARAMS is the list of arguments starting at this argument.  */
1275 
1276 static void
init_dollar_format_checking(int first_arg_num,tree params)1277 init_dollar_format_checking (int first_arg_num, tree params)
1278 {
1279   tree oparams = params;
1280 
1281   dollar_first_arg_num = first_arg_num;
1282   dollar_arguments_count = 0;
1283   dollar_max_arg_used = 0;
1284   dollar_format_warned = 0;
1285   if (first_arg_num > 0)
1286     {
1287       while (params)
1288 	{
1289 	  dollar_arguments_count++;
1290 	  params = TREE_CHAIN (params);
1291 	}
1292     }
1293   if (dollar_arguments_alloc < dollar_arguments_count)
1294     {
1295       free (dollar_arguments_used);
1296       free (dollar_arguments_pointer_p);
1297       dollar_arguments_alloc = dollar_arguments_count;
1298       dollar_arguments_used = XNEWVEC (char, dollar_arguments_alloc);
1299       dollar_arguments_pointer_p = XNEWVEC (char, dollar_arguments_alloc);
1300     }
1301   if (dollar_arguments_alloc)
1302     {
1303       memset (dollar_arguments_used, 0, dollar_arguments_alloc);
1304       if (first_arg_num > 0)
1305 	{
1306 	  int i = 0;
1307 	  params = oparams;
1308 	  while (params)
1309 	    {
1310 	      dollar_arguments_pointer_p[i] = (TREE_CODE (TREE_TYPE (TREE_VALUE (params)))
1311 					       == POINTER_TYPE);
1312 	      params = TREE_CHAIN (params);
1313 	      i++;
1314 	    }
1315 	}
1316     }
1317 }
1318 
1319 
1320 /* Look for a decimal number followed by a $ in *FORMAT.  If DOLLAR_NEEDED
1321    is set, it is an error if one is not found; otherwise, it is OK.  If
1322    such a number is found, check whether it is within range and mark that
1323    numbered operand as being used for later checking.  Returns the operand
1324    number if found and within range, zero if no such number was found and
1325    this is OK, or -1 on error.  PARAMS points to the first operand of the
1326    format; PARAM_PTR is made to point to the parameter referred to.  If
1327    a $ format is found, *FORMAT is updated to point just after it.  */
1328 
1329 static int
maybe_read_dollar_number(const char ** format,int dollar_needed,tree params,tree * param_ptr,const format_kind_info * fki)1330 maybe_read_dollar_number (const char **format,
1331 			  int dollar_needed, tree params, tree *param_ptr,
1332 			  const format_kind_info *fki)
1333 {
1334   int argnum;
1335   int overflow_flag;
1336   const char *fcp = *format;
1337   if (!ISDIGIT (*fcp))
1338     {
1339       if (dollar_needed)
1340 	{
1341 	  warning (OPT_Wformat_, "missing $ operand number in format");
1342 	  return -1;
1343 	}
1344       else
1345 	return 0;
1346     }
1347   argnum = 0;
1348   overflow_flag = 0;
1349   while (ISDIGIT (*fcp))
1350     {
1351       HOST_WIDE_INT nargnum
1352 	= HOST_WIDE_INT_UC (10) * argnum + (*fcp - '0');
1353       if ((int) nargnum != nargnum)
1354 	overflow_flag = 1;
1355       argnum = nargnum;
1356       fcp++;
1357     }
1358   if (*fcp != '$')
1359     {
1360       if (dollar_needed)
1361 	{
1362 	  warning (OPT_Wformat_, "missing $ operand number in format");
1363 	  return -1;
1364 	}
1365       else
1366 	return 0;
1367     }
1368   *format = fcp + 1;
1369   if (pedantic && !dollar_format_warned)
1370     {
1371       warning (OPT_Wformat_, "%s does not support %%n$ operand number formats",
1372 	       C_STD_NAME (STD_EXT));
1373       dollar_format_warned = 1;
1374     }
1375   if (overflow_flag || argnum == 0
1376       || (dollar_first_arg_num && argnum > dollar_arguments_count))
1377     {
1378       warning (OPT_Wformat_, "operand number out of range in format");
1379       return -1;
1380     }
1381   if (argnum > dollar_max_arg_used)
1382     dollar_max_arg_used = argnum;
1383   /* For vprintf-style functions we may need to allocate more memory to
1384      track which arguments are used.  */
1385   while (dollar_arguments_alloc < dollar_max_arg_used)
1386     {
1387       int nalloc;
1388       nalloc = 2 * dollar_arguments_alloc + 16;
1389       dollar_arguments_used = XRESIZEVEC (char, dollar_arguments_used,
1390 					  nalloc);
1391       dollar_arguments_pointer_p = XRESIZEVEC (char, dollar_arguments_pointer_p,
1392 					       nalloc);
1393       memset (dollar_arguments_used + dollar_arguments_alloc, 0,
1394 	      nalloc - dollar_arguments_alloc);
1395       dollar_arguments_alloc = nalloc;
1396     }
1397   if (!(fki->flags & (int) FMT_FLAG_DOLLAR_MULTIPLE)
1398       && dollar_arguments_used[argnum - 1] == 1)
1399     {
1400       dollar_arguments_used[argnum - 1] = 2;
1401       warning (OPT_Wformat_, "format argument %d used more than once in %s format",
1402 	       argnum, fki->name);
1403     }
1404   else
1405     dollar_arguments_used[argnum - 1] = 1;
1406   if (dollar_first_arg_num)
1407     {
1408       int i;
1409       *param_ptr = params;
1410       for (i = 1; i < argnum && *param_ptr != 0; i++)
1411 	*param_ptr = TREE_CHAIN (*param_ptr);
1412 
1413       /* This case shouldn't be caught here.  */
1414       gcc_assert (*param_ptr);
1415     }
1416   else
1417     *param_ptr = 0;
1418   return argnum;
1419 }
1420 
1421 /* Ensure that FORMAT does not start with a decimal number followed by
1422    a $; give a diagnostic and return true if it does, false otherwise.  */
1423 
1424 static bool
avoid_dollar_number(const char * format)1425 avoid_dollar_number (const char *format)
1426 {
1427   if (!ISDIGIT (*format))
1428     return false;
1429   while (ISDIGIT (*format))
1430     format++;
1431   if (*format == '$')
1432     {
1433       warning (OPT_Wformat_,
1434 	       "%<$%>operand number used after format without operand number");
1435       return true;
1436     }
1437   return false;
1438 }
1439 
1440 
1441 /* Finish the checking for a format string that used $ operand number formats
1442    instead of non-$ formats.  We check for unused operands before used ones
1443    (a serious error, since the implementation of the format function
1444    can't know what types to pass to va_arg to find the later arguments).
1445    and for unused operands at the end of the format (if we know how many
1446    arguments the format had, so not for vprintf).  If there were operand
1447    numbers out of range on a non-vprintf-style format, we won't have reached
1448    here.  If POINTER_GAP_OK, unused arguments are OK if all arguments are
1449    pointers.  */
1450 
1451 static void
finish_dollar_format_checking(format_check_results * res,int pointer_gap_ok)1452 finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok)
1453 {
1454   int i;
1455   bool found_pointer_gap = false;
1456   for (i = 0; i < dollar_max_arg_used; i++)
1457     {
1458       if (!dollar_arguments_used[i])
1459 	{
1460 	  if (pointer_gap_ok && (dollar_first_arg_num == 0
1461 				 || dollar_arguments_pointer_p[i]))
1462 	    found_pointer_gap = true;
1463 	  else
1464 	    warning_at (res->format_string_loc, OPT_Wformat_,
1465 			"format argument %d unused before used argument %d "
1466 			"in %<$%>-style format",
1467 			i + 1, dollar_max_arg_used);
1468 	}
1469     }
1470   if (found_pointer_gap
1471       || (dollar_first_arg_num
1472 	  && dollar_max_arg_used < dollar_arguments_count))
1473     {
1474       res->number_other--;
1475       res->number_dollar_extra_args++;
1476     }
1477 }
1478 
1479 
1480 /* Retrieve the specification for a format flag.  SPEC contains the
1481    specifications for format flags for the applicable kind of format.
1482    FLAG is the flag in question.  If PREDICATES is NULL, the basic
1483    spec for that flag must be retrieved and must exist.  If
1484    PREDICATES is not NULL, it is a string listing possible predicates
1485    for the spec entry; if an entry predicated on any of these is
1486    found, it is returned, otherwise NULL is returned.  */
1487 
1488 static const format_flag_spec *
get_flag_spec(const format_flag_spec * spec,int flag,const char * predicates)1489 get_flag_spec (const format_flag_spec *spec, int flag, const char *predicates)
1490 {
1491   int i;
1492   for (i = 0; spec[i].flag_char != 0; i++)
1493     {
1494       if (spec[i].flag_char != flag)
1495 	continue;
1496       if (predicates != NULL)
1497 	{
1498 	  if (spec[i].predicate != 0
1499 	      && strchr (predicates, spec[i].predicate) != 0)
1500 	    return &spec[i];
1501 	}
1502       else if (spec[i].predicate == 0)
1503 	return &spec[i];
1504     }
1505   gcc_assert (predicates);
1506   return NULL;
1507 }
1508 
1509 
1510 /* Check the argument list of a call to printf, scanf, etc.
1511    INFO points to the function_format_info structure.
1512    PARAMS is the list of argument values.  */
1513 
1514 static void
check_format_info(function_format_info * info,tree params,vec<location_t> * arglocs)1515 check_format_info (function_format_info *info, tree params,
1516 		   vec<location_t> *arglocs)
1517 {
1518   format_check_context format_ctx;
1519   unsigned HOST_WIDE_INT arg_num;
1520   tree format_tree;
1521   format_check_results res;
1522   /* Skip to format argument.  If the argument isn't available, there's
1523      no work for us to do; prototype checking will catch the problem.  */
1524   for (arg_num = 1; ; ++arg_num)
1525     {
1526       if (params == 0)
1527 	return;
1528       if (arg_num == info->format_num)
1529 	break;
1530       params = TREE_CHAIN (params);
1531     }
1532   format_tree = TREE_VALUE (params);
1533   params = TREE_CHAIN (params);
1534   if (format_tree == 0)
1535     return;
1536 
1537   res.number_non_literal = 0;
1538   res.number_extra_args = 0;
1539   res.extra_arg_loc = UNKNOWN_LOCATION;
1540   res.number_dollar_extra_args = 0;
1541   res.number_wide = 0;
1542   res.number_non_char = 0;
1543   res.number_empty = 0;
1544   res.number_unterminated = 0;
1545   res.number_other = 0;
1546   res.format_string_loc = input_location;
1547 
1548   format_ctx.res = &res;
1549   format_ctx.info = info;
1550   format_ctx.params = params;
1551   format_ctx.arglocs = arglocs;
1552 
1553   check_function_arguments_recurse (check_format_arg, &format_ctx,
1554 				    format_tree, arg_num);
1555 
1556   location_t loc = format_ctx.res->format_string_loc;
1557 
1558   if (res.number_non_literal > 0)
1559     {
1560       /* Functions taking a va_list normally pass a non-literal format
1561 	 string.  These functions typically are declared with
1562 	 first_arg_num == 0, so avoid warning in those cases.  */
1563       if (!(format_types[info->format_type].flags & (int) FMT_FLAG_ARG_CONVERT))
1564 	{
1565 	  /* For strftime-like formats, warn for not checking the format
1566 	     string; but there are no arguments to check.  */
1567 	  warning_at (loc, OPT_Wformat_nonliteral,
1568 		      "format not a string literal, format string not checked");
1569 	}
1570       else if (info->first_arg_num != 0)
1571 	{
1572 	  /* If there are no arguments for the format at all, we may have
1573 	     printf (foo) which is likely to be a security hole.  */
1574 	  while (arg_num + 1 < info->first_arg_num)
1575 	    {
1576 	      if (params == 0)
1577 		break;
1578 	      params = TREE_CHAIN (params);
1579 	      ++arg_num;
1580 	    }
1581 	  if (params == 0 && warn_format_security)
1582 	    warning_at (loc, OPT_Wformat_security,
1583 			"format not a string literal and no format arguments");
1584 	  else if (params == 0 && warn_format_nonliteral)
1585 	    warning_at (loc, OPT_Wformat_nonliteral,
1586 			"format not a string literal and no format arguments");
1587 	  else
1588 	    warning_at (loc, OPT_Wformat_nonliteral,
1589 			"format not a string literal, argument types not checked");
1590 	}
1591     }
1592 
1593   /* If there were extra arguments to the format, normally warn.  However,
1594      the standard does say extra arguments are ignored, so in the specific
1595      case where we have multiple leaves (conditional expressions or
1596      ngettext) allow extra arguments if at least one leaf didn't have extra
1597      arguments, but was otherwise OK (either non-literal or checked OK).
1598      If the format is an empty string, this should be counted similarly to the
1599      case of extra format arguments.  */
1600   if (res.number_extra_args > 0 && res.number_non_literal == 0
1601       && res.number_other == 0)
1602     {
1603       if (res.extra_arg_loc == UNKNOWN_LOCATION)
1604 	res.extra_arg_loc = loc;
1605       warning_at (res.extra_arg_loc, OPT_Wformat_extra_args,
1606 		  "too many arguments for format");
1607     }
1608   if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0
1609       && res.number_other == 0)
1610     warning_at (loc, OPT_Wformat_extra_args,
1611 		"unused arguments in %<$%>-style format");
1612   if (res.number_empty > 0 && res.number_non_literal == 0
1613       && res.number_other == 0)
1614     warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string",
1615 	     format_types[info->format_type].name);
1616 
1617   if (res.number_wide > 0)
1618     warning_at (loc, OPT_Wformat_, "format is a wide character string");
1619 
1620   if (res.number_non_char > 0)
1621     warning_at (loc, OPT_Wformat_,
1622 		"format string is not an array of type %qs", "char");
1623 
1624   if (res.number_unterminated > 0)
1625     warning_at (loc, OPT_Wformat_, "unterminated format string");
1626 }
1627 
1628 /* Callback from check_function_arguments_recurse to check a
1629    format string.  FORMAT_TREE is the format parameter.  ARG_NUM
1630    is the number of the format argument.  CTX points to a
1631    format_check_context.  */
1632 
1633 static void
check_format_arg(void * ctx,tree format_tree,unsigned HOST_WIDE_INT arg_num)1634 check_format_arg (void *ctx, tree format_tree,
1635 		  unsigned HOST_WIDE_INT arg_num)
1636 {
1637   format_check_context *format_ctx = (format_check_context *) ctx;
1638   format_check_results *res = format_ctx->res;
1639   function_format_info *info = format_ctx->info;
1640   tree params = format_ctx->params;
1641   vec<location_t> *arglocs = format_ctx->arglocs;
1642 
1643   int format_length;
1644   HOST_WIDE_INT offset;
1645   const char *format_chars;
1646   tree array_size = 0;
1647   tree array_init;
1648 
1649   location_t fmt_param_loc = EXPR_LOC_OR_LOC (format_tree, input_location);
1650 
1651   /* Pull out a constant value if the front end didn't, and handle location
1652      wrappers.  */
1653   format_tree = fold_for_warn (format_tree);
1654   STRIP_NOPS (format_tree);
1655 
1656   if (integer_zerop (format_tree))
1657     {
1658       /* Skip to first argument to check, so we can see if this format
1659 	 has any arguments (it shouldn't).  */
1660       while (arg_num + 1 < info->first_arg_num)
1661 	{
1662 	  if (params == 0)
1663 	    return;
1664 	  params = TREE_CHAIN (params);
1665 	  ++arg_num;
1666 	}
1667 
1668       if (params == 0)
1669 	res->number_other++;
1670       else
1671 	{
1672 	  if (res->number_extra_args == 0)
1673 	    res->extra_arg_loc = EXPR_LOC_OR_LOC (TREE_VALUE (params),
1674 						  input_location);
1675 	  res->number_extra_args++;
1676 	}
1677       return;
1678     }
1679 
1680   offset = 0;
1681   if (TREE_CODE (format_tree) == POINTER_PLUS_EXPR)
1682     {
1683       tree arg0, arg1;
1684 
1685       arg0 = TREE_OPERAND (format_tree, 0);
1686       arg1 = TREE_OPERAND (format_tree, 1);
1687       STRIP_NOPS (arg0);
1688       STRIP_NOPS (arg1);
1689       if (TREE_CODE (arg1) == INTEGER_CST)
1690 	format_tree = arg0;
1691       else
1692 	{
1693 	  res->number_non_literal++;
1694 	  return;
1695 	}
1696       /* POINTER_PLUS_EXPR offsets are to be interpreted signed.  */
1697       if (!cst_and_fits_in_hwi (arg1))
1698 	{
1699 	  res->number_non_literal++;
1700 	  return;
1701 	}
1702       offset = int_cst_value (arg1);
1703     }
1704   if (TREE_CODE (format_tree) != ADDR_EXPR)
1705     {
1706       res->number_non_literal++;
1707       return;
1708     }
1709   res->format_string_loc = EXPR_LOC_OR_LOC (format_tree, input_location);
1710   format_tree = TREE_OPERAND (format_tree, 0);
1711   if (format_types[info->format_type].flags
1712       & (int) FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL)
1713     {
1714       bool objc_str = (info->format_type == gcc_objc_string_format_type);
1715       /* We cannot examine this string here - but we can check that it is
1716 	 a valid type.  */
1717       if (TREE_CODE (format_tree) != CONST_DECL
1718 	  || !((objc_str && objc_string_ref_type_p (TREE_TYPE (format_tree)))
1719 		|| (*targetcm.string_object_ref_type_p)
1720 				     ((const_tree) TREE_TYPE (format_tree))))
1721 	{
1722 	  res->number_non_literal++;
1723 	  return;
1724 	}
1725       /* Skip to first argument to check.  */
1726       while (arg_num + 1 < info->first_arg_num)
1727 	{
1728 	  if (params == 0)
1729 	    return;
1730 	  params = TREE_CHAIN (params);
1731 	  ++arg_num;
1732 	}
1733       /* So, we have a valid literal string object and one or more params.
1734 	 We need to use an external helper to parse the string into format
1735 	 info.  For Objective-C variants we provide the resource within the
1736 	 objc tree, for target variants, via a hook.  */
1737       if (objc_str)
1738 	objc_check_format_arg (format_tree, params);
1739       else if (targetcm.check_string_object_format_arg)
1740 	(*targetcm.check_string_object_format_arg) (format_tree, params);
1741       /* Else we can't handle it and retire quietly.  */
1742       return;
1743     }
1744   if (TREE_CODE (format_tree) == ARRAY_REF
1745       && tree_fits_shwi_p (TREE_OPERAND (format_tree, 1))
1746       && (offset += tree_to_shwi (TREE_OPERAND (format_tree, 1))) >= 0)
1747     format_tree = TREE_OPERAND (format_tree, 0);
1748   if (offset < 0)
1749     {
1750       res->number_non_literal++;
1751       return;
1752     }
1753   if (VAR_P (format_tree)
1754       && TREE_CODE (TREE_TYPE (format_tree)) == ARRAY_TYPE
1755       && (array_init = decl_constant_value (format_tree)) != format_tree
1756       && TREE_CODE (array_init) == STRING_CST)
1757     {
1758       /* Extract the string constant initializer.  Note that this may include
1759 	 a trailing NUL character that is not in the array (e.g.
1760 	 const char a[3] = "foo";).  */
1761       array_size = DECL_SIZE_UNIT (format_tree);
1762       format_tree = array_init;
1763     }
1764   if (TREE_CODE (format_tree) != STRING_CST)
1765     {
1766       res->number_non_literal++;
1767       return;
1768     }
1769   tree underlying_type
1770     = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (format_tree)));
1771   if (underlying_type != char_type_node)
1772     {
1773       if (underlying_type == char16_type_node
1774 	  || underlying_type == char32_type_node
1775 	  || underlying_type == wchar_type_node)
1776 	res->number_wide++;
1777       else
1778 	res->number_non_char++;
1779       return;
1780     }
1781   format_chars = TREE_STRING_POINTER (format_tree);
1782   format_length = TREE_STRING_LENGTH (format_tree);
1783   if (array_size != 0)
1784     {
1785       /* Variable length arrays can't be initialized.  */
1786       gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
1787 
1788       if (tree_fits_shwi_p (array_size))
1789 	{
1790 	  HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
1791 	  if (array_size_value > 0
1792 	      && array_size_value == (int) array_size_value
1793 	      && format_length > array_size_value)
1794 	    format_length = array_size_value;
1795 	}
1796     }
1797   if (offset)
1798     {
1799       if (offset >= format_length)
1800 	{
1801 	  res->number_non_literal++;
1802 	  return;
1803 	}
1804       format_chars += offset;
1805       format_length -= offset;
1806     }
1807   if (format_length < 1 || format_chars[--format_length] != 0)
1808     {
1809       res->number_unterminated++;
1810       return;
1811     }
1812   if (format_length == 0)
1813     {
1814       res->number_empty++;
1815       return;
1816     }
1817 
1818   /* Skip to first argument to check.  */
1819   while (arg_num + 1 < info->first_arg_num)
1820     {
1821       if (params == 0)
1822 	return;
1823       params = TREE_CHAIN (params);
1824       ++arg_num;
1825     }
1826   /* Provisionally increment res->number_other; check_format_info_main
1827      will decrement it if it finds there are extra arguments, but this way
1828      need not adjust it for every return.  */
1829   res->number_other++;
1830   object_allocator <format_wanted_type> fwt_pool ("format_wanted_type pool");
1831   check_format_info_main (res, info, format_chars, fmt_param_loc, format_tree,
1832 			  format_length, params, arg_num, fwt_pool, arglocs);
1833 }
1834 
1835 /* Support class for argument_parser and check_format_info_main.
1836    Tracks any flag characters that have been applied to the
1837    current argument.  */
1838 
1839 class flag_chars_t
1840 {
1841  public:
1842   flag_chars_t ();
1843   bool has_char_p (char ch) const;
1844   void add_char (char ch);
1845   void validate (const format_kind_info *fki,
1846 		 const format_char_info *fci,
1847 		 const format_flag_spec *flag_specs,
1848 		 const char * const format_chars,
1849 		 tree format_string_cst,
1850 		 location_t format_string_loc,
1851 		 const char * const orig_format_chars,
1852 		 char format_char,
1853 		 bool quoted);
1854   int get_alloc_flag (const format_kind_info *fki);
1855   int assignment_suppression_p (const format_kind_info *fki);
1856 
1857  private:
1858   char m_flag_chars[256];
1859 };
1860 
1861 /* Support struct for argument_parser and check_format_info_main.
1862    Encapsulates any length modifier applied to the current argument.  */
1863 
1864 class length_modifier
1865 {
1866 public:
length_modifier()1867   length_modifier ()
1868   : chars (NULL), val (FMT_LEN_none), std (STD_C89),
1869     scalar_identity_flag (0)
1870   {
1871   }
1872 
length_modifier(const char * chars_,enum format_lengths val_,enum format_std_version std_,int scalar_identity_flag_)1873   length_modifier (const char *chars_,
1874 		   enum format_lengths val_,
1875 		   enum format_std_version std_,
1876 		   int scalar_identity_flag_)
1877   : chars (chars_), val (val_), std (std_),
1878     scalar_identity_flag (scalar_identity_flag_)
1879   {
1880   }
1881 
1882   const char *chars;
1883   enum format_lengths val;
1884   enum format_std_version std;
1885   int scalar_identity_flag;
1886 };
1887 
1888 /* Parsing one argument within a format string.  */
1889 
1890 class argument_parser
1891 {
1892  public:
1893   argument_parser (function_format_info *info, const char *&format_chars,
1894 		   tree format_string_cst,
1895 		   const char * const orig_format_chars,
1896 		   location_t format_string_loc, flag_chars_t &flag_chars,
1897 		   int &has_operand_number, tree first_fillin_param,
1898 		   object_allocator <format_wanted_type> &fwt_pool_,
1899 		   vec<location_t> *arglocs);
1900 
1901   bool read_any_dollar ();
1902 
1903   bool read_format_flags ();
1904 
1905   bool
1906   read_any_format_width (tree &params,
1907 			 unsigned HOST_WIDE_INT &arg_num);
1908 
1909   void
1910   read_any_format_left_precision ();
1911 
1912   bool
1913   read_any_format_precision (tree &params,
1914 			     unsigned HOST_WIDE_INT &arg_num);
1915 
1916   void handle_alloc_chars ();
1917 
1918   length_modifier read_any_length_modifier ();
1919 
1920   void read_any_other_modifier ();
1921 
1922   const format_char_info *find_format_char_info (char format_char);
1923 
1924   void
1925   validate_flag_pairs (const format_char_info *fci,
1926 		       char format_char);
1927 
1928   void
1929   give_y2k_warnings (const format_char_info *fci,
1930 		     char format_char);
1931 
1932   void parse_any_scan_set (const format_char_info *fci);
1933 
1934   bool handle_conversions (const format_char_info *fci,
1935 			   const length_modifier &len_modifier,
1936 			   tree &wanted_type,
1937 			   const char *&wanted_type_name,
1938 			   unsigned HOST_WIDE_INT &arg_num,
1939 			   tree &params,
1940 			   char format_char);
1941 
1942   bool
1943   check_argument_type (const format_char_info *fci,
1944 		       const length_modifier &len_modifier,
1945 		       tree &wanted_type,
1946 		       const char *&wanted_type_name,
1947 		       const bool suppressed,
1948 		       unsigned HOST_WIDE_INT &arg_num,
1949 		       tree &params,
1950 		       const int alloc_flag,
1951 		       const char * const format_start,
1952 		       const char * const type_start,
1953 		       location_t fmt_param_loc,
1954 		       char conversion_char);
1955 
1956  private:
1957   const function_format_info *const info;
1958   const format_kind_info * const fki;
1959   const format_flag_spec * const flag_specs;
1960   const char *start_of_this_format;
1961   const char *&format_chars;
1962   const tree format_string_cst;
1963   const char * const orig_format_chars;
1964   const location_t format_string_loc;
1965   object_allocator <format_wanted_type> &fwt_pool;
1966   flag_chars_t &flag_chars;
1967   int main_arg_num;
1968   tree main_arg_params;
1969   int &has_operand_number;
1970   const tree first_fillin_param;
1971   format_wanted_type width_wanted_type;
1972   format_wanted_type precision_wanted_type;
1973  public:
1974   format_wanted_type main_wanted_type;
1975  private:
1976   format_wanted_type *first_wanted_type;
1977   format_wanted_type *last_wanted_type;
1978   vec<location_t> *arglocs;
1979 };
1980 
1981 /* flag_chars_t's constructor.  */
1982 
flag_chars_t()1983 flag_chars_t::flag_chars_t ()
1984 {
1985   m_flag_chars[0] = 0;
1986 }
1987 
1988 /* Has CH been seen as a flag within the current argument?  */
1989 
1990 bool
has_char_p(char ch)1991 flag_chars_t::has_char_p (char ch) const
1992 {
1993   return strchr (m_flag_chars, ch) != 0;
1994 }
1995 
1996 /* Add CH to the flags seen within the current argument.  */
1997 
1998 void
add_char(char ch)1999 flag_chars_t::add_char (char ch)
2000 {
2001   int i = strlen (m_flag_chars);
2002   m_flag_chars[i++] = ch;
2003   m_flag_chars[i] = 0;
2004 }
2005 
2006 /* Validate the individual flags used, removing any that are invalid.  */
2007 
2008 void
validate(const format_kind_info * fki,const format_char_info * fci,const format_flag_spec * flag_specs,const char * const format_chars,tree format_string_cst,location_t format_string_loc,const char * const orig_format_chars,char format_char,bool quoted)2009 flag_chars_t::validate (const format_kind_info *fki,
2010 			const format_char_info *fci,
2011 			const format_flag_spec *flag_specs,
2012 			const char * const format_chars,
2013 			tree format_string_cst,
2014 			location_t format_string_loc,
2015 			const char * const orig_format_chars,
2016 			char format_char,
2017 			bool quoted)
2018 {
2019   int i;
2020   int d = 0;
2021   bool quotflag = false;
2022 
2023   for (i = 0; m_flag_chars[i] != 0; i++)
2024     {
2025       const format_flag_spec *s = get_flag_spec (flag_specs,
2026 						 m_flag_chars[i], NULL);
2027       m_flag_chars[i - d] = m_flag_chars[i];
2028       if (m_flag_chars[i] == fki->length_code_char)
2029 	continue;
2030 
2031       /* Remember if a quoting flag is seen.  */
2032       quotflag |= s->quoting;
2033 
2034       if (strchr (fci->flag_chars, m_flag_chars[i]) == 0)
2035 	{
2036 	  format_warning_at_char (format_string_loc, format_string_cst,
2037 				  format_chars - orig_format_chars,
2038 				  OPT_Wformat_,
2039 				  "%s used with %<%%%c%> %s format",
2040 				  _(s->name), format_char, fki->name);
2041 	  d++;
2042 	  continue;
2043 	}
2044       if (pedantic)
2045 	{
2046 	  const format_flag_spec *t;
2047 	  if (ADJ_STD (s->std) > C_STD_VER)
2048 	    warning_at (format_string_loc, OPT_Wformat_,
2049 			"%s does not support %s",
2050 			C_STD_NAME (s->std), _(s->long_name));
2051 	  t = get_flag_spec (flag_specs, m_flag_chars[i], fci->flags2);
2052 	  if (t != NULL && ADJ_STD (t->std) > ADJ_STD (s->std))
2053 	    {
2054 	      const char *long_name = (t->long_name != NULL
2055 				       ? t->long_name
2056 				       : s->long_name);
2057 	      if (ADJ_STD (t->std) > C_STD_VER)
2058 		warning_at (format_string_loc, OPT_Wformat_,
2059 			    "%s does not support %s with"
2060 			    " the %<%%%c%> %s format",
2061 			    C_STD_NAME (t->std), _(long_name),
2062 			    format_char, fki->name);
2063 	    }
2064 	}
2065 
2066       /* Detect quoting directives used within a quoted sequence, such
2067 	 as GCC's "%<...%qE".  */
2068       if (quoted && s->quoting)
2069 	{
2070 	  format_warning_at_char (format_string_loc, format_string_cst,
2071 				  format_chars - orig_format_chars - 1,
2072 				  OPT_Wformat_,
2073 				  "%s used within a quoted sequence",
2074 				  _(s->name));
2075 	}
2076     }
2077   m_flag_chars[i - d] = 0;
2078 
2079   if (!quoted
2080       && !quotflag
2081       && strchr (fci->flags2, '\''))
2082     {
2083       format_warning_at_char (format_string_loc, format_string_cst,
2084 			      format_chars - orig_format_chars,
2085 			      OPT_Wformat_,
2086 			      "%qc conversion used unquoted",
2087 			      format_char);
2088     }
2089 }
2090 
2091 /* Determine if an assignment-allocation has been set, requiring
2092    an extra char ** for writing back a dynamically-allocated char *.
2093    This is for handling the optional 'm' character in scanf.  */
2094 
2095 int
get_alloc_flag(const format_kind_info * fki)2096 flag_chars_t::get_alloc_flag (const format_kind_info *fki)
2097 {
2098   if ((fki->flags & (int) FMT_FLAG_SCANF_A_KLUDGE)
2099       && has_char_p ('a'))
2100     return 1;
2101   if (fki->alloc_char && has_char_p (fki->alloc_char))
2102     return 1;
2103   return 0;
2104 }
2105 
2106 /* Determine if an assignment-suppression character was seen.
2107    ('*' in scanf, for discarding the converted input).  */
2108 
2109 int
assignment_suppression_p(const format_kind_info * fki)2110 flag_chars_t::assignment_suppression_p (const format_kind_info *fki)
2111 {
2112   if (fki->suppression_char
2113       && has_char_p (fki->suppression_char))
2114     return 1;
2115   return 0;
2116 }
2117 
2118 /* Constructor for argument_parser.  Initialize for parsing one
2119    argument within a format string.  */
2120 
2121 argument_parser::
argument_parser(function_format_info * info_,const char * & format_chars_,tree format_string_cst_,const char * const orig_format_chars_,location_t format_string_loc_,flag_chars_t & flag_chars_,int & has_operand_number_,tree first_fillin_param_,object_allocator<format_wanted_type> & fwt_pool_,vec<location_t> * arglocs_)2122 argument_parser (function_format_info *info_, const char *&format_chars_,
2123 		 tree format_string_cst_,
2124 		 const char * const orig_format_chars_,
2125 		 location_t format_string_loc_,
2126 		 flag_chars_t &flag_chars_,
2127 		 int &has_operand_number_,
2128 		 tree first_fillin_param_,
2129 		 object_allocator <format_wanted_type> &fwt_pool_,
2130 		 vec<location_t> *arglocs_)
2131 : info (info_),
2132   fki (&format_types[info->format_type]),
2133   flag_specs (fki->flag_specs),
2134   start_of_this_format (format_chars_),
2135   format_chars (format_chars_),
2136   format_string_cst (format_string_cst_),
2137   orig_format_chars (orig_format_chars_),
2138   format_string_loc (format_string_loc_),
2139   fwt_pool (fwt_pool_),
2140   flag_chars (flag_chars_),
2141   main_arg_num (0),
2142   main_arg_params (NULL),
2143   has_operand_number (has_operand_number_),
2144   first_fillin_param (first_fillin_param_),
2145   first_wanted_type (NULL),
2146   last_wanted_type (NULL),
2147   arglocs (arglocs_)
2148 {
2149 }
2150 
2151 /* Handle dollars at the start of format arguments, setting up main_arg_params
2152    and main_arg_num.
2153 
2154    Return true if format parsing is to continue, false otherwise.  */
2155 
2156 bool
read_any_dollar()2157 argument_parser::read_any_dollar ()
2158 {
2159   if ((fki->flags & (int) FMT_FLAG_USE_DOLLAR) && has_operand_number != 0)
2160     {
2161       /* Possibly read a $ operand number at the start of the format.
2162 	 If one was previously used, one is required here.  If one
2163 	 is not used here, we can't immediately conclude this is a
2164 	 format without them, since it could be printf %m or scanf %*.  */
2165       int opnum;
2166       opnum = maybe_read_dollar_number (&format_chars, 0,
2167 					first_fillin_param,
2168 					&main_arg_params, fki);
2169       if (opnum == -1)
2170 	return false;
2171       else if (opnum > 0)
2172 	{
2173 	  has_operand_number = 1;
2174 	  main_arg_num = opnum + info->first_arg_num - 1;
2175 	}
2176     }
2177   else if (fki->flags & FMT_FLAG_USE_DOLLAR)
2178     {
2179       if (avoid_dollar_number (format_chars))
2180 	return false;
2181     }
2182   return true;
2183 }
2184 
2185 /* Read any format flags, but do not yet validate them beyond removing
2186    duplicates, since in general validation depends on the rest of
2187    the format.
2188 
2189    Return true if format parsing is to continue, false otherwise.  */
2190 
2191 bool
read_format_flags()2192 argument_parser::read_format_flags ()
2193 {
2194   while (*format_chars != 0
2195 	 && strchr (fki->flag_chars, *format_chars) != 0)
2196     {
2197       const format_flag_spec *s = get_flag_spec (flag_specs,
2198 						 *format_chars, NULL);
2199       if (flag_chars.has_char_p (*format_chars))
2200 	{
2201 	  format_warning_at_char (format_string_loc, format_string_cst,
2202 				  format_chars + 1 - orig_format_chars,
2203 				  OPT_Wformat_,
2204 				  "repeated %s in format", _(s->name));
2205 	}
2206       else
2207 	flag_chars.add_char (*format_chars);
2208 
2209       if (s->skip_next_char)
2210 	{
2211 	  ++format_chars;
2212 	  if (*format_chars == 0)
2213 	    {
2214 	      warning_at (format_string_loc, OPT_Wformat_,
2215 			  "missing fill character at end of strfmon format");
2216 	      return false;
2217 	    }
2218 	}
2219       ++format_chars;
2220     }
2221 
2222   return true;
2223 }
2224 
2225 /* Read any format width, possibly * or *m$.
2226 
2227    Return true if format parsing is to continue, false otherwise.  */
2228 
2229 bool
2230 argument_parser::
read_any_format_width(tree & params,unsigned HOST_WIDE_INT & arg_num)2231 read_any_format_width (tree &params,
2232 		       unsigned HOST_WIDE_INT &arg_num)
2233 {
2234   if (!fki->width_char)
2235     return true;
2236 
2237   if (fki->width_type != NULL && *format_chars == '*')
2238     {
2239       flag_chars.add_char (fki->width_char);
2240       /* "...a field width...may be indicated by an asterisk.
2241 	 In this case, an int argument supplies the field width..."  */
2242       ++format_chars;
2243       if (has_operand_number != 0)
2244 	{
2245 	  int opnum;
2246 	  opnum = maybe_read_dollar_number (&format_chars,
2247 					    has_operand_number == 1,
2248 					    first_fillin_param,
2249 					    &params, fki);
2250 	  if (opnum == -1)
2251 	    return false;
2252 	  else if (opnum > 0)
2253 	    {
2254 	      has_operand_number = 1;
2255 	      arg_num = opnum + info->first_arg_num - 1;
2256 	    }
2257 	  else
2258 	    has_operand_number = 0;
2259 	}
2260       else
2261 	{
2262 	  if (avoid_dollar_number (format_chars))
2263 	    return false;
2264 	}
2265       if (info->first_arg_num != 0)
2266 	{
2267 	  tree cur_param;
2268 	  if (params == 0)
2269 	    cur_param = NULL;
2270 	  else
2271 	    {
2272 	      cur_param = TREE_VALUE (params);
2273 	      if (has_operand_number <= 0)
2274 		{
2275 		  params = TREE_CHAIN (params);
2276 		  ++arg_num;
2277 		}
2278 	    }
2279 	  width_wanted_type.wanted_type = *fki->width_type;
2280 	  width_wanted_type.wanted_type_name = NULL;
2281 	  width_wanted_type.pointer_count = 0;
2282 	  width_wanted_type.char_lenient_flag = 0;
2283 	  width_wanted_type.scalar_identity_flag = 0;
2284 	  width_wanted_type.writing_in_flag = 0;
2285 	  width_wanted_type.reading_from_flag = 0;
2286 	  width_wanted_type.kind = CF_KIND_FIELD_WIDTH;
2287 	  width_wanted_type.format_start = format_chars - 1;
2288 	  width_wanted_type.format_length = 1;
2289 	  width_wanted_type.param = cur_param;
2290 	  width_wanted_type.arg_num = arg_num;
2291 	  width_wanted_type.offset_loc =
2292 	    format_chars - orig_format_chars;
2293 	  width_wanted_type.next = NULL;
2294 	  if (last_wanted_type != 0)
2295 	    last_wanted_type->next = &width_wanted_type;
2296 	  if (first_wanted_type == 0)
2297 	    first_wanted_type = &width_wanted_type;
2298 	  last_wanted_type = &width_wanted_type;
2299 	}
2300     }
2301   else
2302     {
2303       /* Possibly read a numeric width.  If the width is zero,
2304 	 we complain if appropriate.  */
2305       int non_zero_width_char = FALSE;
2306       int found_width = FALSE;
2307       while (ISDIGIT (*format_chars))
2308 	{
2309 	  found_width = TRUE;
2310 	  if (*format_chars != '0')
2311 	    non_zero_width_char = TRUE;
2312 	  ++format_chars;
2313 	}
2314       if (found_width && !non_zero_width_char &&
2315 	  (fki->flags & (int) FMT_FLAG_ZERO_WIDTH_BAD))
2316 	warning_at (format_string_loc, OPT_Wformat_,
2317 		    "zero width in %s format", fki->name);
2318       if (found_width)
2319 	flag_chars.add_char (fki->width_char);
2320     }
2321 
2322   return true;
2323 }
2324 
2325 /* Read any format left precision (must be a number, not *).  */
2326 void
read_any_format_left_precision()2327 argument_parser::read_any_format_left_precision ()
2328 {
2329   if (fki->left_precision_char == 0)
2330     return;
2331   if (*format_chars != '#')
2332     return;
2333 
2334   ++format_chars;
2335   flag_chars.add_char (fki->left_precision_char);
2336   if (!ISDIGIT (*format_chars))
2337     format_warning_at_char (format_string_loc, format_string_cst,
2338 			    format_chars - orig_format_chars,
2339 			    OPT_Wformat_,
2340 			    "empty left precision in %s format", fki->name);
2341   while (ISDIGIT (*format_chars))
2342     ++format_chars;
2343 }
2344 
2345 /* Read any format precision, possibly * or *m$.
2346 
2347    Return true if format parsing is to continue, false otherwise.  */
2348 
2349 bool
2350 argument_parser::
read_any_format_precision(tree & params,unsigned HOST_WIDE_INT & arg_num)2351 read_any_format_precision (tree &params,
2352 			   unsigned HOST_WIDE_INT &arg_num)
2353 {
2354   if (fki->precision_char == 0)
2355     return true;
2356   if (*format_chars != '.')
2357     return true;
2358 
2359   ++format_chars;
2360   flag_chars.add_char (fki->precision_char);
2361   if (fki->precision_type != NULL && *format_chars == '*')
2362     {
2363       /* "...a...precision...may be indicated by an asterisk.
2364 	 In this case, an int argument supplies the...precision."  */
2365       ++format_chars;
2366       if (has_operand_number != 0)
2367 	{
2368 	  int opnum;
2369 	  opnum = maybe_read_dollar_number (&format_chars,
2370 					    has_operand_number == 1,
2371 					    first_fillin_param,
2372 					    &params, fki);
2373 	  if (opnum == -1)
2374 	    return false;
2375 	  else if (opnum > 0)
2376 	    {
2377 	      has_operand_number = 1;
2378 	      arg_num = opnum + info->first_arg_num - 1;
2379 	    }
2380 	  else
2381 	    has_operand_number = 0;
2382 	}
2383       else
2384 	{
2385 	  if (avoid_dollar_number (format_chars))
2386 	    return false;
2387 	}
2388       if (info->first_arg_num != 0)
2389 	{
2390 	  tree cur_param;
2391 	  if (params == 0)
2392 	    cur_param = NULL;
2393 	  else
2394 	    {
2395 	      cur_param = TREE_VALUE (params);
2396 	      if (has_operand_number <= 0)
2397 		{
2398 		  params = TREE_CHAIN (params);
2399 		  ++arg_num;
2400 		}
2401 	    }
2402 	  precision_wanted_type.wanted_type = *fki->precision_type;
2403 	  precision_wanted_type.wanted_type_name = NULL;
2404 	  precision_wanted_type.pointer_count = 0;
2405 	  precision_wanted_type.char_lenient_flag = 0;
2406 	  precision_wanted_type.scalar_identity_flag = 0;
2407 	  precision_wanted_type.writing_in_flag = 0;
2408 	  precision_wanted_type.reading_from_flag = 0;
2409 	  precision_wanted_type.kind = CF_KIND_FIELD_PRECISION;
2410 	  precision_wanted_type.param = cur_param;
2411 	  precision_wanted_type.format_start = format_chars - 2;
2412 	  precision_wanted_type.format_length = 2;
2413 	  precision_wanted_type.arg_num = arg_num;
2414 	  precision_wanted_type.offset_loc =
2415 	    format_chars - orig_format_chars;
2416 	  precision_wanted_type.next = NULL;
2417 	  if (last_wanted_type != 0)
2418 	    last_wanted_type->next = &precision_wanted_type;
2419 	  if (first_wanted_type == 0)
2420 	    first_wanted_type = &precision_wanted_type;
2421 	  last_wanted_type = &precision_wanted_type;
2422 	}
2423     }
2424   else
2425     {
2426       if (!(fki->flags & (int) FMT_FLAG_EMPTY_PREC_OK)
2427 	  && !ISDIGIT (*format_chars))
2428 	format_warning_at_char (format_string_loc, format_string_cst,
2429 				format_chars - orig_format_chars,
2430 				OPT_Wformat_,
2431 				"empty precision in %s format", fki->name);
2432       while (ISDIGIT (*format_chars))
2433 	++format_chars;
2434     }
2435 
2436   return true;
2437 }
2438 
2439 /* Parse any assignment-allocation flags, which request an extra
2440    char ** for writing back a dynamically-allocated char *.
2441    This is for handling the optional 'm' character in scanf,
2442    and, before C99, 'a' (for compatibility with a non-standard
2443    GNU libc extension).  */
2444 
2445 void
handle_alloc_chars()2446 argument_parser::handle_alloc_chars ()
2447 {
2448   if (fki->alloc_char && fki->alloc_char == *format_chars)
2449     {
2450       flag_chars.add_char (fki->alloc_char);
2451       format_chars++;
2452     }
2453 
2454   /* Handle the scanf allocation kludge.  */
2455   if (fki->flags & (int) FMT_FLAG_SCANF_A_KLUDGE)
2456     {
2457       if (*format_chars == 'a' && !flag_isoc99)
2458 	{
2459 	  if (format_chars[1] == 's' || format_chars[1] == 'S'
2460 	      || format_chars[1] == '[')
2461 	    {
2462 	      /* 'a' is used as a flag.  */
2463 	      flag_chars.add_char ('a');
2464 	      format_chars++;
2465 	    }
2466 	}
2467     }
2468 }
2469 
2470 /* Look for length modifiers within the current format argument,
2471    returning a length_modifier instance describing it (or the
2472    default if one is not found).
2473 
2474    Issue warnings about non-standard modifiers.  */
2475 
2476 length_modifier
read_any_length_modifier()2477 argument_parser::read_any_length_modifier ()
2478 {
2479   length_modifier result;
2480 
2481   const format_length_info *fli = fki->length_char_specs;
2482   if (!fli)
2483     return result;
2484 
2485   while (fli->name != 0
2486 	 && strncmp (fli->name, format_chars, strlen (fli->name)))
2487     fli++;
2488   if (fli->name != 0)
2489     {
2490       format_chars += strlen (fli->name);
2491       if (fli->double_name != 0 && fli->name[0] == *format_chars)
2492 	{
2493 	  format_chars++;
2494 	  result = length_modifier (fli->double_name, fli->double_index,
2495 				    fli->double_std, 0);
2496 	}
2497       else
2498 	{
2499 	  result = length_modifier (fli->name, fli->index, fli->std,
2500 				    fli->scalar_identity_flag);
2501 	}
2502       flag_chars.add_char (fki->length_code_char);
2503     }
2504   if (pedantic)
2505     {
2506       /* Warn if the length modifier is non-standard.  */
2507       if (ADJ_STD (result.std) > C_STD_VER)
2508 	warning_at (format_string_loc, OPT_Wformat_,
2509 		    "%s does not support the %qs %s length modifier",
2510 		    C_STD_NAME (result.std), result.chars,
2511 		    fki->name);
2512     }
2513 
2514   return result;
2515 }
2516 
2517 /* Read any other modifier (strftime E/O).  */
2518 
2519 void
read_any_other_modifier()2520 argument_parser::read_any_other_modifier ()
2521 {
2522   if (fki->modifier_chars == NULL)
2523     return;
2524 
2525   while (*format_chars != 0
2526 	 && strchr (fki->modifier_chars, *format_chars) != 0)
2527     {
2528       if (flag_chars.has_char_p (*format_chars))
2529 	{
2530 	  const format_flag_spec *s = get_flag_spec (flag_specs,
2531 						     *format_chars, NULL);
2532 	  format_warning_at_char (format_string_loc, format_string_cst,
2533 				  format_chars - orig_format_chars,
2534 				  OPT_Wformat_,
2535 				  "repeated %s in format", _(s->name));
2536 	}
2537       else
2538 	flag_chars.add_char (*format_chars);
2539       ++format_chars;
2540     }
2541 }
2542 
2543 /* Return the format_char_info corresponding to FORMAT_CHAR,
2544    potentially issuing a warning if the format char is
2545    not supported in the C standard version we are checking
2546    against.
2547 
2548    Issue a warning and return NULL if it is not found.
2549 
2550    Issue warnings about non-standard modifiers.  */
2551 
2552 const format_char_info *
find_format_char_info(char format_char)2553 argument_parser::find_format_char_info (char format_char)
2554 {
2555   const format_char_info *fci = fki->conversion_specs;
2556 
2557   while (fci->format_chars != 0
2558 	 && strchr (fci->format_chars, format_char) == 0)
2559     ++fci;
2560   if (fci->format_chars == 0)
2561     {
2562       format_warning_at_char (format_string_loc, format_string_cst,
2563 			      format_chars - orig_format_chars,
2564 			      OPT_Wformat_,
2565 			      "unknown conversion type character"
2566 			      " %qc in format",
2567 			      format_char);
2568       return NULL;
2569     }
2570 
2571   if (pedantic)
2572     {
2573       if (ADJ_STD (fci->std) > C_STD_VER)
2574 	format_warning_at_char (format_string_loc, format_string_cst,
2575 				format_chars - orig_format_chars,
2576 				OPT_Wformat_,
2577 				"%s does not support the %<%%%c%> %s format",
2578 				C_STD_NAME (fci->std), format_char, fki->name);
2579     }
2580 
2581   return fci;
2582 }
2583 
2584 /* Validate the pairs of flags used.
2585    Issue warnings about incompatible combinations of flags.  */
2586 
2587 void
validate_flag_pairs(const format_char_info * fci,char format_char)2588 argument_parser::validate_flag_pairs (const format_char_info *fci,
2589 				      char format_char)
2590 {
2591   const format_flag_pair * const bad_flag_pairs = fki->bad_flag_pairs;
2592 
2593   for (int i = 0; bad_flag_pairs[i].flag_char1 != 0; i++)
2594     {
2595       const format_flag_spec *s, *t;
2596       if (!flag_chars.has_char_p (bad_flag_pairs[i].flag_char1))
2597 	continue;
2598       if (!flag_chars.has_char_p (bad_flag_pairs[i].flag_char2))
2599 	continue;
2600       if (bad_flag_pairs[i].predicate != 0
2601 	  && strchr (fci->flags2, bad_flag_pairs[i].predicate) == 0)
2602 	continue;
2603       s = get_flag_spec (flag_specs, bad_flag_pairs[i].flag_char1, NULL);
2604       t = get_flag_spec (flag_specs, bad_flag_pairs[i].flag_char2, NULL);
2605       if (bad_flag_pairs[i].ignored)
2606 	{
2607 	  if (bad_flag_pairs[i].predicate != 0)
2608 	    warning_at (format_string_loc, OPT_Wformat_,
2609 			"%s ignored with %s and %<%%%c%> %s format",
2610 			_(s->name), _(t->name), format_char,
2611 			fki->name);
2612 	  else
2613 	    warning_at (format_string_loc, OPT_Wformat_,
2614 			"%s ignored with %s in %s format",
2615 			_(s->name), _(t->name), fki->name);
2616 	}
2617       else
2618 	{
2619 	  if (bad_flag_pairs[i].predicate != 0)
2620 	    warning_at (format_string_loc, OPT_Wformat_,
2621 			"use of %s and %s together with %<%%%c%> %s format",
2622 			_(s->name), _(t->name), format_char,
2623 			fki->name);
2624 	  else
2625 	    warning_at (format_string_loc, OPT_Wformat_,
2626 			"use of %s and %s together in %s format",
2627 			_(s->name), _(t->name), fki->name);
2628 	}
2629     }
2630 }
2631 
2632 /* Give Y2K warnings.  */
2633 
2634 void
give_y2k_warnings(const format_char_info * fci,char format_char)2635 argument_parser::give_y2k_warnings (const format_char_info *fci,
2636 				    char format_char)
2637 {
2638   if (!warn_format_y2k)
2639     return;
2640 
2641   int y2k_level = 0;
2642   if (strchr (fci->flags2, '4') != 0)
2643     if (flag_chars.has_char_p ('E'))
2644       y2k_level = 3;
2645     else
2646       y2k_level = 2;
2647   else if (strchr (fci->flags2, '3') != 0)
2648     y2k_level = 3;
2649   else if (strchr (fci->flags2, '2') != 0)
2650     y2k_level = 2;
2651   if (y2k_level == 3)
2652     warning_at (format_string_loc, OPT_Wformat_y2k,
2653 		"%<%%%c%> yields only last 2 digits of "
2654 		"year in some locales", format_char);
2655   else if (y2k_level == 2)
2656     warning_at (format_string_loc, OPT_Wformat_y2k,
2657 		"%<%%%c%> yields only last 2 digits of year",
2658 		format_char);
2659 }
2660 
2661 /* Parse any "scan sets" enclosed in square brackets, e.g.
2662    for scanf-style calls.  */
2663 
2664 void
parse_any_scan_set(const format_char_info * fci)2665 argument_parser::parse_any_scan_set (const format_char_info *fci)
2666 {
2667   if (strchr (fci->flags2, '[') == NULL)
2668     return;
2669 
2670   /* Skip over scan set, in case it happens to have '%' in it.  */
2671   if (*format_chars == '^')
2672     ++format_chars;
2673   /* Find closing bracket; if one is hit immediately, then
2674      it's part of the scan set rather than a terminator.  */
2675   if (*format_chars == ']')
2676     ++format_chars;
2677   while (*format_chars && *format_chars != ']')
2678     ++format_chars;
2679   if (*format_chars != ']')
2680     /* The end of the format string was reached.  */
2681     format_warning_at_char (format_string_loc, format_string_cst,
2682 			    format_chars - orig_format_chars,
2683 			    OPT_Wformat_,
2684 			    "no closing %<]%> for %<%%[%> format");
2685 }
2686 
2687 /* Return true if this argument is to be continued to be parsed,
2688    false to skip to next argument.  */
2689 
2690 bool
handle_conversions(const format_char_info * fci,const length_modifier & len_modifier,tree & wanted_type,const char * & wanted_type_name,unsigned HOST_WIDE_INT & arg_num,tree & params,char format_char)2691 argument_parser::handle_conversions (const format_char_info *fci,
2692 				     const length_modifier &len_modifier,
2693 				     tree &wanted_type,
2694 				     const char *&wanted_type_name,
2695 				     unsigned HOST_WIDE_INT &arg_num,
2696 				     tree &params,
2697 				     char format_char)
2698 {
2699   enum format_std_version wanted_type_std;
2700 
2701   if (!(fki->flags & (int) FMT_FLAG_ARG_CONVERT))
2702     return true;
2703 
2704   wanted_type = (fci->types[len_modifier.val].type
2705 		 ? *fci->types[len_modifier.val].type : 0);
2706   wanted_type_name = fci->types[len_modifier.val].name;
2707   wanted_type_std = fci->types[len_modifier.val].std;
2708   if (wanted_type == 0)
2709     {
2710       format_warning_at_char (format_string_loc, format_string_cst,
2711 			      format_chars - orig_format_chars,
2712 			      OPT_Wformat_,
2713 			      "use of %qs length modifier with %qc type"
2714 			      " character has either no effect"
2715 			      " or undefined behavior",
2716 			      len_modifier.chars, format_char);
2717       /* Heuristic: skip one argument when an invalid length/type
2718 	 combination is encountered.  */
2719       arg_num++;
2720       if (params != 0)
2721 	params = TREE_CHAIN (params);
2722       return false;
2723     }
2724   else if (pedantic
2725 	   /* Warn if non-standard, provided it is more non-standard
2726 	      than the length and type characters that may already
2727 	      have been warned for.  */
2728 	   && ADJ_STD (wanted_type_std) > ADJ_STD (len_modifier.std)
2729 	   && ADJ_STD (wanted_type_std) > ADJ_STD (fci->std))
2730     {
2731       if (ADJ_STD (wanted_type_std) > C_STD_VER)
2732 	format_warning_at_char (format_string_loc, format_string_cst,
2733 				format_chars - orig_format_chars,
2734 				OPT_Wformat_,
2735 				"%s does not support the %<%%%s%c%> %s format",
2736 				C_STD_NAME (wanted_type_std),
2737 				len_modifier.chars,
2738 				format_char, fki->name);
2739     }
2740 
2741   return true;
2742 }
2743 
2744 /* Check type of argument against desired type.
2745 
2746    Return true if format parsing is to continue, false otherwise.  */
2747 
2748 bool
2749 argument_parser::
check_argument_type(const format_char_info * fci,const length_modifier & len_modifier,tree & wanted_type,const char * & wanted_type_name,const bool suppressed,unsigned HOST_WIDE_INT & arg_num,tree & params,const int alloc_flag,const char * const format_start,const char * const type_start,location_t fmt_param_loc,char conversion_char)2750 check_argument_type (const format_char_info *fci,
2751 		     const length_modifier &len_modifier,
2752 		     tree &wanted_type,
2753 		     const char *&wanted_type_name,
2754 		     const bool suppressed,
2755 		     unsigned HOST_WIDE_INT &arg_num,
2756 		     tree &params,
2757 		     const int alloc_flag,
2758 		     const char * const format_start,
2759 		     const char * const type_start,
2760 		     location_t fmt_param_loc,
2761 		     char conversion_char)
2762 {
2763   if (info->first_arg_num == 0)
2764     return true;
2765 
2766   if ((fci->pointer_count == 0 && wanted_type == void_type_node)
2767       || suppressed)
2768     {
2769       if (main_arg_num != 0)
2770 	{
2771 	  if (suppressed)
2772 	    warning_at (format_string_loc, OPT_Wformat_,
2773 			"operand number specified with "
2774 			"suppressed assignment");
2775 	  else
2776 	    warning_at (format_string_loc, OPT_Wformat_,
2777 			"operand number specified for format "
2778 			"taking no argument");
2779 	}
2780     }
2781   else
2782     {
2783       format_wanted_type *wanted_type_ptr;
2784 
2785       if (main_arg_num != 0)
2786 	{
2787 	  arg_num = main_arg_num;
2788 	  params = main_arg_params;
2789 	}
2790       else
2791 	{
2792 	  ++arg_num;
2793 	  if (has_operand_number > 0)
2794 	    {
2795 	      warning_at (format_string_loc, OPT_Wformat_,
2796 			  "missing $ operand number in format");
2797 	      return false;
2798 	    }
2799 	  else
2800 	    has_operand_number = 0;
2801 	}
2802 
2803       wanted_type_ptr = &main_wanted_type;
2804       while (fci)
2805 	{
2806 	  tree cur_param;
2807 	  if (params == 0)
2808 	    cur_param = NULL;
2809 	  else
2810 	    {
2811 	      cur_param = TREE_VALUE (params);
2812 	      params = TREE_CHAIN (params);
2813 	    }
2814 
2815 	  wanted_type_ptr->wanted_type = wanted_type;
2816 	  wanted_type_ptr->wanted_type_name = wanted_type_name;
2817 	  wanted_type_ptr->pointer_count = fci->pointer_count + alloc_flag;
2818 	  wanted_type_ptr->char_lenient_flag = 0;
2819 	  if (strchr (fci->flags2, 'c') != 0)
2820 	    wanted_type_ptr->char_lenient_flag = 1;
2821 	  wanted_type_ptr->scalar_identity_flag = 0;
2822 	  if (len_modifier.scalar_identity_flag)
2823 	    wanted_type_ptr->scalar_identity_flag = 1;
2824 	  wanted_type_ptr->writing_in_flag = 0;
2825 	  wanted_type_ptr->reading_from_flag = 0;
2826 	  if (alloc_flag)
2827 	    wanted_type_ptr->writing_in_flag = 1;
2828 	  else
2829 	    {
2830 	      if (strchr (fci->flags2, 'W') != 0)
2831 		wanted_type_ptr->writing_in_flag = 1;
2832 	      if (strchr (fci->flags2, 'R') != 0)
2833 		wanted_type_ptr->reading_from_flag = 1;
2834 	    }
2835 	  wanted_type_ptr->kind = CF_KIND_FORMAT;
2836 	  wanted_type_ptr->param = cur_param;
2837 	  wanted_type_ptr->arg_num = arg_num;
2838 	  wanted_type_ptr->format_start = format_start;
2839 	  wanted_type_ptr->format_length = format_chars - format_start;
2840 	  wanted_type_ptr->offset_loc = format_chars - orig_format_chars;
2841 	  wanted_type_ptr->next = NULL;
2842 	  if (last_wanted_type != 0)
2843 	    last_wanted_type->next = wanted_type_ptr;
2844 	  if (first_wanted_type == 0)
2845 	    first_wanted_type = wanted_type_ptr;
2846 	  last_wanted_type = wanted_type_ptr;
2847 
2848 	  fci = fci->chain;
2849 	  if (fci)
2850 	    {
2851 	      wanted_type_ptr = fwt_pool.allocate ();
2852 	      arg_num++;
2853 	      wanted_type = *fci->types[len_modifier.val].type;
2854 	      wanted_type_name = fci->types[len_modifier.val].name;
2855 	    }
2856 	}
2857     }
2858 
2859   if (first_wanted_type != 0)
2860     {
2861       ptrdiff_t offset_to_format_start = (start_of_this_format - 1) - orig_format_chars;
2862       ptrdiff_t offset_to_format_end = (format_chars - 1) - orig_format_chars;
2863       /* By default, use the end of the range for the caret location.  */
2864       substring_loc fmt_loc (fmt_param_loc, TREE_TYPE (format_string_cst),
2865 			     offset_to_format_end,
2866 			     offset_to_format_start, offset_to_format_end);
2867       ptrdiff_t offset_to_type_start = type_start - orig_format_chars;
2868       check_format_types (fmt_loc, first_wanted_type, fki,
2869 			  offset_to_type_start,
2870 			  conversion_char, arglocs);
2871     }
2872 
2873   return true;
2874 }
2875 
2876 /* Describes "paired tokens" within the format string that are
2877    expected to be balanced.  */
2878 
2879 class baltoks_t
2880 {
2881 public:
baltoks_t()2882   baltoks_t (): singlequote (), doublequote () { }
2883 
2884   typedef auto_vec<const char *> balanced_tokens_t;
2885   /* Vectors of pointers to opening brackets ('['), curly brackets ('{'),
2886      quoting directives (like GCC "%<"), parentheses, and angle brackets
2887      ('<').  Used to detect unbalanced tokens.  */
2888   balanced_tokens_t brackets;
2889   balanced_tokens_t curly;
2890   balanced_tokens_t quotdirs;
2891   balanced_tokens_t parens;
2892   balanced_tokens_t pointy;
2893   /* Pointer to the last opening quote.  */
2894   const char *singlequote;
2895   const char *doublequote;
2896 };
2897 
2898 /* Describes a keyword, operator, or other name that check_tokens
   looks for in a format string.  */
2899 
2900 struct token_t
2901 {
2902   const char *name;   /* Keyword/operator name, compared against the format text.  */
2903   unsigned char len;  /* Its length, not counting the terminating NUL.  */
2904   const char *alt;    /* Alternate spelling to suggest in a note, or NULL.  */
2905 };
2906 
2907 /* Helper for initializing global token_t arrays below.  NAME is
   expected to be a string literal so that sizeof NAME - 1 is its
   length; the alternate spelling is left NULL.  */
2908 #define NAME(name) { name, sizeof name - 1, NULL }
2909 
2910 /* C/C++ operators that are expected to be quoted within the format
2911    string.  */
2912 
2913 static const token_t c_opers[] =
2914   {
2915    NAME ("!="), NAME ("%="),  NAME ("&&"),  NAME ("&="), NAME ("*="),
2916    NAME ("++"), NAME ("+="),  NAME ("--"),  NAME ("-="), NAME ("->"),
2917    NAME ("/="), NAME ("<<"),  NAME ("<<="), NAME ("<="), NAME ("=="),
2918    NAME (">="), NAME (">>="), NAME (">>"),  NAME ("?:"),  NAME ("^="),
2919    NAME ("|="), NAME ("||")
2920   };
2921 
/* Operators specific to C++, in the same layout as c_opers.  */
2922 static const token_t cxx_opers[] =
2923   {
2924    NAME ("->*"), NAME (".*"),  NAME ("::"),  NAME ("<=>")
2925   };
2926 
2927 /* Common C/C++ keywords that are expected to be quoted within the format
2928    string.  Keywords like auto, inline, or volatile are excluded because
2929    they are sometimes used in common terms like /auto variables/, /inline
2930    function/, or /volatile access/ where they should not be quoted.
   Names ending in " %" match the keyword only when a format directive
   follows; check_tokens backs up to the last alphabetic character
   before reporting such a match.  */
2931 
2932 static const token_t c_keywords[] =
2933   {
   /* From here on NAME takes the alternate spelling as a second
      argument (NULL when there is none).  */
2934 #undef NAME
2935 #define NAME(name, alt)  { name, sizeof name - 1, alt }
2936 
2937    NAME ("alignas", NULL),
2938    NAME ("alignof", NULL),
2939    NAME ("asm", NULL),
2940    NAME ("bool", NULL),
2941    NAME ("char", NULL),
2942    NAME ("const %", NULL),
2943    NAME ("const-qualified", "%<const%>-qualified"),
2944    NAME ("float", NULL),
2945    NAME ("ifunc", NULL),
2946    NAME ("int", NULL),
2947    NAME ("long double", NULL),
2948    NAME ("long int", NULL),
2949    NAME ("long long", NULL),
2950    NAME ("malloc", NULL),
2951    NAME ("noclone", NULL),
2952    NAME ("noinline", NULL),
2953    NAME ("nonnull", NULL),
2954    NAME ("noreturn", NULL),
2955    NAME ("nothrow", NULL),
2956    NAME ("offsetof", NULL),
2957    NAME ("readonly", "read-only"),
2958    NAME ("readwrite", "read-write"),
2959    NAME ("restrict %", NULL),
2960    NAME ("restrict-qualified", "%<restrict%>-qualified"),
2961    NAME ("short int", NULL),
2962    NAME ("signed char", NULL),
2963    NAME ("signed int", NULL),
2964    NAME ("signed long", NULL),
2965    NAME ("signed short", NULL),
2966    NAME ("sizeof", NULL),
2967    NAME ("typeof", NULL),
2968    NAME ("unsigned char", NULL),
2969    NAME ("unsigned int", NULL),
2970    NAME ("unsigned long", NULL),
2971    NAME ("unsigned short", NULL),
2972    NAME ("volatile %", NULL),
2973    NAME ("volatile-qualified", "%<volatile%>-qualified"),
2974    NAME ("weakref", NULL),
2975   };
2976 
/* Keywords specific to C++, in the same layout as c_keywords.
   check_tokens stops at the first matching entry, so "constexpr if"
   has to come before "constexpr".  */
2977 static const token_t cxx_keywords[] =
2978   {
2979    /* C++ only keywords and operators.  */
2980    NAME ("catch", NULL),
2981    NAME ("constexpr if", NULL),
2982    NAME ("constexpr", NULL),
2983    NAME ("constinit", NULL),
2984    NAME ("consteval", NULL),
2985    NAME ("decltype", NULL),
2986    NAME ("nullptr", NULL),
2987    NAME ("operator delete", NULL),
2988    NAME ("operator new", NULL),
2989    NAME ("typeid", NULL),
2990    NAME ("typeinfo", NULL)
2991   };
2992 
2993 /* Blacklisted words such as misspellings that should be avoided in favor
2994    of the specified alternatives.  check_tokens compares these without
   regard to case and also recognizes a trailing 's' as a plural, in
   which case the same suffix is added to the suggested alternative.  */
2995 static const struct
2996 {
2997   const char *name;   /* Bad word.  */
2998   unsigned char len;  /* Its length.  */
2999   const char *alt;    /* Preferred alternative.  */
3000 } badwords[] =
3001   {
3002    NAME ("arg", "argument"),
3003    NAME ("bitfield", "bit-field"),
3004    NAME ("builtin function", "built-in function"),
3005    NAME ("can not", "cannot"),
3006    NAME ("commandline option", "command-line option"),
3007    NAME ("commandline", "command line"),
3008    NAME ("command line option", "command-line option"),
3009    NAME ("decl", "declaration"),
3010    NAME ("enumeral", "enumerated"),
3011    NAME ("floating point", "floating-point"),
3012    NAME ("non-zero", "nonzero"),
3013    NAME ("reg", "register"),
3014    NAME ("stmt", "statement"),
3015   };
3016 
3017 /* Common contractions that should be avoided in favor of the specified
3018    alternatives.  */
3019 
3020 static const struct
3021 {
3022   const char *name;   /* Contraction.  */
3023   unsigned char len;  /* Its length.  */
3024   const char *alt;    /* Preferred alternative.  */
3025 } contrs[] =
3026   {
3027    NAME ("can't", "cannot"),
3028    NAME ("didn't", "did not"),
3029    /* These are commonly abused.  Avoid diagnosing them for now.
3030       NAME ("isn't", "is not"),
3031       NAME ("don't", "is not"),
3032    */
3033    NAME ("mustn't", "must not"),
3034    NAME ("needn't", "need not"),
3035    NAME ("should't", "should not"),
3036    NAME ("that's", "that is"),
3037    NAME ("there's", "there is"),
3038    NAME ("they're", "they are"),
3039    NAME ("what's", "what is"),
3040    NAME ("won't", "will not")
3041   };
3042 
3043 /* Check for unquoted TOKENS.  FORMAT_STRING_LOC is the location of
3044    the format string, FORMAT_STRING_CST the format string itself (as
3045    a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to
3046    the beginning of the format string and the character currently
3047    being processed, and BALTOKS describes paired "tokens" within
3048    the format string that are expected to be balanced.
3049    Returns a pointer to the last processed character or null when
3050    nothing was done.  */
3051 
3052 static const char*
check_tokens(const token_t * tokens,unsigned ntoks,location_t format_string_loc,tree format_string_cst,const char * orig_format_chars,const char * format_chars,baltoks_t & baltoks)3053 check_tokens (const token_t *tokens, unsigned ntoks,
3054 	      location_t format_string_loc, tree format_string_cst,
3055 	      const char *orig_format_chars, const char *format_chars,
3056 	      baltoks_t &baltoks)
3057 {
3058   /* For brevity.  */
3059   const int opt = OPT_Wformat_diag;
3060   /* Zero-based starting position of a problem sequence.  */
3061   int fmtchrpos = format_chars - orig_format_chars;
3062 
3063   /* For identifier-like "words," set to the word length.  */
3064   unsigned wlen = 0;
3065   /* Set for an operator, clear for an identifier/word.  */
3066   bool is_oper = false;
3067   bool underscore = false;
3068 
3069   if (format_chars[0] == '_' || ISALPHA (format_chars[0]))
3070     {
3071       while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen]))
3072 	{
3073 	  underscore |= format_chars[wlen] == '_';
3074 	  ++wlen;
3075 	}
3076     }
3077   else
3078     is_oper = true;
3079 
3080   for (unsigned i = 0; i != ntoks; ++i)
3081     {
3082       unsigned toklen = tokens[i].len;
3083 
3084       if (toklen < wlen
3085 	  || strncmp (format_chars, tokens[i].name, toklen))
3086 	continue;
3087 
3088       if (toklen == 2
3089 	  && format_chars - orig_format_chars > 0
3090 	  && (TOUPPER (format_chars[-1]) == 'C'
3091 	      || TOUPPER (format_chars[-1]) == 'G'))
3092 	return format_chars + toklen - 1;   /* Reference to C++ or G++.  */
3093 
3094       if (ISPUNCT (format_chars[toklen - 1]))
3095 	{
3096 	  if (format_chars[toklen - 1] == format_chars[toklen])
3097 	    return NULL;   /* Operator followed by another punctuator.  */
3098 	}
3099       else if (ISALNUM (format_chars[toklen]))
3100 	return NULL;   /* Keyword prefix for a longer word.  */
3101 
3102       if (toklen == 2
3103 	  && format_chars[0] == '-'
3104 	  && format_chars[1] == '-'
3105 	  && ISALNUM (format_chars[2]))
3106 	return NULL;   /* Probably option like --help.  */
3107 
3108       /* Allow this ugly warning for the time being.  */
3109       if (toklen == 2
3110 	  && format_chars - orig_format_chars > 6
3111 	  && !strncmp (format_chars - 7, " count >= width of ", 19))
3112 	return format_chars + 10;
3113 
3114       /* The token is a type if it ends in an alphabetic character.  */
3115       bool is_type = (ISALPHA (tokens[i].name[toklen - 1])
3116 		      && strchr (tokens[i].name, ' '));
3117 
3118       /* Backtrack to the last alphabetic character (for tokens whose
3119 	 names end in '%').  */
3120       if (!is_oper)
3121 	while (!ISALPHA (tokens[i].name[toklen - 1]))
3122 	  --toklen;
3123 
3124       if (format_warning_substr (format_string_loc, format_string_cst,
3125 				 fmtchrpos, fmtchrpos + toklen, opt,
3126 				 (is_type
3127 				  ? G_("unquoted type name %<%.*s%> in format")
3128 				  : (is_oper
3129 				     ? G_("unquoted operator %<%.*s%> in format")
3130 				     : G_("unquoted keyword %<%.*s%> in format"))),
3131 				 toklen, format_chars)
3132 	  && tokens[i].alt)
3133 	inform (format_string_loc, "use %qs instead", tokens[i].alt);
3134 
3135       return format_chars + toklen - 1;
3136     }
3137 
3138   /* Diagnose unquoted __attribute__.  Consider any parenthesized
3139      argument to the attribute to avoid redundant warnings for
3140      the double parentheses that might follow.  */
3141   if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1))
3142     {
3143       unsigned nchars = sizeof "__attribute" - 1;
3144       while ('_' == format_chars[nchars])
3145 	++nchars;
3146 
3147       for (int i = nchars; format_chars[i]; ++i)
3148 	if (' ' != format_chars[i])
3149 	  {
3150 	    nchars = i;
3151 	    break;
3152 	  }
3153 
3154       if (format_chars[nchars] == '(')
3155 	{
3156 	  baltoks.parens.safe_push (format_chars + nchars);
3157 
3158 	  ++nchars;
3159 	  bool close = false;
3160 	  if (format_chars[nchars] == '(')
3161 	    {
3162 	      baltoks.parens.safe_push (format_chars + nchars);
3163 	      close = true;
3164 	      ++nchars;
3165 	    }
3166 	  for (int i = nchars; format_chars[i]; ++i)
3167 	    if (')' == format_chars[i])
3168 	      {
3169 		if (baltoks.parens.length () > 0)
3170 		  baltoks.parens.pop ();
3171 		nchars = i + 1;
3172 		break;
3173 	      }
3174 
3175 	  if (close && format_chars[nchars] == ')')
3176 	    {
3177 	      if (baltoks.parens.length () > 0)
3178 		baltoks.parens.pop ();
3179 	      ++nchars;
3180 	    }
3181 	}
3182 
3183       format_warning_substr (format_string_loc, format_string_cst,
3184 			     fmtchrpos, fmtchrpos + nchars, opt,
3185 			      "unquoted attribute in format");
3186       return format_chars + nchars - 1;
3187     }
3188 
3189   /* Diagnose unquoted built-ins.  */
3190   if (format_chars[0] == '_'
3191       && format_chars[1] == '_'
3192       && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1)
3193 	  || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1)
3194 	  || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1)))
3195     {
3196       format_warning_substr (format_string_loc, format_string_cst,
3197 			     fmtchrpos, fmtchrpos + wlen, opt,
3198 			     "unquoted name of built-in function %<%.*s%> "
3199 			     "in format",
3200 			     wlen, format_chars);
3201       return format_chars + wlen - 1;
3202     }
3203 
3204   /* Diagnose unquoted substrings of alphanumeric characters containing
3205      underscores.  They most likely refer to identifiers and should be
3206      quoted.  */
3207   if (underscore)
3208     format_warning_substr (format_string_loc, format_string_cst,
3209 			   format_chars - orig_format_chars,
3210 			   format_chars + wlen - orig_format_chars,
3211 			   opt,
3212 			   "unquoted identifier or keyword %<%.*s%> in format",
3213 			   wlen, format_chars);
3214   else
3215     {
3216       /* Diagnose some common missspellings.  */
3217       for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
3218 	{
3219 	  unsigned badwlen = strspn (badwords[i].name, " -");
3220 	  if (wlen >= badwlen
3221 	      && (wlen <= badwords[i].len
3222 		  || (wlen == badwords[i].len + 1U
3223 		      && TOUPPER (format_chars[wlen - 1]) == 'S'))
3224 	      && !strncasecmp (format_chars, badwords[i].name, badwords[i].len))
3225 	    {
3226 	      /* Handle singular as well as plural forms of all bad words
3227 		 even though the latter don't necessarily make sense for
3228 		 all of the former (like "can nots").  */
3229 	      badwlen = badwords[i].len;
3230 	      const char *plural = "";
3231 	      if (TOUPPER (format_chars[badwlen]) == 'S')
3232 		{
3233 		  ++badwlen;
3234 		  plural = "s";
3235 		}
3236 
3237 	      format_warning_substr (format_string_loc, format_string_cst,
3238 				     fmtchrpos, fmtchrpos + badwords[i].len,
3239 				     opt,
3240 				     "misspelled term %<%.*s%> in format; "
3241 				     "use %<%s%s%> instead",
3242 				     badwlen, format_chars,
3243 				     badwords[i].alt, plural);
3244 
3245 	      return format_chars + badwords[i].len - 1;
3246 	    }
3247 	}
3248 
3249       /* Skip C++/G++.  */
3250       if (!strncasecmp (format_chars, "c++", 3)
3251 	  || !strncasecmp (format_chars, "g++", 3))
3252 	return format_chars + 2;
3253     }
3254 
3255   return wlen ? format_chars + wlen - 1 : NULL;
3256 }
3257 
3258 /* Check plain text in a format string of a GCC diagnostic function
3259    for common quoting, punctuation, and spelling mistakes, and issue
3260    -Wformat-diag warnings if they are found.   FORMAT_STRING_LOC is
3261    the location of the format string, FORMAT_STRING_CST the format
3262    string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
3263    pointers to the beginning of the format string and the character
3264    currently being processed, and BALTOKS describes paired "tokens"
3265    within the format string that are expected to be balanced.
3266    Returns a pointer to the last processed character.  */
/* Notes on the implementation below:

   The checks are tried in a fixed order and the function returns as
   soon as one of them has dealt with the current sequence: a '%' at
   the current position, plain text inside an open quoting directive,
   runs of whitespace, runs of control characters, operator and
   keyword tokens (via check_tokens), option names, runs of
   punctuation, and finally runs of non-graph characters.

   In most paths the result is FORMAT_CHARS + NCHARS - 1, the last
   character of the sequence that was examined.  Two paths differ:
   when the current character is '%' the unchanged FORMAT_CHARS is
   returned, and when text inside a quoting directive is skipped the
   result points at the '%' or NUL that ended the skip.  */
3267 
3268 static const char*
check_plain(location_t format_string_loc,tree format_string_cst,const char * orig_format_chars,const char * format_chars,baltoks_t & baltoks)3269 check_plain (location_t format_string_loc, tree format_string_cst,
3270 	     const char *orig_format_chars, const char *format_chars,
3271 	     baltoks_t &baltoks)
3272 {
3273   /* For brevity.  */
3274   const int opt = OPT_Wformat_diag;
3275   /* Zero-based starting position of a problem sequence.  */
3276   int fmtchrpos = format_chars - orig_format_chars;
3277 
3278   if (*format_chars == '%')
3279     {
3280       /* Diagnose %<%s%> and suggest using %qs instead.  Otherwise,
	 when at least three characters precede the '%' (so that
	 FORMAT_CHARS - 3 is still inside the string), look back for
	 a case-insensitive "can%'t" and suggest "cannot".  */
3281       if (!strncmp (format_chars, "%<%s%>", 6))
3282 	format_warning_substr (format_string_loc, format_string_cst,
3283 			       fmtchrpos, fmtchrpos + 6, opt,
3284 			       "quoted %qs directive in format; "
3285 			       "use %qs instead", "%s", "%qs");
3286       else if (format_chars - orig_format_chars > 2
3287 	       && !strncasecmp (format_chars - 3, "can%'t", 6))
3288 	format_warning_substr (format_string_loc,
3289 			       format_string_cst,
3290 			       fmtchrpos - 3, fmtchrpos + 3, opt,
3291 			       "contraction %<%.*s%> in format; "
3292 			       "use %qs instead",
3293 			       6, format_chars - 3, "cannot");
3294 
      /* Whether or not anything was diagnosed, return with
	 FORMAT_CHARS still pointing at the '%'.  */
3295       return format_chars;
3296     }
3297 
3298   if (baltoks.quotdirs.length ())
3299     {
3300       /* Skip over all plain text within a quoting directive until
3301 	 the next directive.  */
3302       while (*format_chars && '%' != *format_chars)
3303 	++format_chars;
3304 
      /* The result points at the '%' or the terminating NUL that
	 stopped the scan, not at the last skipped character.  */
3305       return format_chars;
3306     }
3307 
3308   /* The length of the problem sequence.  */
3309   int nchars = 0;
3310 
3311   /* Diagnose any whitespace characters other than <space> but only
3312      leading, trailing, and two or more consecutive <space>s.  Do
3313      this before diagnosing control characters because whitespace
3314      is a subset of controls.  */
  /* OTHER_THAN_SPACE remembers the first whitespace character in the
     run that is not a plain space, if there is one.  */
3315   const char *other_than_space = NULL;
3316   while (ISSPACE (format_chars[nchars]))
3317     {
3318       if (format_chars[nchars] != ' ' && !other_than_space)
3319 	other_than_space = format_chars + nchars;
3320       ++nchars;
3321     }
3322 
3323   if (nchars)
3324     {
3325       /* This is the most common problem: go the extra mile to describe
3326 	 the problem in as much helpful detail as possible.  */
3327       if (other_than_space)
3328 	{
3329 	  format_warning_substr (format_string_loc, format_string_cst,
3330 				 fmtchrpos, fmtchrpos + nchars, opt,
3331 				 "unquoted whitespace character %qc in format",
3332 				 *other_than_space);
3333 	  return format_chars + nchars - 1;
3334 	}
3335 
3336       if (fmtchrpos == 0)
3337 	/* Accept strings of leading spaces with no warning.  */
3338 	return format_chars + nchars - 1;
3339 
      /* The run of spaces extends to the terminating NUL.  */
3340       if (!format_chars[nchars])
3341 	{
3342 	  format_warning_substr (format_string_loc, format_string_cst,
3343 				 fmtchrpos, fmtchrpos + nchars, opt,
3344 				 "spurious trailing space in format");
3345 	  return format_chars + nchars - 1;
3346 	}
3347 
3348       if (nchars > 1)
3349 	{
3350 	  if (nchars == 2
3351 	      && orig_format_chars < format_chars
3352 	      && format_chars[-1] == '.'
3353 	      && format_chars[0] == ' '
3354 	      && format_chars[1] == ' ')
3355 	    {
3356 	      /* A period followed by two spaces.  Nothing is diagnosed
		 unless the format string starts with an uppercase
		 letter and the text after the two spaces starts with
		 a lowercase one.  */
3357 	      if (ISUPPER (*orig_format_chars))
3358 		{
3359 		  /* If the part before the period is a capitalized
3360 		     sentence check to make sure that what follows
3361 		     is also capitalized.  */
3362 		  if (ISLOWER (format_chars[2]))
3363 		    format_warning_substr (format_string_loc, format_string_cst,
3364 					   fmtchrpos, fmtchrpos + nchars, opt,
3365 					   "inconsistent capitalization in "
3366 					   "format");
3367 		}
3368 	    }
3369 	  else
3370 	    format_warning_substr (format_string_loc, format_string_cst,
3371 				   fmtchrpos, fmtchrpos + nchars, opt,
3372 				   "unquoted sequence of %i consecutive "
3373 				   "space characters in format", nchars);
3374 	  return format_chars + nchars - 1;
3375 	}
3376 
      /* A single space in the middle of the string is fine: step
	 over it and keep checking from the character after it.  */
3377       format_chars += nchars;
3378       nchars = 0;
3379     }
3380 
  /* FORMAT_CHARS may have been advanced past a space above, so
     recompute the starting position.  */
3381   fmtchrpos = format_chars - orig_format_chars;
3382 
3383   /* Diagnose any unquoted control characters other than the terminating
3384      NUL.  */
3385   while (format_chars[nchars] && ISCNTRL (format_chars[nchars]))
3386     ++nchars;
3387 
  /* The two cases below differ only in the wording of the message
     (plural without the character versus singular naming it).  */
3388   if (nchars > 1)
3389     {
3390       format_warning_substr (format_string_loc, format_string_cst,
3391 			     fmtchrpos, fmtchrpos + nchars, opt,
3392 			     "unquoted control characters in format");
3393       return format_chars + nchars - 1;
3394     }
3395   if (nchars)
3396     {
3397       format_warning_substr (format_string_loc, format_string_cst,
3398 			     fmtchrpos, fmtchrpos + nchars, opt,
3399 			     "unquoted control character %qc in format",
3400 			     *format_chars);
3401       return format_chars + nchars - 1;
3402     }
3403 
  /* Text starting with a punctuation character is matched against
     the C operator table, then against the C++ operator table.  The
     latter is passed with an element count of zero unless
     c_dialect_cxx () is true.  A non-null result from check_tokens
     is returned as is.  */
3404   if (ISPUNCT (format_chars[0]))
3405     {
3406       size_t nelts = sizeof c_opers / sizeof *c_opers;
3407       if (const char *ret = check_tokens (c_opers, nelts,
3408 					  format_string_loc, format_string_cst,
3409 					  orig_format_chars, format_chars,
3410 					  baltoks))
3411 	return ret;
3412 
3413       nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0;
3414       if (const char *ret = check_tokens (cxx_opers, nelts,
3415 					  format_string_loc, format_string_cst,
3416 					  orig_format_chars, format_chars,
3417 					  baltoks))
3418 	return ret;
3419     }
3420 
  /* Likewise for text starting with a letter, using the keyword
     tables.  */
3421   if (ISALPHA (format_chars[0]))
3422     {
3423       size_t nelts = sizeof c_keywords / sizeof *c_keywords;
3424       if (const char *ret = check_tokens (c_keywords, nelts,
3425 					  format_string_loc, format_string_cst,
3426 					  orig_format_chars, format_chars,
3427 					  baltoks))
3428 	return ret;
3429 
3430       nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0;
3431       if (const char *ret = check_tokens (cxx_keywords, nelts,
3432 					  format_string_loc, format_string_cst,
3433 					  orig_format_chars, format_chars,
3434 					  baltoks))
3435 	return ret;
3436     }
3437 
3438   nchars = 0;
3439 
3440   /* Diagnose unquoted options.  An option is a '-' or "--" followed
     by a letter, at the very beginning of the string or right after
     a space.  FORMAT_CHARS[-1] is only read when FORMAT_CHARS is not
     at the beginning, thanks to the short-circuit evaluation.  */
3441   if  ((format_chars == orig_format_chars
3442 	|| format_chars[-1] == ' ')
3443        && format_chars[0] == '-'
3444        && ((format_chars[1] == '-'
3445 	    && ISALPHA (format_chars[2]))
3446 	   || ISALPHA (format_chars[1])))
3447     {
      /* The option name extends over alphanumeric characters,
	 underscores, dashes, and plus signs.  */
3448       nchars = 1;
3449       while (ISALNUM (format_chars[nchars])
3450 	     || '_' == format_chars[nchars]
3451 	     || '-' == format_chars[nchars]
3452 	     || '+' == format_chars[nchars])
3453 	++nchars;
3454 
3455       format_warning_substr (format_string_loc, format_string_cst,
3456 			     fmtchrpos, fmtchrpos + nchars, opt,
3457 			     "unquoted option name %<%.*s%> in format",
3458 			     nchars, format_chars);
3459       return format_chars + nchars - 1;
3460     }
3461 
3462   /* Diagnose leading, trailing, and two or more consecutive punctuation
3463      characters.  */
  /* While scanning the run, record every opening bracket, brace,
     parenthesis, and angle bracket in BALTOKS and pop the matching
     record for every closing one.  A closing character with nothing
     left to pop sets UNBALANCED, which ends the scan; NCHARS then
     includes that character.  The scan also stops at '%'.  */
3464   const char *unbalanced = NULL;
3465   while ('%' != format_chars[nchars]
3466 	 && ISPUNCT (format_chars[nchars])
3467 	 && !unbalanced)
3468     {
3469       switch (format_chars[nchars])
3470 	{
3471 	case '[':
3472 	  baltoks.brackets.safe_push (format_chars + nchars);
3473 	  break;
3474 	case '{':
3475 	  baltoks.curly.safe_push (format_chars + nchars);
3476 	  break;
3477 	case '(':
3478 	  baltoks.parens.safe_push (format_chars + nchars);
3479 	  break;
3480 	case '<':
3481 	  baltoks.pointy.safe_push (format_chars + nchars);
3482 	  break;
3483 
3484 	case ']':
3485 	  if (baltoks.brackets.length () > 0)
3486 	    baltoks.brackets.pop ();
3487 	  else
3488 	    unbalanced = format_chars + nchars;
3489 	  break;
3490 	case '}':
3491 	  if (baltoks.curly.length () > 0)
3492 	    baltoks.curly.pop ();
3493 	  else
3494 	    unbalanced = format_chars + nchars;
3495 	  break;
3496 	case ')':
3497 	  if (baltoks.parens.length () > 0)
3498 	    baltoks.parens.pop ();
3499 	  else
3500 	    unbalanced = format_chars + nchars;
3501 	  break;
3502 	case '>':
3503 	  if (baltoks.pointy.length () > 0)
3504 	    baltoks.pointy.pop ();
3505 	  else
3506 	    unbalanced = format_chars + nchars;
3507 	  break;
3508 	}
3509 
3510       ++nchars;
3511     }
3512 
3513   if (unbalanced)
3514     {
3515       format_warning_substr (format_string_loc, format_string_cst,
3516 			     fmtchrpos, fmtchrpos + nchars, opt,
3517 			     "unbalanced punctuation character %qc in format",
3518 			     *unbalanced);
3519       return format_chars + nchars - 1;
3520     }
3521 
  /* NCHARS is now the length of the punctuation run starting at
     FORMAT_CHARS.  Every path through the block below returns.  */
3522   if (nchars)
3523     {
3524       /* Consider any identifier that follows the pound ('#') sign
3525 	 a preprocessing directive.  */
3526       if (nchars == 1
3527 	  && format_chars[0] == '#'
3528 	  && ISALPHA (format_chars[1]))
3529 	{
3530 	  while (ISALNUM (format_chars[nchars])
3531 		 || format_chars[nchars] == '_')
3532 	    ++nchars;
3533 
3534 	  format_warning_substr (format_string_loc, format_string_cst,
3535 				 fmtchrpos, fmtchrpos + nchars, opt,
3536 				 "unquoted preprocessing directive %<%.*s%> "
3537 				 "in format", nchars, format_chars);
3538 	  return format_chars + nchars - 1;
3539 	}
3540 
3541       /* Diagnose a bare single quote.  Only an apostrophe with a
	 letter on both sides is handled here.  */
3542       if (nchars == 1
3543 	  && format_chars[0] == '\''
3544 	  && format_chars - orig_format_chars
3545 	  && ISALPHA (format_chars[-1])
3546 	  && ISALPHA (format_chars[1]))
3547 	{
3548 	  /* Diagnose a subset of contractions that are best avoided.  */
3549 	  for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i)
3550 	    {
	      /* OFF is the offset of the apostrophe within the
		 contraction; the comparison starts that many
		 characters before the current position, provided
		 the string extends that far back.  */
3551 	      const char *apos = strchr (contrs[i].name, '\'');
3552 	      gcc_assert (apos != NULL);
3553 	      int off = apos - contrs[i].name;
3554 
3555 	      if (format_chars - orig_format_chars >= off
3556 		  && !strncmp (format_chars - off,
3557 			       contrs[i].name, contrs[i].len))
3558 		{
3559 		  format_warning_substr (format_string_loc,
3560 					 format_string_cst,
3561 					 fmtchrpos, fmtchrpos + nchars, opt,
3562 					 "contraction %<%.*s%> in format; "
3563 					 "use %qs instead",
3564 					 contrs[i].len, contrs[i].name,
3565 					 contrs[i].alt);
3566 		  return format_chars + nchars - 1;
3567 		}
3568 	    }
3569 
	  /* Not one of the listed contractions.  The note is added
	     only when format_warning_substr returns true.  */
3570 	  if (format_warning_substr (format_string_loc, format_string_cst,
3571 				     fmtchrpos, fmtchrpos + nchars, opt,
3572 				     "bare apostrophe %<'%> in format"))
3573 	    inform (format_string_loc,
3574 		    "if avoiding the apostrophe is not feasible, enclose "
3575 		    "it in a pair of %qs and %qs directives instead",
3576 		    "%<", "%>");
3577 	  return format_chars + nchars - 1;
3578 	}
3579 
3580       /* Diagnose a backtick (grave accent).  */
3581       if (nchars == 1
3582 	  && format_chars[0] == '`')
3583 	{
3584 	  if (format_warning_substr (format_string_loc, format_string_cst,
3585 				     fmtchrpos, fmtchrpos + nchars, opt,
3586 				     "grave accent %<`%> in format"))
3587 	    inform (format_string_loc,
3588 		    "use the apostrophe directive %qs instead", "%'");
3589 	  return format_chars + nchars - 1;
3590 	}
3591 
3592       /* Diagnose a punctuation character after a space.  The
	 warning range covers the space, not the punctuation
	 character.  */
3593       if (nchars == 1
3594 	  && format_chars - orig_format_chars
3595 	  && format_chars[-1] == ' '
3596 	  && strspn (format_chars, "!?:;.,") == 1)
3597 	{
3598 	  format_warning_substr (format_string_loc, format_string_cst,
3599 				 fmtchrpos - 1, fmtchrpos, opt,
3600 				 "space followed by punctuation character "
3601 				 "%<%c%>", format_chars[0]);
3602 	  return format_chars;
3603 	}
3604 
3605       if (nchars == 1)
3606 	{
	  /* Diagnose a %s directive enclosed in double quotes.
	     Control then continues with the double-quote handling
	     just below.  */
3607 	  if (!strncmp (format_chars, "\"%s\"", 4))
3608 	    {
3609 	      if (format_warning_substr (format_string_loc, format_string_cst,
3610 					 fmtchrpos, fmtchrpos + 4, opt,
3611 					 "quoted %qs directive in format",
3612 					 "%s"))
3613 		inform (format_string_loc, "if using %qs is not feasible, "
3614 			"use %qs instead", "%qs", "\"%-s\"");
3615 	    }
3616 
	  /* Toggle the record of a pending quote character: remember
	     this position if none is recorded, otherwise clear the
	     record.  */
3617 	  if (format_chars[0] == '"')
3618 	    {
3619 	      baltoks.doublequote = baltoks.doublequote ? NULL : format_chars;
3620 	      return format_chars + nchars - 1;
3621 	    }
3622 	  if (format_chars[0] == '\'')
3623 	    {
3624 	      baltoks.singlequote = baltoks.singlequote ? NULL : format_chars;
3625 	      return format_chars + nchars - 1;
3626 	    }
3627 	}
3628 
      /* Punctuation at the very beginning of the format string.  The
	 two accepted forms fall through to the return at the end of
	 this block without a warning.  */
3629       if (fmtchrpos == 0)
3630 	{
3631 	  if (nchars == 1
3632 	      && format_chars[0] == '(')
3633 	    ;   /* Text beginning in an open parenthesis.  */
3634 	  else if (nchars == 3
3635 	      && !strncmp (format_chars, "...", 3)
3636 	      && format_chars[3])
3637 	    ;   /* Text beginning in an ellipsis.  */
3638 	  else
3639 	    {
3640 	      format_warning_substr (format_string_loc, format_string_cst,
3641 				     fmtchrpos, fmtchrpos + nchars, opt,
3642 				     "spurious leading punctuation sequence "
3643 				     "%<%.*s%> in format",
3644 				     nchars, format_chars);
3645 	      return format_chars + nchars - 1;
3646 	    }
3647 	}
      /* Punctuation that runs up to the terminating NUL.  */
3648       else if (!format_chars[nchars])
3649 	{
3650 	  if (nchars == 1
3651 	      && (format_chars[nchars - 1] == ':'
3652 		  || format_chars[nchars - 1] == ')'))
3653 	    ;   /* Text ending in a colon or a closing parenthesis.  */
3654 	  else if (nchars == 1
3655 		   && ((ISUPPER (*orig_format_chars)
3656 			&& format_chars[nchars - 1] == '.')
3657 		       || strspn (format_chars + nchars - 1, "?])") == 1))
3658 		  ;   /* Capitalized sentence terminated by a single period,
3659 			 or text ending in a question mark, closing bracket,
3660 			 or parenthesis.  */
3661 	  else if (nchars == 2
3662 		   && format_chars[0] == '?'
3663 		   && format_chars[1] == ')')
3664 	    ;   /* A question mark followed by a closing parenthesis ("?)")
		   at the end of the string.  */
3665 	  else if (nchars == 2
3666 		   && format_chars[0] == ')'
3667 		   && (format_chars[1] == '?'
3668 		       || format_chars[1] == ';'
3669 		       || format_chars[1] == ':'
3670 		       || (ISUPPER (*orig_format_chars)
3671 			   && format_chars[1] == '.')))
3672 	    ;   /* Closing parenthetical note followed by a question mark,
3673 		   semicolon, or colon at the end of the string, or by
3674 		   a period at the end of a capitalized sentence.  */
3675 	  else if (nchars == 3
3676 		   && format_chars - orig_format_chars > 0
3677 		   && !strncmp (format_chars, "...", 3))
3678 	    ;   /* Text ending in the ellipsis.  */
3679 	  else
3680 	    format_warning_substr (format_string_loc, format_string_cst,
3681 				   fmtchrpos, fmtchrpos + nchars, opt,
3682 				   "spurious trailing punctuation sequence "
3683 				   "%<%.*s%> in format",
3684 				   nchars, format_chars);
3685 
3686 	  return format_chars + nchars - 1;
3687 	}
3688       else if (nchars == 2
3689 	       && format_chars[0] == ')'
3690 	       && (format_chars[1] == ':'
3691 		   || format_chars[1] == ';'
3692 		   || format_chars[1] == ',')
3693 	       && format_chars[2] == ' ')
3694 	;   /* Closing parenthetical note followed by a colon, semicolon
3695 	       or a comma followed by a space in the middle of the string.  */
      /* Any other run of two or more punctuation characters in the
	 middle of the string is diagnosed.  A single one is accepted
	 silently.  */
3696       else if (nchars > 1)
3697 	format_warning_substr (format_string_loc, format_string_cst,
3698 			       fmtchrpos, fmtchrpos + nchars, opt,
3699 			       "unquoted sequence of %i consecutive "
3700 			       "punctuation characters %q.*s in format",
3701 			       nchars, nchars, format_chars);
3702       return format_chars + nchars - 1;
3703     }
3704 
3705   nchars = 0;
3706 
3707   /* Finally, diagnose any unquoted non-graph, non-punctuation characters
3708      other than the terminating NUL.  */
3709   while (format_chars[nchars]
3710 	 && '%' != format_chars[nchars]
3711 	 && !ISPUNCT (format_chars[nchars])
3712 	 && !ISGRAPH (format_chars[nchars]))
3713     ++nchars;
3714 
3715   if (nchars > 1)
3716     {
3717       format_warning_substr (format_string_loc, format_string_cst,
3718 			     fmtchrpos, fmtchrpos + nchars, opt,
3719 			     "unquoted non-graph characters in format");
3720       return format_chars + nchars - 1;
3721     }
3722   if (nchars)
3723     {
3724       format_warning_substr (format_string_loc, format_string_cst,
3725 			     fmtchrpos, fmtchrpos + nchars, opt,
3726 			     "unquoted non-graph character %qc in format",
3727 			     *format_chars);
3728       return format_chars + nchars - 1;
3729     }
3730 
  /* None of the checks above applied to the current character.  */
3731   return format_chars;
3732 }
3733 
3734 /* Diagnose unbalanced tokens described by BALTOKS in format string
3735    ORIG_FORMAT_CHARS and the corresponding FORMAT_STRING_CST.  */
3736 
3737 static void
maybe_diag_unbalanced_tokens(location_t format_string_loc,const char * orig_format_chars,tree format_string_cst,baltoks_t & baltoks)3738 maybe_diag_unbalanced_tokens (location_t format_string_loc,
3739 			      const char *orig_format_chars,
3740 			      tree format_string_cst,
3741 			      baltoks_t &baltoks)
3742 {
3743   const char *unbalanced = NULL;
3744 
3745   if (baltoks.brackets.length ())
3746     unbalanced = baltoks.brackets.pop ();
3747   else if (baltoks.curly.length ())
3748     unbalanced = baltoks.curly.pop ();
3749   else if (baltoks.parens.length ())
3750     unbalanced = baltoks.parens.pop ();
3751   else if (baltoks.pointy.length ())
3752     unbalanced = baltoks.pointy.pop ();
3753 
3754   if (unbalanced)
3755     format_warning_at_char (format_string_loc, format_string_cst,
3756 			    unbalanced - orig_format_chars + 1,
3757 			    OPT_Wformat_diag,
3758 			    "unbalanced punctuation character %<%c%> in format",
3759 			    *unbalanced);
3760 
3761   if (baltoks.quotdirs.length ())
3762     format_warning_at_char (format_string_loc, format_string_cst,
3763 			    baltoks.quotdirs.pop () - orig_format_chars,
3764 			    OPT_Wformat_,
3765 			    "unterminated quoting directive");
3766 
3767   const char *quote
3768     = baltoks.singlequote ? baltoks.singlequote : baltoks.doublequote;
3769 
3770   if (quote)
3771     format_warning_at_char (format_string_loc, format_string_cst,
3772   			    quote - orig_format_chars + 1,
3773 			    OPT_Wformat_diag,
3774   			    "unterminated quote character %<%c%> in format",
3775   			    *quote);
3776 }
3777 
3778 /* Do the main part of checking a call to a format function.  FORMAT_CHARS
3779    is the NUL-terminated format string (which at this point may contain
3780    internal NUL characters); FORMAT_LENGTH is its length (excluding the
3781    terminating NUL character).  ARG_NUM is one less than the number of
3782    the first format argument to check; PARAMS points to that format
3783    argument in the list of arguments.  */
/* Further notes on the arguments, as used in the body below:

   RES supplies the location of the format string
   (RES->format_string_loc) and has its number_other and
   number_extra_args counters adjusted at the end.  INFO supplies the
   format type, the is_raw flag, and first_arg_num.  FORMAT_STRING_CST
   is passed to the diagnostic routines and to the argument parser.
   FMT_PARAM_LOC is only passed on to the argument type check.
   FWT_POOL and ARGLOCS are only passed on to the argument parser.

   The function returns early, without running the end-of-string
   checks after the loop, when one of the argument parser steps that
   is tested in the loop reports failure.  */
3784 
3785 static void
check_format_info_main(format_check_results * res,function_format_info * info,const char * format_chars,location_t fmt_param_loc,tree format_string_cst,int format_length,tree params,unsigned HOST_WIDE_INT arg_num,object_allocator<format_wanted_type> & fwt_pool,vec<location_t> * arglocs)3786 check_format_info_main (format_check_results *res,
3787 			function_format_info *info, const char *format_chars,
3788 			location_t fmt_param_loc, tree format_string_cst,
3789 			int format_length, tree params,
3790 			unsigned HOST_WIDE_INT arg_num,
3791 			object_allocator <format_wanted_type> &fwt_pool,
3792 			vec<location_t> *arglocs)
3793 {
3794   const char * const orig_format_chars = format_chars;
3795   const tree first_fillin_param = params;
3796 
  /* The description of the format kind being checked, looked up by
     INFO->format_type, and its table of flag specifications.  */
3797   const format_kind_info * const fki = &format_types[info->format_type];
3798   const format_flag_spec * const flag_specs = fki->flag_specs;
3799   const location_t format_string_loc = res->format_string_loc;
3800 
3801   /* -1 if no conversions taking an operand have been found; 0 if one has
3802      and it didn't use $; 1 if $ formats are in use.  */
3803   int has_operand_number = -1;
3804 
3805   /* Vectors of pointers to opening quoting directives (like GCC "%<"),
3806      opening braces, brackets, and parentheses.  Used to detect unbalanced
3807      tokens.  */
3808   baltoks_t baltoks;
3809 
3810   /* Pointers to the most recent color directives (like GCC's "%r" or "%R").
3811      A starting color directive must be terminated before the end of
3812      the format string.  A terminating directive makes no sense without
3813      a prior starting directive.  */
3814   const char *color_begin = NULL;
3815   const char *color_end = NULL;
3816 
3817   init_dollar_format_checking (info->first_arg_num, first_fillin_param);
3818 
3819   /* In GCC diagnostic functions check plain directives (substrings within
3820      the format string that don't start with %) for quoting and punctuations
3821      problems.  */
3822   bool ck_plain = (!info->is_raw
3823 		   && (info->format_type == gcc_diag_format_type
3824 		       || info->format_type == gcc_tdiag_format_type
3825 		       || info->format_type == gcc_cdiag_format_type
3826 		       || info->format_type == gcc_cxxdiag_format_type));
3827 
  /* Each iteration handles either one plain character (or, with
     CK_PLAIN, one sequence handled by check_plain) or one
     directive.  */
3828   while (*format_chars != 0)
3829     {
3830       if (ck_plain)
3831 	format_chars = check_plain (format_string_loc,
3832 				    format_string_cst,
3833 				    orig_format_chars, format_chars,
3834 				    baltoks);
3835 
      /* Stop advancing at the terminating NUL.  Otherwise step over
	 the current character; unless it was a '%' there is nothing
	 more to do for it.  */
3836       if (*format_chars == 0 || *format_chars++ != '%')
3837 	continue;
3838 
      /* FORMAT_CHARS now points just past a '%'.  */
3839       if (*format_chars == 0)
3840 	{
3841 	  format_warning_at_char (format_string_loc, format_string_cst,
3842 				  format_chars - orig_format_chars,
3843 				  OPT_Wformat_,
3844 				  "spurious trailing %<%%%> in format");
3845 	  continue;
3846 	}
      /* "%%": step over the second '%' as well.  */
3847       if (*format_chars == '%')
3848 	{
3849 	  ++format_chars;
3850 	  continue;
3851 	}
3852 
3853       /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls
3854 	 to ARG_PARSER members may modify the variable.  */
3855       flag_chars_t flag_chars;
3856       argument_parser arg_parser (info, format_chars, format_string_cst,
3857 				  orig_format_chars, format_string_loc,
3858 				  flag_chars, has_operand_number,
3859 				  first_fillin_param, fwt_pool, arglocs);
3860 
      /* A false result from any of the tested steps below abandons
	 the whole check, including the end-of-string diagnostics
	 that follow the loop.  */
3861       if (!arg_parser.read_any_dollar ())
3862 	return;
3863 
3864       if (!arg_parser.read_format_flags ())
3865 	return;
3866 
3867       /* Read any format width, possibly * or *m$.  */
3868       if (!arg_parser.read_any_format_width (params, arg_num))
3869 	return;
3870 
3871       /* Read any format left precision (must be a number, not *).  */
3872       arg_parser.read_any_format_left_precision ();
3873 
3874       /* Read any format precision, possibly * or *m$.  */
3875       if (!arg_parser.read_any_format_precision (params, arg_num))
3876 	return;
3877 
      /* Remember the position before handle_alloc_chars runs; it is
	 passed to the argument type check below together with
	 TYPE_START.  */
3878       const char *format_start = format_chars;
3879 
3880       arg_parser.handle_alloc_chars ();
3881 
3882       /* The rest of the conversion specification is the length modifier
3883 	 (if any), and the conversion specifier, so this is where the
3884 	 type information starts.  If we need to issue a suggestion
3885 	 about a type mismatch, then we should preserve everything up
3886 	 to here. */
3887       const char *type_start = format_chars;
3888 
3889       /* Read any length modifier, if this kind of format has them.  */
3890       const length_modifier len_modifier
3891 	= arg_parser.read_any_length_modifier ();
3892 
3893       /* Read any modifier (strftime E/O).  */
3894       arg_parser.read_any_other_modifier ();
3895 
      /* The conversion character.  It is not stepped over yet, so
	 the two "continue" paths below leave FORMAT_CHARS pointing
	 at it for the next iteration.  */
3896       char format_char = *format_chars;
3897       if (format_char == 0
3898 	  || (!(fki->flags & (int) FMT_FLAG_FANCY_PERCENT_OK)
3899 	      && format_char == '%'))
3900 	{
3901 	  format_warning_at_char (format_string_loc, format_string_cst,
3902 			     format_chars - orig_format_chars,
3903 			     OPT_Wformat_,
3904 			     "conversion lacks type at end of format");
3905 	  continue;
3906 	}
3907 
      /* NOTE(review): unlike the other diagnostics in this function
	 this one calls warning () directly and so is not given
	 FORMAT_STRING_LOC or a position within the format string.
	 Confirm that this is intended.  */
3908       if (format_char == 'm' && !(fki->flags & FMT_FLAG_M_OK))
3909         {
3910 	  warning (OPT_Wformat_,
3911 	      "%%m is only allowed in syslog(3) like functions");
3912 	  continue;
3913 	}
3914 
3915       format_chars++;
3916 
      /* A null result means there is no entry to check this
	 conversion against; move on to the rest of the string.  */
3917       const format_char_info * const fci
3918 	= arg_parser.find_format_char_info (format_char);
3919       if (!fci)
3920 	continue;
3921 
      /* The last argument tells the validation whether a quoting
	 directive is currently open.  */
3922       flag_chars.validate (fki, fci, flag_specs, format_chars,
3923 			   format_string_cst,
3924 			   format_string_loc, orig_format_chars, format_char,
3925 			   baltoks.quotdirs.length () > 0);
3926 
3927       const int alloc_flag = flag_chars.get_alloc_flag (fki);
3928       const bool suppressed = flag_chars.assignment_suppression_p (fki);
3929 
3930       /* Diagnose nested or unmatched quoting directives such as GCC's
3931 	 "%<...%<" and "%>...%>".  */
      /* A directive is an opening quote if its FLAGS2 contains '<'
	 and a closing quote if it contains '>'.  Directives with
	 both or neither leave the stack of open quoting directives
	 alone.  */
3932       bool quot_begin_p = strchr (fci->flags2, '<');
3933       bool quot_end_p = strchr (fci->flags2, '>');
3934 
3935       if (quot_begin_p && !quot_end_p)
3936 	{
3937 	  if (baltoks.quotdirs.length ())
3938 	    format_warning_at_char (format_string_loc, format_string_cst,
3939 				    format_chars - orig_format_chars,
3940 				    OPT_Wformat_,
3941 				    "nested quoting directive");
3942 	  baltoks.quotdirs.safe_push (format_chars);
3943 	}
3944       else if (!quot_begin_p && quot_end_p)
3945 	{
3946 	  if (baltoks.quotdirs.length ())
3947 	    baltoks.quotdirs.pop ();
3948 	  else
3949 	    format_warning_at_char (format_string_loc, format_string_cst,
3950 				    format_chars - orig_format_chars,
3951 				    OPT_Wformat_,
3952 				    "unmatched quoting directive");
3953 	}
3954 
      /* A '/' in FLAGS2 marks a directive that starts a color and
	 a backslash one that ends it.  */
3955       bool color_begin_p = strchr (fci->flags2, '/');
3956       if (color_begin_p)
3957 	{
3958 	  color_begin = format_chars;
3959 	  color_end = NULL;
3960 	}
3961       else if (strchr (fci->flags2, '\\'))
3962 	{
	  /* NOTE(review): "occurence" in the message below is
	     misspelled ("occurrence").  It is left as is here
	     because the string is part of the emitted diagnostic.  */
3963 	  if (color_end)
3964 	    format_warning_at_char (format_string_loc, format_string_cst,
3965 				    format_chars - orig_format_chars,
3966 				    OPT_Wformat_,
3967 				    "%qc directive redundant after prior "
3968 				    "occurence of the same", format_char);
3969 	  else if (!color_begin)
3970 	    format_warning_at_char (format_string_loc, format_string_cst,
3971 				    format_chars - orig_format_chars,
3972 				    OPT_Wformat_,
3973 				    "unmatched color reset directive");
3974 	  color_end = format_chars;
3975 	}
3976 
3977       /* Diagnose directives that shouldn't appear in a quoted sequence.
3978 	 (They are denoted by a double quote in FLAGS2.)  */
3979       if (baltoks.quotdirs.length ())
3980 	{
3981 	  if (strchr (fci->flags2, '"'))
3982 	    format_warning_at_char (format_string_loc, format_string_cst,
3983 				    format_chars - orig_format_chars,
3984 				    OPT_Wformat_,
3985 				    "%qc conversion used within a quoted "
3986 				    "sequence",
3987 				    format_char);
3988 	}
3989 
3990       /* Validate the pairs of flags used.  */
3991       arg_parser.validate_flag_pairs (fci, format_char);
3992 
3993       arg_parser.give_y2k_warnings (fci, format_char);
3994 
3995       arg_parser.parse_any_scan_set (fci);
3996 
3997       tree wanted_type = NULL;
3998       const char *wanted_type_name = NULL;
3999 
      /* A false result here only skips the argument type check for
	 this directive; checking continues with the rest of the
	 string.  */
4000       if (!arg_parser.handle_conversions (fci, len_modifier,
4001 					  wanted_type, wanted_type_name,
4002 					  arg_num,
4003 					  params,
4004 					  format_char))
4005 	continue;
4006 
4007       arg_parser.main_wanted_type.next = NULL;
4008 
4009       /* Finally. . .check type of argument against desired type!  */
4010       if (!arg_parser.check_argument_type (fci, len_modifier,
4011 					   wanted_type, wanted_type_name,
4012 					   suppressed,
4013 					   arg_num, params,
4014 					   alloc_flag,
4015 					   format_start, type_start,
4016 					   fmt_param_loc,
4017 					   format_char))
4018 	return;
4019     }
4020 
  /* The loop stops at the first NUL.  If that is not at
     FORMAT_LENGTH, the NUL is embedded in the format string.  */
4021   if (format_chars - orig_format_chars != format_length)
4022     format_warning_at_char (format_string_loc, format_string_cst,
4023 			    format_chars + 1 - orig_format_chars,
4024 			    OPT_Wformat_contains_nul,
4025 			    "embedded %<\\0%> in format");
  /* NOTE(review): presumably PARAMS is non-null here when arguments
     are left over after the last directive, and the counters are
     adjusted to record a call with extra arguments.  Confirm against
     the users of RES.  */
4026   if (info->first_arg_num != 0 && params != 0
4027       && has_operand_number <= 0)
4028     {
4029       res->number_other--;
4030       res->number_extra_args++;
4031     }
4032   if (has_operand_number > 0)
4033     finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK);
4034 
4035   maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars,
4036 				format_string_cst, baltoks);
4037 
  /* A color was started by the most recent color directive and
     never reset.  */
4038   if (color_begin && !color_end)
4039     format_warning_at_char (format_string_loc, format_string_cst,
4040 			    color_begin - orig_format_chars,
4041 			    OPT_Wformat_, "unterminated color directive");
4042 }
4043 
4044 /* Check the argument types from a single format conversion (possibly
4045    including width and precision arguments).
4046 
4047    FMT_LOC is the location of the format conversion.
4048 
4049    TYPES is a singly-linked list expressing the parts of the format
4050    conversion that expect argument types, and the arguments they
4051    correspond to.
4052 
4053    OFFSET_TO_TYPE_START is the offset within the execution-charset encoded
4054    format string to where type information begins for the conversion
4055    (the length modifier and conversion specifier).
4056 
4057    CONVERSION_CHAR is the user-provided conversion specifier.
4058 
4059    For example, given:
4060 
4061      sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4062 
4063    then FMT_LOC covers this range:
4064 
4065      sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4066                          ^^^^^^^^^
4067 
4068    and TYPES in this case is a three-entry singly-linked list consisting of:
4069    (1) the check for the field width here:
4070          sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4071                                 ^              ^^^^
4072        against arg3, and
4073    (2) the check for the field precision here:
4074          sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4075                                  ^^                  ^^^^
4076        against arg4, and
4077    (3) the check for the length modifier and conversion char here:
4078          sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4079                                    ^^^                     ^^^^
4080        against arg5.
4081 
4082    OFFSET_TO_TYPE_START is 13, the offset to the "lld" within the
4083    STRING_CST:
4084 
4085                   0000000000111111111122
4086                   0123456789012345678901
4087      sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4088                                ^ ^
4089                                | ` CONVERSION_CHAR: 'd'
4090                                type starts here.  */
4091 
4092 static void
check_format_types(const substring_loc & fmt_loc,format_wanted_type * types,const format_kind_info * fki,int offset_to_type_start,char conversion_char,vec<location_t> * arglocs)4093 check_format_types (const substring_loc &fmt_loc,
4094 		    format_wanted_type *types, const format_kind_info *fki,
4095 		    int offset_to_type_start,
4096 		    char conversion_char,
4097 		    vec<location_t> *arglocs)
4098 {
       /* Each node of TYPES describes one argument expected by this
	  conversion; they are checked in list order.  */
4099   for (; types != 0; types = types->next)
4100     {
4101       tree cur_param;
4102       tree cur_type;
4103       tree orig_cur_type;
4104       tree wanted_type;
4105       int arg_num;
4106       int i;
4107       int char_type_flag;
4108 
4109       wanted_type = types->wanted_type;
4110       arg_num = types->arg_num;
4111 
4112       /* The following should not occur here.  */
4113       gcc_assert (wanted_type);
4114       gcc_assert (wanted_type != void_type_node || types->pointer_count);
4115 
	   /* When no pointer indirection is expected, compare against the
	      promoted form of the wanted type.  */
4116       if (types->pointer_count == 0)
4117 	wanted_type = lang_hooks.types.type_promotes_to (wanted_type);
4118 
4119       wanted_type = TYPE_MAIN_VARIANT (wanted_type);
4120 
	   /* No argument is recorded for this entry: diagnose with a NULL
	      argument type and an unknown location, then move on.  */
4121       cur_param = types->param;
4122       if (!cur_param)
4123         {
4124 	  format_type_warning (fmt_loc, UNKNOWN_LOCATION, types, wanted_type,
4125 			       NULL, fki, offset_to_type_start,
4126 			       conversion_char);
4127           continue;
4128         }
4129 
4130       cur_type = TREE_TYPE (cur_param);
4131       if (cur_type == error_mark_node)
4132 	continue;
4133       orig_cur_type = cur_type;
4134       char_type_flag = 0;
4135 
	   /* Prefer the argument expression's own location; otherwise fall
	      back to ARGLOCS when the caller supplied it.  */
4136       location_t param_loc = UNKNOWN_LOCATION;
4137       if (EXPR_HAS_LOCATION (cur_param))
4138 	param_loc = EXPR_LOCATION (cur_param);
4139       else if (arglocs)
4140 	{
4141 	  /* arg_num is 1-based.  */
4142 	  gcc_assert (types->arg_num > 0);
4143 	  param_loc = (*arglocs)[types->arg_num - 1];
4144 	}
4145 
4146       STRIP_NOPS (cur_param);
4147 
4148       /* Check the types of any additional pointer arguments
4149 	 that precede the "real" argument.  */
4150       for (i = 0; i < types->pointer_count; ++i)
4151 	{
4152 	  if (TREE_CODE (cur_type) == POINTER_TYPE)
4153 	    {
4154 	      cur_type = TREE_TYPE (cur_type);
4155 	      if (cur_type == error_mark_node)
4156 		break;
4157 
4158 	      /* Check for writing through a NULL pointer.  */
4159 	      if (types->writing_in_flag
4160 		  && i == 0
4161 		  && cur_param != 0
4162 		  && integer_zerop (cur_param))
4163 		warning (OPT_Wformat_, "writing through null pointer "
4164 			 "(argument %d)", arg_num);
4165 
4166 	      /* Check for reading through a NULL pointer.  Ignore
4167 		 printf-family of functions as they are checked for
4168 		 null arguments by the middle-end.  */
4169 	      if (fki->conversion_specs != print_char_table
4170 		  && types->reading_from_flag
4171 		  && i == 0
4172 		  && cur_param != 0
4173 		  && integer_zerop (cur_param))
4174 		warning (OPT_Wformat_, "reading through null pointer "
4175 			 "(argument %d)", arg_num);
4176 
		   /* Follow the argument expression down one level when it is
		      an address-of; otherwise the pointee expression is
		      unknown from here on.  */
4177 	      if (cur_param != 0 && TREE_CODE (cur_param) == ADDR_EXPR)
4178 		cur_param = TREE_OPERAND (cur_param, 0);
4179 	      else
4180 		cur_param = 0;
4181 
4182 	      /* See if this is an attempt to write into a const type with
4183 		 scanf or with printf "%n".  Note: the writing in happens
4184 		 at the first indirection only, if for example
4185 		 void * const * is passed to scanf %p; passing
4186 		 const void ** is simply passing an incompatible type.  */
4187 	      if (types->writing_in_flag
4188 		  && i == 0
4189 		  && (TYPE_READONLY (cur_type)
4190 		      || (cur_param != 0
4191 			  && (CONSTANT_CLASS_P (cur_param)
4192 			      || (DECL_P (cur_param)
4193 				  && TREE_READONLY (cur_param))))))
4194 		warning (OPT_Wformat_, "writing into constant object "
4195 			 "(argument %d)", arg_num);
4196 
4197 	      /* If there are extra type qualifiers beyond the first
4198 		 indirection, then this makes the types technically
4199 		 incompatible.  */
4200 	      if (i > 0
4201 		  && pedantic
4202 		  && (TYPE_READONLY (cur_type)
4203 		      || TYPE_VOLATILE (cur_type)
4204 		      || TYPE_ATOMIC (cur_type)
4205 		      || TYPE_RESTRICT (cur_type)))
4206 		warning (OPT_Wformat_, "extra type qualifiers in format "
4207 			 "argument (argument %d)",
4208 			 arg_num);
4209 
4210 	    }
4211 	  else
4212 	    {
4213 	      format_type_warning (fmt_loc, param_loc,
4214 				   types, wanted_type, orig_cur_type, fki,
4215 				   offset_to_type_start, conversion_char);
4216 	      break;
4217 	    }
4218 	}
4219 
	   /* The loop above stopped early, either on an erroneous pointee
	      type or on a non-pointer argument (already diagnosed there);
	      nothing further is checked for this entry.  */
4220       if (i < types->pointer_count)
4221 	continue;
4222 
4223       cur_type = TYPE_MAIN_VARIANT (cur_type);
4224 
4225       /* Check whether the argument type is a character type.  This leniency
4226 	 only applies to certain formats, flagged with 'c'.  */
4227       if (types->char_lenient_flag)
4228 	char_type_flag = (cur_type == char_type_node
4229 			  || cur_type == signed_char_type_node
4230 			  || cur_type == unsigned_char_type_node);
4231 
4232       /* Check the type of the "real" argument, if there's a type we want.  */
4233       if (lang_hooks.types_compatible_p (wanted_type, cur_type))
4234 	continue;
4235       /* If we want 'void *', allow any pointer type.
4236 	 (Anything else would already have got a warning.)
4237 	 With -Wpedantic, only allow pointers to void and to character
4238 	 types.  */
4239       if (wanted_type == void_type_node
4240 	  && (!pedantic || (i == 1 && char_type_flag)))
4241 	continue;
4242       /* Don't warn about differences merely in signedness, unless
4243 	 -Wpedantic.  With -Wpedantic, warn if the type is a pointer
4244 	 target and not a character type, and for character types at
4245 	 a second level of indirection.  */
4246       if (TREE_CODE (wanted_type) == INTEGER_TYPE
4247 	  && TREE_CODE (cur_type) == INTEGER_TYPE
4248 	  && ((!pedantic && !warn_format_signedness)
4249 	      || (i == 0 && !warn_format_signedness)
4250 	      || (i == 1 && char_type_flag))
4251 	  && (TYPE_UNSIGNED (wanted_type)
4252 	      ? wanted_type == c_common_unsigned_type (cur_type)
4253 	      : wanted_type == c_common_signed_type (cur_type)))
4254 	continue;
4255       /* Don't warn about differences merely in signedness if we know
4256 	 that the current type is integer-promoted and its original type
4257 	 was unsigned such as that it is in the range of WANTED_TYPE.  */
4258       if (TREE_CODE (wanted_type) == INTEGER_TYPE
4259 	  && TREE_CODE (cur_type) == INTEGER_TYPE
4260 	  && warn_format_signedness
4261 	  && TYPE_UNSIGNED (wanted_type)
4262 	  && cur_param != NULL_TREE
4263 	  && TREE_CODE (cur_param) == NOP_EXPR)
4264 	{
4265 	  tree t = TREE_TYPE (TREE_OPERAND (cur_param, 0));
4266 	  if (TYPE_UNSIGNED (t)
4267 	      && cur_type == lang_hooks.types.type_promotes_to (t))
4268 	    continue;
4269 	}
4270       /* Likewise, "signed char", "unsigned char" and "char" are
4271 	 equivalent but the above test won't consider them equivalent.  */
4272       if (wanted_type == char_type_node
4273 	  && (!pedantic || i < 2)
4274 	  && char_type_flag)
4275 	continue;
	   /* With the scalar-identity flag set, accept types of the same tree
	      code (or any two integral types) that have equal precision.  */
4276       if (types->scalar_identity_flag
4277 	  && (TREE_CODE (cur_type) == TREE_CODE (wanted_type)
4278 	      || (INTEGRAL_TYPE_P (cur_type)
4279 		  && INTEGRAL_TYPE_P (wanted_type)))
4280 	  && TYPE_PRECISION (cur_type) == TYPE_PRECISION (wanted_type))
4281 	continue;
4282       /* Now we have a type mismatch.  */
4283       format_type_warning (fmt_loc, param_loc, types,
4284 			   wanted_type, orig_cur_type, fki,
4285 			   offset_to_type_start, conversion_char);
4286     }
4287 }
4288 
4289 /* Given type TYPE, attempt to dereference the type N times
4290    (e.g. from ("int ***", 2) to "int *")
4291 
4292    Return the derefenced type, with any qualifiers
4293    such as "const" stripped from the result, or
4294    NULL if unsuccessful (e.g. TYPE is not a pointer type).  */
4295 
4296 static tree
deref_n_times(tree type,int n)4297 deref_n_times (tree type, int n)
4298 {
4299   gcc_assert (type);
4300 
4301   for (int i = n; i > 0; i--)
4302     {
4303       if (TREE_CODE (type) != POINTER_TYPE)
4304 	return NULL_TREE;
4305       type = TREE_TYPE (type);
4306     }
4307   /* Strip off any "const" etc.  */
4308   return build_qualified_type (type, 0);
4309 }
4310 
4311 /* Lookup the format code for FORMAT_LEN within FLI,
4312    returning the string code for expressing it, or NULL
4313    if it is not found.  */
4314 
4315 static const char *
get_modifier_for_format_len(const format_length_info * fli,enum format_lengths format_len)4316 get_modifier_for_format_len (const format_length_info *fli,
4317 			     enum format_lengths format_len)
4318 {
4319   for (; fli->name; fli++)
4320     {
4321       if (fli->index == format_len)
4322 	return fli->name;
4323       if (fli->double_index == format_len)
4324 	return fli->double_name;
4325     }
4326   return NULL;
4327 }
4328 
4329 #if CHECKING_P
4330 
4331 namespace selftest {
4332 
/* Selftest for get_modifier_for_format_len: with printf_length_specs,
   FMT_LEN_h, FMT_LEN_hh and FMT_LEN_L must map to "h", "hh" and "L"
   respectively, and FMT_LEN_none must yield NULL.  */
4333 static void
test_get_modifier_for_format_len()4334 test_get_modifier_for_format_len ()
4335 {
4336   ASSERT_STREQ ("h",
4337 		get_modifier_for_format_len (printf_length_specs, FMT_LEN_h));
4338   ASSERT_STREQ ("hh",
4339 		get_modifier_for_format_len (printf_length_specs, FMT_LEN_hh));
4340   ASSERT_STREQ ("L",
4341 		get_modifier_for_format_len (printf_length_specs, FMT_LEN_L));
4342   ASSERT_EQ (NULL,
4343 	     get_modifier_for_format_len (printf_length_specs, FMT_LEN_none));
4344 }
4345 
4346 } // namespace selftest
4347 
4348 #endif /* CHECKING_P */
4349 
4350 /* Determine if SPEC_TYPE and ARG_TYPE are sufficiently similar for a
4351    format_type_detail using SPEC_TYPE to be offered as a suggestion for
4352    Wformat type errors where the argument has type ARG_TYPE.  */
4353 
4354 static bool
matching_type_p(tree spec_type,tree arg_type)4355 matching_type_p (tree spec_type, tree arg_type)
4356 {
4357   gcc_assert (spec_type);
4358   gcc_assert (arg_type);
4359 
4360   /* If any of the types requires structural equality, we can't compare
4361      their canonical types.  */
4362   if (TYPE_STRUCTURAL_EQUALITY_P (spec_type)
4363       || TYPE_STRUCTURAL_EQUALITY_P (arg_type))
4364     return false;
4365 
4366   spec_type = TYPE_CANONICAL (spec_type);
4367   arg_type = TYPE_CANONICAL (arg_type);
4368 
4369   if (TREE_CODE (spec_type) == INTEGER_TYPE
4370       && TREE_CODE (arg_type) == INTEGER_TYPE
4371       && (TYPE_UNSIGNED (spec_type)
4372 	  ? spec_type == c_common_unsigned_type (arg_type)
4373 	  : spec_type == c_common_signed_type (arg_type)))
4374     return true;
4375 
4376   return spec_type == arg_type;
4377 }
4378 
4379 /* Subroutine of get_format_for_type.
4380 
4381    Generate a string containing the length modifier and conversion specifier
4382    that should be used to format arguments of type ARG_TYPE within FKI
4383    (effectively the inverse of the checking code).
4384 
4385    If CONVERSION_CHAR is not zero (the first pass), the resulting suggestion
4386    is required to use it, for correcting bogus length modifiers.
4387    If CONVERSION_CHAR is zero (the second pass), then allow any suggestion
4388    that matches ARG_TYPE.
4389 
4390    If successful, returns a non-NULL string which should be freed
4391    by the caller.
4392    Otherwise, returns NULL.  */
4393 
4394 static char *
get_format_for_type_1(const format_kind_info * fki,tree arg_type,char conversion_char)4395 get_format_for_type_1 (const format_kind_info *fki, tree arg_type,
4396 		       char conversion_char)
4397 {
4398   gcc_assert (arg_type);
4399 
4400   const format_char_info *spec;
4401   for (spec = &fki->conversion_specs[0];
4402        spec->format_chars;
4403        spec++)
4404     {
4405       if (conversion_char)
4406 	if (!strchr (spec->format_chars, conversion_char))
4407 	  continue;
4408 
4409       tree effective_arg_type = deref_n_times (arg_type,
4410 					       spec->pointer_count);
4411       if (!effective_arg_type)
4412 	continue;
4413       for (int i = 0; i < FMT_LEN_MAX; i++)
4414 	{
4415 	  const format_type_detail *ftd = &spec->types[i];
4416 	  if (!ftd->type || *ftd->type == NULL_TREE)
4417 	    continue;
4418 	  if (matching_type_p (*ftd->type, effective_arg_type))
4419 	    {
4420 	      const char *len_modifier
4421 		= get_modifier_for_format_len (fki->length_char_specs,
4422 					       (enum format_lengths)i);
4423 	      if (!len_modifier)
4424 		len_modifier = "";
4425 
4426 	      if (conversion_char)
4427 		/* We found a match, using the given conversion char - the
4428 		   length modifier was incorrect (or absent).
4429 		   Provide a suggestion using the conversion char with the
4430 		   correct length modifier for the type.  */
4431 		return xasprintf ("%s%c", len_modifier, conversion_char);
4432 	      else
4433 		/* 2nd pass: no match was possible using the user-provided
4434 		   conversion char, but we do have a match without using it.
4435 		   Provide a suggestion using the first conversion char
4436 		   listed for the given type.  */
4437 		return xasprintf ("%s%c", len_modifier, spec->format_chars[0]);
4438 	    }
4439 	}
4440    }
4441 
4442   return NULL;
4443 }
4444 
4445 /* Generate a string containing the length modifier and conversion specifier
4446    that should be used to format arguments of type ARG_TYPE within FKI
4447    (effectively the inverse of the checking code).
4448 
4449    If successful, returns a non-NULL string which should be freed
4450    by the caller.
4451    Otherwise, returns NULL.  */
4452 
4453 static char *
get_format_for_type(const format_kind_info * fki,tree arg_type,char conversion_char)4454 get_format_for_type (const format_kind_info *fki, tree arg_type,
4455 		     char conversion_char)
4456 {
4457   gcc_assert (arg_type);
4458   gcc_assert (conversion_char);
4459 
4460   /* First pass: look for a format_char_info containing CONVERSION_CHAR
4461      If we find one, then presumably the length modifier was incorrect
4462      (or absent).  */
4463   char *result = get_format_for_type_1 (fki, arg_type, conversion_char);
4464   if (result)
4465     return result;
4466 
4467   /* Second pass: we didn't find a match for CONVERSION_CHAR, so try
4468      matching just on the type. */
4469   return get_format_for_type_1 (fki, arg_type, '\0');
4470 }
4471 
4472 /* Attempt to get a string for use as a replacement fix-it hint for the
4473    source range in FMT_LOC.
4474 
4475    Preserve all of the text within the range of FMT_LOC up to
4476    OFFSET_TO_TYPE_START, replacing the rest with an appropriate
4477    length modifier and conversion specifier for ARG_TYPE, attempting
4478    to keep the user-provided CONVERSION_CHAR if possible.
4479 
4480    For example, given a long vs long long mismatch for arg5 here:
4481 
4482     000000000111111111122222222223333333333|
4483     123456789012345678901234567890123456789` column numbers
4484                    0000000000111111111122|
4485                    0123456789012345678901` string offsets
4486                           V~~~~~~~~ : range of FMT_LOC, from cols 23-31
4487       sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4488                                 ^ ^
4489                                 | ` CONVERSION_CHAR: 'd'
4490                                 type starts here
4491 
4492    where OFFSET_TO_TYPE_START is 13 (the offset to the "lld" within the
4493    STRING_CST), where the user provided:
4494      %-+*.*lld
4495    the result (assuming "long" argument 5) should be:
4496      %-+*.*ld
4497 
4498    If successful, returns a non-NULL string which should be freed
4499    by the caller.
4500    Otherwise, returns NULL.  */
4501 
4502 static char *
get_corrected_substring(const substring_loc & fmt_loc,format_wanted_type * type,tree arg_type,const format_kind_info * fki,int offset_to_type_start,char conversion_char)4503 get_corrected_substring (const substring_loc &fmt_loc,
4504 			 format_wanted_type *type, tree arg_type,
4505 			 const format_kind_info *fki,
4506 			 int offset_to_type_start, char conversion_char)
4507 {
4508   /* Attempt to provide hints for argument types, but not for field widths
4509      and precisions.  */
4510   if (!arg_type)
4511     return NULL;
4512   if (type->kind != CF_KIND_FORMAT)
4513     return NULL;
4514 
4515   /* Locate the current code within the source range, rejecting
4516      any awkward cases where the format string occupies more than
4517      one line.
4518      Lookup the place where the type starts (including any length
4519      modifiers), getting it as the caret location.  */
4520   substring_loc type_loc (fmt_loc);
4521   type_loc.set_caret_index (offset_to_type_start);
4522 
4523   location_t fmt_substring_loc;
4524   const char *err = type_loc.get_location (&fmt_substring_loc);
4525   if (err)
4526     return NULL;
4527 
4528   source_range fmt_substring_range
4529     = get_range_from_loc (line_table, fmt_substring_loc);
4530 
4531   expanded_location caret
4532     = expand_location_to_spelling_point (fmt_substring_loc);
4533   expanded_location start
4534     = expand_location_to_spelling_point (fmt_substring_range.m_start);
4535   expanded_location finish
4536     = expand_location_to_spelling_point (fmt_substring_range.m_finish);
4537   if (caret.file != start.file)
4538     return NULL;
4539   if (start.file != finish.file)
4540     return NULL;
4541   if (caret.line != start.line)
4542     return NULL;
4543   if (start.line != finish.line)
4544     return NULL;
4545   if (start.column > caret.column)
4546     return NULL;
4547   if (start.column > finish.column)
4548     return NULL;
4549   if (caret.column > finish.column)
4550     return NULL;
4551 
4552   char_span line = location_get_source_line (start.file, start.line);
4553   if (!line)
4554     return NULL;
4555 
4556   /* If we got this far, then we have the line containing the
4557      existing conversion specification.
4558 
4559      Generate a trimmed copy, containing the prefix part of the conversion
4560      specification, up to the (but not including) the length modifier.
4561      In the above example, this would be "%-+*.*".  */
4562   int length_up_to_type = caret.column - start.column;
4563   char_span prefix_span = line.subspan (start.column - 1, length_up_to_type);
4564   char *prefix = prefix_span.xstrdup ();
4565 
4566   /* Now attempt to generate a suggestion for the rest of the specification
4567      (length modifier and conversion char), based on ARG_TYPE and
4568      CONVERSION_CHAR.
4569      In the above example, this would be "ld".  */
4570   char *format_for_type = get_format_for_type (fki, arg_type, conversion_char);
4571   if (!format_for_type)
4572     {
4573       free (prefix);
4574       return NULL;
4575     }
4576 
4577   /* Success.  Generate the resulting suggestion for the whole range of
4578      FMT_LOC by concatenating the two strings.
4579      In the above example, this would be "%-+*.*ld".  */
4580   char *result = concat (prefix, format_for_type, NULL);
4581   free (format_for_type);
4582   free (prefix);
4583   return result;
4584 }
4585 
4586 /* Helper class for adding zero or more trailing '*' to types.
4587 
4588    The format type and name exclude any '*' for pointers, so those
4589    must be formatted manually.  For all the types we currently have,
4590    this is adequate, but formats taking pointers to functions or
4591    arrays would require the full type to be built up in order to
4592    print it with %T.  */
4593 
4594 class indirection_suffix
4595 {
4596  public:
indirection_suffix(int pointer_count)4597   indirection_suffix (int pointer_count) : m_pointer_count (pointer_count) {}
4598 
4599   /* Determine the size of the buffer (including NUL-terminator).  */
4600 
get_buffer_size()4601   size_t get_buffer_size () const
4602   {
4603     return m_pointer_count + 2;
4604   }
4605 
4606   /* Write the '*' to DST and add a NUL-terminator.  */
4607 
fill_buffer(char * dst)4608   void fill_buffer (char *dst) const
4609   {
4610     if (m_pointer_count == 0)
4611       dst[0] = 0;
4612     else if (c_dialect_cxx ())
4613       {
4614 	memset (dst, '*', m_pointer_count);
4615 	dst[m_pointer_count] = 0;
4616       }
4617     else
4618       {
4619 	dst[0] = ' ';
4620 	memset (dst + 1, '*', m_pointer_count);
4621 	dst[m_pointer_count + 1] = 0;
4622       }
4623   }
4624 
4625  private:
4626   int m_pointer_count;
4627 };
4628 
4629 /* Subclass of range_label for labelling the range in the format string
4630    with the type in question, adding trailing '*' for pointer_count.  */
4631 
4632 class range_label_for_format_type_mismatch
4633   : public range_label_for_type_mismatch
4634 {
4635  public:
range_label_for_format_type_mismatch(tree labelled_type,tree other_type,int pointer_count)4636   range_label_for_format_type_mismatch (tree labelled_type, tree other_type,
4637 					int pointer_count)
4638   : range_label_for_type_mismatch (labelled_type, other_type),
4639     m_pointer_count (pointer_count)
4640   {
4641   }
4642 
get_text(unsigned range_idx)4643   label_text get_text (unsigned range_idx) const FINAL OVERRIDE
4644   {
4645     label_text text = range_label_for_type_mismatch::get_text (range_idx);
4646     if (text.m_buffer == NULL)
4647       return text;
4648 
4649     indirection_suffix suffix (m_pointer_count);
4650     char *p = (char *) alloca (suffix.get_buffer_size ());
4651     suffix.fill_buffer (p);
4652 
4653     char *result = concat (text.m_buffer, p, NULL);
4654     text.maybe_free ();
4655     return label_text::take (result);
4656   }
4657 
4658  private:
4659   int m_pointer_count;
4660 };
4661 
4662 /* Give a warning about a format argument of different type from that expected.
4663    The range of the diagnostic is taken from WHOLE_FMT_LOC; the caret location
4664    is based on the location of the char at TYPE->offset_loc.
4665    PARAM_LOC is the location of the relevant argument, or UNKNOWN_LOCATION
4666    if this is unavailable.
4667    WANTED_TYPE is the type the argument should have,
4668    possibly stripped of pointer dereferences.  The description (such as "field
4669    precision"), the placement in the format string, a possibly more
4670    friendly name of WANTED_TYPE, and the number of pointer dereferences
4671    are taken from TYPE.  ARG_TYPE is the type of the actual argument,
4672    or NULL if it is missing.
4673 
4674    OFFSET_TO_TYPE_START is the offset within the execution-charset encoded
4675    format string to where type information begins for the conversion
4676    (the length modifier and conversion specifier).
4677    CONVERSION_CHAR is the user-provided conversion specifier.
4678 
4679    For example, given a type mismatch for argument 5 here:
4680 
4681     00000000011111111112222222222333333333344444444445555555555|
4682     12345678901234567890123456789012345678901234567890123456789` column numbers
4683                    0000000000111111111122|
4684                    0123456789012345678901` offsets within STRING_CST
4685                           V~~~~~~~~ : range of WHOLE_FMT_LOC, from cols 23-31
4686       sprintf (d, "before %-+*.*lld after", int_expr, int_expr, long_expr);
4687                                 ^ ^                             ^~~~~~~~~
4688                                 | ` CONVERSION_CHAR: 'd'        PARAM_LOC
4689                                 type starts here
4690 
4691    OFFSET_TO_TYPE_START is 13, the offset to the "lld" within the
4692    STRING_CST.  */
4693 
4694 static void
format_type_warning(const substring_loc & whole_fmt_loc,location_t param_loc,format_wanted_type * type,tree wanted_type,tree arg_type,const format_kind_info * fki,int offset_to_type_start,char conversion_char)4695 format_type_warning (const substring_loc &whole_fmt_loc,
4696 		     location_t param_loc,
4697 		     format_wanted_type *type,
4698 		     tree wanted_type, tree arg_type,
4699 		     const format_kind_info *fki,
4700 		     int offset_to_type_start,
4701 		     char conversion_char)
4702 {
4703   enum format_specifier_kind kind = type->kind;
4704   const char *wanted_type_name = type->wanted_type_name;
4705   const char *format_start = type->format_start;
4706   int format_length = type->format_length;
4707   int pointer_count = type->pointer_count;
4708   int arg_num = type->arg_num;
4709 
4710   /* If ARG_TYPE is a typedef with a misleading name (for example,
4711      size_t but not the standard size_t expected by printf %zu), avoid
4712      printing the typedef name.  */
4713   if (wanted_type_name
4714       && arg_type
4715       && TYPE_NAME (arg_type)
4716       && TREE_CODE (TYPE_NAME (arg_type)) == TYPE_DECL
4717       && DECL_NAME (TYPE_NAME (arg_type))
4718       && !strcmp (wanted_type_name,
4719 		  lang_hooks.decl_printable_name (TYPE_NAME (arg_type), 2)))
4720     arg_type = TYPE_MAIN_VARIANT (arg_type);
4721 
4722   indirection_suffix suffix (pointer_count);
4723   char *p = (char *) alloca (suffix.get_buffer_size ());
4724   suffix.fill_buffer (p);
4725 
4726   /* WHOLE_FMT_LOC has the caret at the end of the range.
4727      Set the caret to be at the offset from TYPE.  Subtract one
4728      from the offset for the same reason as in format_warning_at_char.  */
4729   substring_loc fmt_loc (whole_fmt_loc);
4730   fmt_loc.set_caret_index (type->offset_loc - 1);
4731 
4732   range_label_for_format_type_mismatch fmt_label (wanted_type, arg_type,
4733 						  pointer_count);
4734   range_label_for_type_mismatch param_label (arg_type, wanted_type);
4735 
4736   /* Get a string for use as a replacement fix-it hint for the range in
4737      fmt_loc, or NULL.  */
4738   char *corrected_substring
4739     = get_corrected_substring (fmt_loc, type, arg_type, fki,
4740 			       offset_to_type_start, conversion_char);
4741   format_string_diagnostic_t diag (fmt_loc, &fmt_label, param_loc, &param_label,
4742 				   corrected_substring);
4743   if (wanted_type_name)
4744     {
4745       if (arg_type)
4746 	diag.emit_warning
4747 	  (OPT_Wformat_,
4748 	   "%s %<%s%.*s%> expects argument of type %<%s%s%>, "
4749 	   "but argument %d has type %qT",
4750 	   gettext (kind_descriptions[kind]),
4751 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4752 	   format_length, format_start,
4753 	   wanted_type_name, p, arg_num, arg_type);
4754       else
4755 	diag.emit_warning
4756 	  (OPT_Wformat_,
4757 	   "%s %<%s%.*s%> expects a matching %<%s%s%> argument",
4758 	   gettext (kind_descriptions[kind]),
4759 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4760 	   format_length, format_start, wanted_type_name, p);
4761     }
4762   else
4763     {
4764       if (arg_type)
4765 	diag.emit_warning
4766 	  (OPT_Wformat_,
4767 	   "%s %<%s%.*s%> expects argument of type %<%T%s%>, "
4768 	   "but argument %d has type %qT",
4769 	   gettext (kind_descriptions[kind]),
4770 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4771 	   format_length, format_start,
4772 	   wanted_type, p, arg_num, arg_type);
4773       else
4774 	diag.emit_warning
4775 	  (OPT_Wformat_,
4776 	   "%s %<%s%.*s%> expects a matching %<%T%s%> argument",
4777 	   gettext (kind_descriptions[kind]),
4778 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4779 	   format_length, format_start, wanted_type, p);
4780     }
4781 
4782   free (corrected_substring);
4783 }
4784 
4785 
4786 /* Given a format_char_info array FCI, and a character C, this function
4787    returns the index into the conversion_specs where that specifier's
4788    data is located.  The character must exist.  */
4789 static unsigned int
find_char_info_specifier_index(const format_char_info * fci,int c)4790 find_char_info_specifier_index (const format_char_info *fci, int c)
4791 {
4792   unsigned i;
4793 
4794   for (i = 0; fci->format_chars; i++, fci++)
4795     if (strchr (fci->format_chars, c))
4796       return i;
4797 
4798   /* We shouldn't be looking for a non-existent specifier.  */
4799   gcc_unreachable ();
4800 }
4801 
4802 /* Given a format_length_info array FLI, and a character C, this
4803    function returns the index into the conversion_specs where that
4804    modifier's data is located.  The character must exist.  */
4805 static unsigned int
find_length_info_modifier_index(const format_length_info * fli,int c)4806 find_length_info_modifier_index (const format_length_info *fli, int c)
4807 {
4808   unsigned i;
4809 
4810   for (i = 0; fli->name; i++, fli++)
4811     if (strchr (fli->name, c))
4812       return i;
4813 
4814   /* We shouldn't be looking for a non-existent modifier.  */
4815   gcc_unreachable ();
4816 }
4817 
4818 /* Determine the type of HOST_WIDE_INT in the code being compiled for
4819    use in GCC's __asm_fprintf__ custom format attribute.  You must
4820    have set dynamic_format_types before calling this function.  */
4821 static void
init_dynamic_asm_fprintf_info(void)4822 init_dynamic_asm_fprintf_info (void)
4823 {
4824   static tree hwi;
4825 
4826   if (!hwi)
4827     {
4828       format_length_info *new_asm_fprintf_length_specs;
4829       unsigned int i;
4830 
4831       /* Find the underlying type for HOST_WIDE_INT.  For the %w
4832 	 length modifier to work, one must have issued: "typedef
4833 	 HOST_WIDE_INT __gcc_host_wide_int__;" in one's source code
4834 	 prior to using that modifier.  */
4835       hwi = maybe_get_identifier ("__gcc_host_wide_int__");
4836       if (!hwi)
4837 	{
4838 	  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4839 	  return;
4840 	}
4841       hwi = identifier_global_value (hwi);
4842       if (!hwi || TREE_CODE (hwi) != TYPE_DECL)
4843 	{
4844 	  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4845 	  return;
4846 	}
4847       hwi = DECL_ORIGINAL_TYPE (hwi);
4848       gcc_assert (hwi);
4849       if (hwi != long_integer_type_node && hwi != long_long_integer_type_node)
4850 	{
4851 	  error ("%<__gcc_host_wide_int__%> is not defined as %<long%>"
4852 		 " or %<long long%>");
4853 	  return;
4854 	}
4855 
4856       /* Create a new (writable) copy of asm_fprintf_length_specs.  */
4857       new_asm_fprintf_length_specs = (format_length_info *)
4858 				     xmemdup (asm_fprintf_length_specs,
4859 					      sizeof (asm_fprintf_length_specs),
4860 					      sizeof (asm_fprintf_length_specs));
4861 
4862       /* HOST_WIDE_INT must be one of 'long' or 'long long'.  */
4863       i = find_length_info_modifier_index (new_asm_fprintf_length_specs, 'w');
4864       if (hwi == long_integer_type_node)
4865 	new_asm_fprintf_length_specs[i].index = FMT_LEN_l;
4866       else if (hwi == long_long_integer_type_node)
4867 	new_asm_fprintf_length_specs[i].index = FMT_LEN_ll;
4868       else
4869 	gcc_unreachable ();
4870 
4871       /* Assign the new data for use.  */
4872       dynamic_format_types[asm_fprintf_format_type].length_char_specs =
4873 	new_asm_fprintf_length_specs;
4874     }
4875 }
4876 
4877 /* Determine the type of a "locus" in the code being compiled for use
4878    in GCC's __gcc_gfc__ custom format attribute.  You must have set
4879    dynamic_format_types before calling this function.  */
4880 static void
init_dynamic_gfc_info(void)4881 init_dynamic_gfc_info (void)
4882 {
4883   if (!locus)
4884     {
4885       static format_char_info *gfc_fci;
4886 
4887       /* For the GCC __gcc_gfc__ custom format specifier to work, one
4888 	 must have declared 'locus' prior to using this attribute.  If
4889 	 we haven't seen this declarations then you shouldn't use the
4890 	 specifier requiring that type.  */
4891       if ((locus = maybe_get_identifier ("locus")))
4892 	{
4893 	  locus = identifier_global_value (locus);
4894 	  if (locus)
4895 	    {
4896 	      if (TREE_CODE (locus) != TYPE_DECL
4897 		  || TREE_TYPE (locus) == error_mark_node)
4898 		{
4899 		  error ("%<locus%> is not defined as a type");
4900 		  locus = 0;
4901 		}
4902 	      else
4903 		locus = TREE_TYPE (locus);
4904 	    }
4905 	}
4906 
4907       /* Assign the new data for use.  */
4908 
4909       /* Handle the __gcc_gfc__ format specifics.  */
4910       if (!gfc_fci)
4911 	dynamic_format_types[gcc_gfc_format_type].conversion_specs =
4912 	  gfc_fci = (format_char_info *)
4913 		     xmemdup (gcc_gfc_char_table,
4914 			      sizeof (gcc_gfc_char_table),
4915 			      sizeof (gcc_gfc_char_table));
4916       if (locus)
4917 	{
4918 	  const unsigned i = find_char_info_specifier_index (gfc_fci, 'L');
4919 	  gfc_fci[i].types[0].type = &locus;
4920 	  gfc_fci[i].pointer_count = 1;
4921 	}
4922     }
4923 }
4924 
4925 /* Lookup the type named NAME and return a NAME type if found.
4926    Otherwise, return void_type_node if NAME has not been used yet,
4927    or NULL_TREE if NAME is not a type (issuing an error).  */
4928 
4929 static tree
get_named_type(const char * name)4930 get_named_type (const char *name)
4931 {
4932   if (tree result = maybe_get_identifier (name))
4933     {
4934       result = identifier_global_tag (result);
4935       if (result)
4936 	{
4937 	  if (TYPE_P (result))
4938 	    ;
4939 	  else if (TREE_CODE (result) == TYPE_DECL)
4940 	    result = TREE_TYPE (result);
4941 	  else
4942 	    {
4943 	      error ("%qs is not defined as a type", name);
4944 	      result = NULL_TREE;
4945 	    }
4946 	}
4947       return result;
4948     }
4949   else
4950     return void_type_node;
4951 }
4952 
4953 /* Determine the types of "tree" and "location_t" in the code being
4954    compiled for use in GCC's diagnostic custom format attributes.  You
4955    must have set dynamic_format_types before calling this function.  */
4956 static void
init_dynamic_diag_info(void)4957 init_dynamic_diag_info (void)
4958 {
4959   /* For the GCC-diagnostics custom format specifiers to work, one
4960      must have declared 'tree' and 'location_t' prior to using those
4961      attributes.  If we haven't seen these declarations then
4962      the specifiers requiring these types shouldn't be used.
4963      However we don't force a hard ICE because we may see only one
4964      or the other type.  */
4965   if (tree loc = maybe_get_identifier ("location_t"))
4966     {
4967       loc = identifier_global_value (loc);
4968       if (loc && TREE_CODE (loc) != TYPE_DECL)
4969 	error ("%<location_t%> is not defined as a type");
4970     }
4971 
4972   /* Initialize the global tree node type local to this file.  */
4973   if (!local_tree_type_node
4974       || local_tree_type_node == void_type_node)
4975     {
4976       /* We need to grab the underlying 'union tree_node' so peek into
4977 	 an extra type level.  */
4978       if ((local_tree_type_node = maybe_get_identifier ("tree")))
4979 	{
4980 	  local_tree_type_node
4981 	    = identifier_global_value (local_tree_type_node);
4982 	  if (local_tree_type_node)
4983 	    {
4984 	      if (TREE_CODE (local_tree_type_node) != TYPE_DECL)
4985 		{
4986 		  error ("%<tree%> is not defined as a type");
4987 		  local_tree_type_node = NULL_TREE;
4988 		}
4989 	      else if (TREE_CODE (TREE_TYPE (local_tree_type_node))
4990 		       != POINTER_TYPE)
4991 		{
4992 		  error ("%<tree%> is not defined as a pointer type");
4993 		  local_tree_type_node = NULL_TREE;
4994 		}
4995 	      else
4996 		local_tree_type_node
4997 		  = TREE_TYPE (TREE_TYPE (local_tree_type_node));
4998 	    }
4999 	}
5000       else
5001 	local_tree_type_node = void_type_node;
5002     }
5003 
5004   /* Similar to the above but for gimple*.  */
5005   if (!local_gimple_ptr_node
5006       || local_gimple_ptr_node == void_type_node)
5007     local_gimple_ptr_node = get_named_type ("gimple");
5008 
5009   /* Similar to the above but for cgraph_node*.  */
5010   if (!local_cgraph_node_ptr_node
5011       || local_cgraph_node_ptr_node == void_type_node)
5012     local_cgraph_node_ptr_node = get_named_type ("cgraph_node");
5013 
5014   /* Similar to the above but for diagnostic_event_id_t*.  */
5015   if (!local_event_ptr_node
5016       || local_event_ptr_node == void_type_node)
5017     local_event_ptr_node = get_named_type ("diagnostic_event_id_t");
5018 
5019   static tree hwi;
5020 
5021   if (!hwi)
5022     {
5023       static format_length_info *diag_ls;
5024       unsigned int i;
5025 
5026       /* Find the underlying type for HOST_WIDE_INT.  For the 'w'
5027 	 length modifier to work, one must have issued: "typedef
5028 	 HOST_WIDE_INT __gcc_host_wide_int__;" in one's source code
5029 	 prior to using that modifier.  */
5030       if ((hwi = maybe_get_identifier ("__gcc_host_wide_int__")))
5031 	{
5032 	  hwi = identifier_global_value (hwi);
5033 	  if (hwi)
5034 	    {
5035 	      if (TREE_CODE (hwi) != TYPE_DECL)
5036 		{
5037 		  error ("%<__gcc_host_wide_int__%> is not defined as a type");
5038 		  hwi = 0;
5039 		}
5040 	      else
5041 		{
5042 		  hwi = DECL_ORIGINAL_TYPE (hwi);
5043 		  gcc_assert (hwi);
5044 		  if (hwi != long_integer_type_node
5045 		      && hwi != long_long_integer_type_node)
5046 		    {
5047 		      error ("%<__gcc_host_wide_int__%> is not defined"
5048 			     " as %<long%> or %<long long%>");
5049 		      hwi = 0;
5050 		    }
5051 		}
5052 	    }
5053 	}
5054 
5055       /* Assign the new data for use.  */
5056 
5057       /* All the GCC diag formats use the same length specs.  */
5058       if (!diag_ls)
5059 	dynamic_format_types[gcc_diag_format_type].length_char_specs =
5060 	  dynamic_format_types[gcc_tdiag_format_type].length_char_specs =
5061 	  dynamic_format_types[gcc_cdiag_format_type].length_char_specs =
5062 	  dynamic_format_types[gcc_cxxdiag_format_type].length_char_specs =
5063 	  dynamic_format_types[gcc_dump_printf_format_type].length_char_specs =
5064 	  diag_ls = (format_length_info *)
5065 		    xmemdup (gcc_diag_length_specs,
5066 			     sizeof (gcc_diag_length_specs),
5067 			     sizeof (gcc_diag_length_specs));
5068       if (hwi)
5069 	{
5070 	  /* HOST_WIDE_INT must be one of 'long' or 'long long'.  */
5071 	  i = find_length_info_modifier_index (diag_ls, 'w');
5072 	  if (hwi == long_integer_type_node)
5073 	    diag_ls[i].index = FMT_LEN_l;
5074 	  else if (hwi == long_long_integer_type_node)
5075 	    diag_ls[i].index = FMT_LEN_ll;
5076 	  else
5077 	    gcc_unreachable ();
5078 	}
5079     }
5080 
5081   /* It's safe to "re-initialize these to the same values.  */
5082   dynamic_format_types[gcc_diag_format_type].conversion_specs =
5083     gcc_diag_char_table;
5084   dynamic_format_types[gcc_tdiag_format_type].conversion_specs =
5085     gcc_tdiag_char_table;
5086   dynamic_format_types[gcc_cdiag_format_type].conversion_specs =
5087     gcc_cdiag_char_table;
5088   dynamic_format_types[gcc_cxxdiag_format_type].conversion_specs =
5089     gcc_cxxdiag_char_table;
5090   dynamic_format_types[gcc_dump_printf_format_type].conversion_specs =
5091     gcc_dump_printf_char_table;
5092 }
5093 
/* Target-supplied tables and hook.  Each one is declared only when
   the corresponding macro is defined.  */

#ifdef TARGET_FORMAT_TYPES
extern const format_kind_info TARGET_FORMAT_TYPES[];
#endif

#ifdef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
extern const target_ovr_attr TARGET_OVERRIDES_FORMAT_ATTRIBUTES[];
#endif

#ifdef TARGET_OVERRIDES_FORMAT_INIT
extern void TARGET_OVERRIDES_FORMAT_INIT (void);
#endif
5104 
5105 /* Attributes such as "printf" are equivalent to those such as
5106    "gnu_printf" unless this is overridden by a target.  */
5107 static const target_ovr_attr gnu_target_overrides_format_attributes[] =
5108 {
5109   { "gnu_printf",   "printf" },
5110   { "gnu_syslog",   "syslog" },
5111   { "gnu_scanf",    "scanf" },
5112   { "gnu_strftime", "strftime" },
5113   { "gnu_strfmon",  "strfmon" },
5114   { NULL,           NULL }
5115 };
5116 
5117 /* Translate to unified attribute name. This is used in decode_format_type and
5118    decode_format_attr. In attr_name the user specified argument is passed. It
5119    returns the unified format name from TARGET_OVERRIDES_FORMAT_ATTRIBUTES
5120    or the attr_name passed to this function, if there is no matching entry.  */
5121 static const char *
convert_format_name_to_system_name(const char * attr_name)5122 convert_format_name_to_system_name (const char *attr_name)
5123 {
5124   int i;
5125 
5126   if (attr_name == NULL || *attr_name == 0
5127       || strncmp (attr_name, "gcc_", 4) == 0)
5128     return attr_name;
5129 #ifdef TARGET_OVERRIDES_FORMAT_INIT
5130   TARGET_OVERRIDES_FORMAT_INIT ();
5131 #endif
5132 
5133 #ifdef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
5134   /* Check if format attribute is overridden by target.  */
5135   if (TARGET_OVERRIDES_FORMAT_ATTRIBUTES != NULL
5136       && TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT > 0)
5137     {
5138       for (i = 0; i < TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT; ++i)
5139         {
5140           if (cmp_attribs (TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_src,
5141 			   attr_name))
5142             return attr_name;
5143           if (cmp_attribs (TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_dst,
5144 			   attr_name))
5145             return TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_src;
5146         }
5147     }
5148 #endif
5149   /* Otherwise default to gnu format.  */
5150   for (i = 0;
5151        gnu_target_overrides_format_attributes[i].named_attr_src != NULL;
5152        ++i)
5153     {
5154       if (cmp_attribs (gnu_target_overrides_format_attributes[i].named_attr_src,
5155 		       attr_name))
5156         return attr_name;
5157       if (cmp_attribs (gnu_target_overrides_format_attributes[i].named_attr_dst,
5158 		       attr_name))
5159         return gnu_target_overrides_format_attributes[i].named_attr_src;
5160     }
5161 
5162   return attr_name;
5163 }
5164 
5165 /* Handle a "format" attribute; arguments as in
5166    struct attribute_spec.handler.  */
5167 tree
handle_format_attribute(tree * node,tree atname,tree args,int flags,bool * no_add_attrs)5168 handle_format_attribute (tree *node, tree atname, tree args,
5169 			 int flags, bool *no_add_attrs)
5170 {
5171   const_tree type = *node;
5172   function_format_info info;
5173 
5174 #ifdef TARGET_FORMAT_TYPES
5175   /* If the target provides additional format types, we need to
5176      add them to FORMAT_TYPES at first use.  */
5177   if (TARGET_FORMAT_TYPES != NULL && !dynamic_format_types)
5178     {
5179       dynamic_format_types = XNEWVEC (format_kind_info,
5180 				      n_format_types + TARGET_N_FORMAT_TYPES);
5181       memcpy (dynamic_format_types, format_types_orig,
5182 	      sizeof (format_types_orig));
5183       memcpy (&dynamic_format_types[n_format_types], TARGET_FORMAT_TYPES,
5184 	      TARGET_N_FORMAT_TYPES * sizeof (dynamic_format_types[0]));
5185 
5186       format_types = dynamic_format_types;
5187       /* Provide a reference for the first potential external type.  */
5188       first_target_format_type = n_format_types;
5189       n_format_types += TARGET_N_FORMAT_TYPES;
5190     }
5191 #endif
5192 
5193   /* Canonicalize name of format function.  */
5194   if (TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
5195     TREE_VALUE (args) = canonicalize_attr_name (TREE_VALUE (args));
5196 
5197   if (!decode_format_attr (type, atname, args, &info, /* validated_p = */false))
5198     {
5199       *no_add_attrs = true;
5200       return NULL_TREE;
5201     }
5202 
5203   if (prototype_p (type))
5204     {
5205       if (!check_format_string (type, info.format_num, flags,
5206 				no_add_attrs, info.format_type))
5207 	return NULL_TREE;
5208 
5209       if (info.first_arg_num != 0)
5210 	{
5211 	  unsigned HOST_WIDE_INT arg_num = 1;
5212 	  function_args_iterator iter;
5213 	  tree arg_type;
5214 
5215 	  /* Verify that first_arg_num points to the last arg,
5216 	     the ...  */
5217 	  FOREACH_FUNCTION_ARGS (type, arg_type, iter)
5218 	    arg_num++;
5219 
5220 	  if (arg_num != info.first_arg_num)
5221 	    {
5222 	      if (!(flags & (int) ATTR_FLAG_BUILT_IN))
5223 		error ("argument to be formatted is not %<...%>");
5224 	      *no_add_attrs = true;
5225 	      return NULL_TREE;
5226 	    }
5227 	}
5228     }
5229 
5230   /* Check if this is a strftime variant. Just for this variant
5231      FMT_FLAG_ARG_CONVERT is not set.  */
5232   if ((format_types[info.format_type].flags & (int) FMT_FLAG_ARG_CONVERT) == 0
5233       && info.first_arg_num != 0)
5234     {
5235       error ("strftime formats cannot format arguments");
5236       *no_add_attrs = true;
5237       return NULL_TREE;
5238     }
5239 
5240   /* If this is a custom GCC-internal format type, we have to
5241      initialize certain bits at runtime.  */
5242   if (info.format_type == asm_fprintf_format_type
5243       || info.format_type == gcc_gfc_format_type
5244       || info.format_type == gcc_diag_format_type
5245       || info.format_type == gcc_tdiag_format_type
5246       || info.format_type == gcc_cdiag_format_type
5247       || info.format_type == gcc_cxxdiag_format_type
5248       || info.format_type == gcc_dump_printf_format_type)
5249     {
5250       /* Our first time through, we have to make sure that our
5251 	 format_type data is allocated dynamically and is modifiable.  */
5252       if (!dynamic_format_types)
5253 	format_types = dynamic_format_types = (format_kind_info *)
5254 	  xmemdup (format_types_orig, sizeof (format_types_orig),
5255 		   sizeof (format_types_orig));
5256 
5257       /* If this is format __asm_fprintf__, we have to initialize
5258 	 GCC's notion of HOST_WIDE_INT for checking %wd.  */
5259       if (info.format_type == asm_fprintf_format_type)
5260 	init_dynamic_asm_fprintf_info ();
5261       /* If this is format __gcc_gfc__, we have to initialize GCC's
5262 	 notion of 'locus' at runtime for %L.  */
5263       else if (info.format_type == gcc_gfc_format_type)
5264 	init_dynamic_gfc_info ();
5265       /* If this is one of the diagnostic attributes, then we have to
5266 	 initialize 'location_t' and 'tree' at runtime.  */
5267       else if (info.format_type == gcc_diag_format_type
5268 	       || info.format_type == gcc_tdiag_format_type
5269 	       || info.format_type == gcc_cdiag_format_type
5270 	       || info.format_type == gcc_cxxdiag_format_type
5271 	       || info.format_type == gcc_dump_printf_format_type)
5272 	init_dynamic_diag_info ();
5273       else
5274 	gcc_unreachable ();
5275     }
5276 
5277   return NULL_TREE;
5278 }
5279 
5280 #if CHECKING_P
5281 
5282 namespace selftest {
5283 
5284 /* Selftests of location handling.  */
5285 
5286 /* Get the format_kind_info with the given name.  */
5287 
5288 static const format_kind_info *
get_info(const char * name)5289 get_info (const char *name)
5290 {
5291   int idx = decode_format_type (name);
5292   const format_kind_info *fki = &format_types[idx];
5293   ASSERT_STREQ (fki->name, name);
5294   return fki;
5295 }
5296 
5297 /* Verify that get_format_for_type (FKI, TYPE, CONVERSION_CHAR)
5298    is EXPECTED_FORMAT.  */
5299 
5300 static void
assert_format_for_type_streq(const location & loc,const format_kind_info * fki,const char * expected_format,tree type,char conversion_char)5301 assert_format_for_type_streq (const location &loc, const format_kind_info *fki,
5302 			      const char *expected_format, tree type,
5303 			      char conversion_char)
5304 {
5305   gcc_assert (fki);
5306   gcc_assert (expected_format);
5307   gcc_assert (type);
5308 
5309   char *actual_format = get_format_for_type (fki, type, conversion_char);
5310   ASSERT_STREQ_AT (loc, expected_format, actual_format);
5311   free (actual_format);
5312 }
5313 
/* Selftests for get_format_for_type.  */

/* Check one (TYPE, CONV) pair against the expected format FMT,
   recording the location of the macro use.  The enclosing function
   must have a format_kind_info pointer named "fki" in scope.  */
#define ASSERT_FORMAT_FOR_TYPE_STREQ(FMT, TYPE, CONV) \
  assert_format_for_type_streq (SELFTEST_LOCATION, (fki), (FMT), \
				(TYPE), (CONV))
5319 
5320 /* Selftest for get_format_for_type for "printf"-style functions.  */
5321 
5322 static void
test_get_format_for_type_printf()5323 test_get_format_for_type_printf ()
5324 {
5325   const format_kind_info *fki = get_info ("gnu_printf");
5326   ASSERT_NE (fki, NULL);
5327 
5328   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'i');
5329   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'i');
5330   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'o');
5331   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'o');
5332   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'x');
5333   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'x');
5334   ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'X');
5335   ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'X');
5336   ASSERT_FORMAT_FOR_TYPE_STREQ ("d", integer_type_node, 'd');
5337   ASSERT_FORMAT_FOR_TYPE_STREQ ("i", integer_type_node, 'i');
5338   ASSERT_FORMAT_FOR_TYPE_STREQ ("o", integer_type_node, 'o');
5339   ASSERT_FORMAT_FOR_TYPE_STREQ ("x", integer_type_node, 'x');
5340   ASSERT_FORMAT_FOR_TYPE_STREQ ("X", integer_type_node, 'X');
5341   ASSERT_FORMAT_FOR_TYPE_STREQ ("d", unsigned_type_node, 'd');
5342   ASSERT_FORMAT_FOR_TYPE_STREQ ("i", unsigned_type_node, 'i');
5343   ASSERT_FORMAT_FOR_TYPE_STREQ ("o", unsigned_type_node, 'o');
5344   ASSERT_FORMAT_FOR_TYPE_STREQ ("x", unsigned_type_node, 'x');
5345   ASSERT_FORMAT_FOR_TYPE_STREQ ("X", unsigned_type_node, 'X');
5346   ASSERT_FORMAT_FOR_TYPE_STREQ ("ld", long_integer_type_node, 'd');
5347   ASSERT_FORMAT_FOR_TYPE_STREQ ("li", long_integer_type_node, 'i');
5348   ASSERT_FORMAT_FOR_TYPE_STREQ ("lx", long_integer_type_node, 'x');
5349   ASSERT_FORMAT_FOR_TYPE_STREQ ("lo", long_unsigned_type_node, 'o');
5350   ASSERT_FORMAT_FOR_TYPE_STREQ ("lx", long_unsigned_type_node, 'x');
5351   ASSERT_FORMAT_FOR_TYPE_STREQ ("lld", long_long_integer_type_node, 'd');
5352   ASSERT_FORMAT_FOR_TYPE_STREQ ("lli", long_long_integer_type_node, 'i');
5353   ASSERT_FORMAT_FOR_TYPE_STREQ ("llo", long_long_unsigned_type_node, 'o');
5354   ASSERT_FORMAT_FOR_TYPE_STREQ ("llx", long_long_unsigned_type_node, 'x');
5355   ASSERT_FORMAT_FOR_TYPE_STREQ ("s", build_pointer_type (char_type_node), 'i');
5356 }
5357 
5358 /* Selftest for get_format_for_type for "scanf"-style functions.  */
5359 
5360 static void
test_get_format_for_type_scanf()5361 test_get_format_for_type_scanf ()
5362 {
5363   const format_kind_info *fki = get_info ("gnu_scanf");
5364   ASSERT_NE (fki, NULL);
5365   ASSERT_FORMAT_FOR_TYPE_STREQ ("d", build_pointer_type (integer_type_node), 'd');
5366   ASSERT_FORMAT_FOR_TYPE_STREQ ("u", build_pointer_type (unsigned_type_node), 'u');
5367   ASSERT_FORMAT_FOR_TYPE_STREQ ("ld",
5368 				build_pointer_type (long_integer_type_node), 'd');
5369   ASSERT_FORMAT_FOR_TYPE_STREQ ("lu",
5370 				build_pointer_type (long_unsigned_type_node), 'u');
5371   ASSERT_FORMAT_FOR_TYPE_STREQ
5372     ("lld", build_pointer_type (long_long_integer_type_node), 'd');
5373   ASSERT_FORMAT_FOR_TYPE_STREQ
5374     ("llu", build_pointer_type (long_long_unsigned_type_node), 'u');
5375   ASSERT_FORMAT_FOR_TYPE_STREQ ("e", build_pointer_type (float_type_node), 'e');
5376   ASSERT_FORMAT_FOR_TYPE_STREQ ("le", build_pointer_type (double_type_node), 'e');
5377 }
5378 
5379 #undef ASSERT_FORMAT_FOR_TYPE_STREQ
5380 
5381 /* Exercise the type-printing label code, to give some coverage
5382    under "make selftest-valgrind" (in particular, to ensure that
5383    the label-printing machinery doesn't leak).  */
5384 
5385 static void
test_type_mismatch_range_labels()5386 test_type_mismatch_range_labels ()
5387 {
5388   /* Create a tempfile and write some text to it.
5389      ....................0000000001 11111111 12 22222222
5390      ....................1234567890 12345678 90 12345678.  */
5391   const char *content = "  printf (\"msg: %i\\n\", msg);\n";
5392   temp_source_file tmp (SELFTEST_LOCATION, ".c", content);
5393   line_table_test ltt;
5394 
5395   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
5396 
5397   location_t c17 = linemap_position_for_column (line_table, 17);
5398   ASSERT_EQ (LOCATION_COLUMN (c17), 17);
5399   location_t c18 = linemap_position_for_column (line_table, 18);
5400   location_t c24 = linemap_position_for_column (line_table, 24);
5401   location_t c26 = linemap_position_for_column (line_table, 26);
5402 
5403   /* Don't attempt to run the tests if column data might be unavailable.  */
5404   if (c26 > LINE_MAP_MAX_LOCATION_WITH_COLS)
5405     return;
5406 
5407   location_t fmt = make_location (c18, c17, c18);
5408   ASSERT_EQ (LOCATION_COLUMN (fmt), 18);
5409 
5410   location_t param = make_location (c24, c24, c26);
5411   ASSERT_EQ (LOCATION_COLUMN (param), 24);
5412 
5413   range_label_for_format_type_mismatch fmt_label (char_type_node,
5414 						  integer_type_node, 1);
5415   range_label_for_type_mismatch param_label (integer_type_node,
5416 					     char_type_node);
5417   gcc_rich_location richloc (fmt, &fmt_label);
5418   richloc.add_range (param, SHOW_RANGE_WITHOUT_CARET, &param_label);
5419 
5420   test_diagnostic_context dc;
5421   diagnostic_show_locus (&dc, &richloc, DK_ERROR);
5422   if (c_dialect_cxx ())
5423     /* "char*", without a space.  */
5424     ASSERT_STREQ ("   printf (\"msg: %i\\n\", msg);\n"
5425 		  "                 ~^     ~~~\n"
5426 		  "                  |     |\n"
5427 		  "                  char* int\n",
5428 		  pp_formatted_text (dc.printer));
5429   else
5430     /* "char *", with a space.  */
5431     ASSERT_STREQ ("   printf (\"msg: %i\\n\", msg);\n"
5432 		  "                 ~^     ~~~\n"
5433 		  "                  |     |\n"
5434 		  "                  |     int\n"
5435 		  "                  char *\n",
5436 		  pp_formatted_text (dc.printer));
5437 }
5438 
5439 /* Run all of the selftests within this file.  */
5440 
5441 void
c_format_c_tests()5442 c_format_c_tests ()
5443 {
5444   test_get_modifier_for_format_len ();
5445   test_get_format_for_type_printf ();
5446   test_get_format_for_type_scanf ();
5447   test_type_mismatch_range_labels ();
5448 }
5449 
5450 } // namespace selftest
5451 
5452 #endif /* CHECKING_P */
5453 
5454 #include "gt-c-family-c-format.h"
5455