xref: /netbsd-src/external/gpl3/gcc/dist/gcc/gimple-ssa-sprintf.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* Copyright (C) 2016-2022 Free Software Foundation, Inc.
2    Contributed by Martin Sebor <msebor@redhat.com>.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
/* This file implements the printf-return-value pass.  The pass does
   two things: 1) it analyzes calls to formatted output functions like
   sprintf looking for possible buffer overflows and calls to bounded
   functions like snprintf for early truncation (and under the control
   of the -Wformat-overflow and -Wformat-truncation options issues
   warnings), and 2) under the control of the -fprintf-return-value
   option it folds the return value of safe calls into constants,
   making it possible to eliminate code that depends on the value of
   those constants.
28 
29    For all functions (bounded or not) the pass uses the size of the
30    destination object.  That means that it will diagnose calls to
31    snprintf not on the basis of the size specified by the function's
32    second argument but rather on the basis of the size the first
33    argument points to (if possible).  For bound-checking built-ins
34    like __builtin___snprintf_chk the pass uses the size typically
35    determined by __builtin_object_size and passed to the built-in
36    by the Glibc inline wrapper.
37 
   The pass handles all forms of standard sprintf format directives,
   including character, integer, floating point, pointer, and string
   directives, with the standard C flags, widths, and precisions.  For
   integers and strings it computes the length of output itself.  For
   floating point it uses MPFR to format known constants with up and
   down rounding and uses the resulting range of output lengths.  For
   strings it uses the length of string literals and the sizes of
   character arrays that a character pointer may point to as a bound
   on the longest string.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "tree-cfg.h"
64 #include "tree-ssa-propagate.h"
65 #include "calls.h"
66 #include "cfgloop.h"
67 #include "tree-scalar-evolution.h"
68 #include "tree-ssa-loop.h"
69 #include "intl.h"
70 #include "langhooks.h"
71 
72 #include "attribs.h"
73 #include "builtins.h"
74 #include "pointer-query.h"
75 #include "stor-layout.h"
76 
77 #include "realmpfr.h"
78 #include "target.h"
79 
80 #include "cpplib.h"
81 #include "input.h"
82 #include "toplev.h"
83 #include "substring-locations.h"
84 #include "diagnostic.h"
85 #include "domwalk.h"
86 #include "alloc-pool.h"
87 #include "vr-values.h"
88 #include "tree-ssa-strlen.h"
89 #include "tree-dfa.h"
90 
/* A conservative estimate of MB_LEN_MAX for the target: six bytes is
   enough for UTF-8.  If the value were ever used for optimization it
   should come from a target hook, but for warnings this estimate is
   good enough.  */
#define target_mb_len_max()   6

/* Upper bound on the number of bytes a single non-string directive
   can produce, i.e., the result of printf ("%.*Lf", INT_MAX, -LDBL_MAX)
   when LDBL_MAX_10_EXP is 4932.  */
#define IEEE_MAX_10_EXP    4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
101 
102 namespace {
103 
/* The warning level in effect for the function being processed:
   warn_format_trunc for bounded functions and warn_format_overflow
   for all others.  */

static int warn_level;
109 
110 /* The minimum, maximum, likely, and unlikely maximum number of bytes
111    of output either a formatting function or an individual directive
112    can result in.  */
113 
114 struct result_range
115 {
116   /* The absolute minimum number of bytes.  The result of a successful
117      conversion is guaranteed to be no less than this.  (An erroneous
118      conversion can be indicated by MIN > HOST_WIDE_INT_MAX.)  */
119   unsigned HOST_WIDE_INT min;
120   /* The likely maximum result that is used in diagnostics.  In most
121      cases MAX is the same as the worst case UNLIKELY result.  */
122   unsigned HOST_WIDE_INT max;
123   /* The likely result used to trigger diagnostics.  For conversions
124      that result in a range of bytes [MIN, MAX], LIKELY is somewhere
125      in that range.  */
126   unsigned HOST_WIDE_INT likely;
127   /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
128      the worst cases maximum result of a directive.  In most cases
129      UNLIKELY == MAX.  UNLIKELY is used to control the return value
130      optimization but not in diagnostics.  */
131   unsigned HOST_WIDE_INT unlikely;
132 };
133 
134 /* Return the value of INT_MIN for the target.  */
135 
136 static inline HOST_WIDE_INT
target_int_min()137 target_int_min ()
138 {
139   return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
140 }
141 
142 /* Return the value of INT_MAX for the target.  */
143 
144 static inline unsigned HOST_WIDE_INT
target_int_max()145 target_int_max ()
146 {
147   return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
148 }
149 
150 /* Return the value of SIZE_MAX for the target.  */
151 
152 static inline unsigned HOST_WIDE_INT
target_size_max()153 target_size_max ()
154 {
155   return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
156 }
157 
/* Table mapping characters of the execution character set to the host
   character set.  It is indexed by the execution character.  Element
   zero doubles as a flag that init_target_to_host_charmap sets to
   nonzero when the mapping is 1-to-1.  */

static char target_to_host_charmap[256];
162 
163 /* Initialize a mapping from the execution character set to the host
164    character set.  */
165 
166 static bool
init_target_to_host_charmap()167 init_target_to_host_charmap ()
168 {
169   /* If the percent sign is non-zero the mapping has already been
170      initialized.  */
171   if (target_to_host_charmap['%'])
172     return true;
173 
174   /* Initialize the target_percent character (done elsewhere).  */
175   if (!init_target_chars ())
176     return false;
177 
178   /* The subset of the source character set used by printf conversion
179      specifications (strictly speaking, not all letters are used but
180      they are included here for the sake of simplicity).  The dollar
181      sign must be included even though it's not in the basic source
182      character set.  */
183   const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
184     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
185 
186   /* Set the mapping for all characters to some ordinary value (i,e.,
187      not none used in printf conversion specifications) and overwrite
188      those that are used by conversion specifications with their
189      corresponding values.  */
190   memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
191 
192   /* Are the two sets of characters the same?  */
193   bool all_same_p = true;
194 
195   for (const char *pc = srcset; *pc; ++pc)
196     {
197       /* Slice off the high end bits in case target characters are
198 	 signed.  All values are expected to be non-nul, otherwise
199 	 there's a problem.  */
200       if (unsigned char tc = lang_hooks.to_target_charset (*pc))
201 	{
202 	  target_to_host_charmap[tc] = *pc;
203 	  if (tc != *pc)
204 	    all_same_p = false;
205 	}
206       else
207 	return false;
208 
209     }
210 
211   /* Set the first element to a non-zero value if the mapping
212      is 1-to-1, otherwise leave it clear (NUL is assumed to be
213      the same in both character sets).  */
214   target_to_host_charmap[0] = all_same_p;
215 
216   return true;
217 }
218 
219 /* Return the host source character corresponding to the character
220    CH in the execution character set if one exists, or some innocuous
221    (non-special, non-nul) source character otherwise.  */
222 
223 static inline unsigned char
target_to_host(unsigned char ch)224 target_to_host (unsigned char ch)
225 {
226   return target_to_host_charmap[ch];
227 }
228 
229 /* Convert an initial substring of the string TARGSTR consisting of
230    characters in the execution character set into a string in the
231    source character set on the host and store up to HOSTSZ characters
232    in the buffer pointed to by HOSTR.  Return HOSTR.  */
233 
234 static const char*
target_to_host(char * hostr,size_t hostsz,const char * targstr)235 target_to_host (char *hostr, size_t hostsz, const char *targstr)
236 {
237   /* Make sure the buffer is reasonably big.  */
238   gcc_assert (hostsz > 4);
239 
240   /* The interesting subset of source and execution characters are
241      the same so no conversion is necessary.  However, truncate
242      overlong strings just like the translated strings are.  */
243   if (target_to_host_charmap['\0'] == 1)
244     {
245       size_t len = strlen (targstr);
246       if (len >= hostsz)
247 	{
248 	  memcpy (hostr, targstr, hostsz - 4);
249 	  strcpy (hostr + hostsz - 4, "...");
250 	}
251       else
252 	memcpy (hostr, targstr, len + 1);
253       return hostr;
254     }
255 
256   /* Convert the initial substring of TARGSTR to the corresponding
257      characters in the host set, appending "..." if TARGSTR is too
258      long to fit.  Using the static buffer assumes the function is
259      not called in between sequence points (which it isn't).  */
260   for (char *ph = hostr; ; ++targstr)
261     {
262       *ph++ = target_to_host (*targstr);
263       if (!*targstr)
264 	break;
265 
266       if (size_t (ph - hostr) == hostsz)
267 	{
268 	  strcpy (ph - 4, "...");
269 	  break;
270 	}
271     }
272 
273   return hostr;
274 }
275 
276 /* Convert the sequence of decimal digits in the execution character
277    starting at *PS to a HOST_WIDE_INT, analogously to strtol.  Return
278    the result and set *PS to one past the last converted character.
279    On range error set ERANGE to the digit that caused it.  */
280 
281 static inline HOST_WIDE_INT
target_strtowi(const char ** ps,const char ** erange)282 target_strtowi (const char **ps, const char **erange)
283 {
284   unsigned HOST_WIDE_INT val = 0;
285   for ( ; ; ++*ps)
286     {
287       unsigned char c = target_to_host (**ps);
288       if (ISDIGIT (c))
289 	{
290 	  c -= '0';
291 
292 	  /* Check for overflow.  */
293 	  if (val > ((unsigned HOST_WIDE_INT) HOST_WIDE_INT_MAX - c) / 10LU)
294 	    {
295 	      val = HOST_WIDE_INT_MAX;
296 	      *erange = *ps;
297 
298 	      /* Skip the remaining digits.  */
299 	      do
300 		c = target_to_host (*++*ps);
301 	      while (ISDIGIT (c));
302 	      break;
303 	    }
304 	  else
305 	    val = val * 10 + c;
306 	}
307       else
308 	break;
309     }
310 
311   return val;
312 }
313 
314 /* Given FORMAT, set *PLOC to the source location of the format string
315    and return the format string if it is known or null otherwise.  */
316 
317 static const char*
get_format_string(tree format,location_t * ploc)318 get_format_string (tree format, location_t *ploc)
319 {
320   *ploc = EXPR_LOC_OR_LOC (format, input_location);
321 
322   return c_getstr (format);
323 }
324 
325 /* For convenience and brevity, shorter named entrypoints of
326    format_string_diagnostic_t::emit_warning_va and
327    format_string_diagnostic_t::emit_warning_n_va.
328    These have to be functions with the attribute so that exgettext
329    works properly.  */
330 
331 static bool
332 ATTRIBUTE_GCC_DIAG (5, 6)
fmtwarn(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,opt_code opt,const char * gmsgid,...)333 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
334 	 const char *corrected_substring, opt_code opt,
335 	 const char *gmsgid, ...)
336 {
337   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
338 				   corrected_substring);
339   va_list ap;
340   va_start (ap, gmsgid);
341   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
342   va_end (ap);
343 
344   return warned;
345 }
346 
347 static bool
348 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
fmtwarn_n(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,opt_code opt,unsigned HOST_WIDE_INT n,const char * singular_gmsgid,const char * plural_gmsgid,...)349 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
350 	   const char *corrected_substring, opt_code opt,
351 	   unsigned HOST_WIDE_INT n,
352 	   const char *singular_gmsgid, const char *plural_gmsgid, ...)
353 {
354   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
355 				   corrected_substring);
356   va_list ap;
357   va_start (ap, plural_gmsgid);
358   bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
359 					&ap);
360   va_end (ap);
361 
362   return warned;
363 }
364 
/* The length modifiers of a format directive.  */

enum format_lengths
{
  FMT_LEN_none,
  FMT_LEN_hh,    /* char argument */
  FMT_LEN_h,     /* short */
  FMT_LEN_l,     /* long */
  FMT_LEN_ll,    /* long long */
  FMT_LEN_L,     /* long double (and GNU long long) */
  FMT_LEN_z,     /* size_t */
  FMT_LEN_t,     /* ptrdiff_t */
  FMT_LEN_j      /* intmax_t */
};
379 
380 
381 /* Description of the result of conversion either of a single directive
382    or the whole format string.  */
383 
384 class fmtresult
385 {
386 public:
387   /* Construct a FMTRESULT object with all counters initialized
388      to MIN.  KNOWNRANGE is set when MIN is valid.  */
fmtresult(unsigned HOST_WIDE_INT min=HOST_WIDE_INT_MAX)389   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
390   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
391     knownrange (min < HOST_WIDE_INT_MAX),
392     mayfail (), nullp ()
393   {
394     range.min = min;
395     range.max = min;
396     range.likely = min;
397     range.unlikely = min;
398   }
399 
400   /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
401      KNOWNRANGE is set when both MIN and MAX are valid.   */
fmtresult(unsigned HOST_WIDE_INT min,unsigned HOST_WIDE_INT max,unsigned HOST_WIDE_INT likely=HOST_WIDE_INT_MAX)402   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
403 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
404   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
405     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
406     mayfail (), nullp ()
407   {
408     range.min = min;
409     range.max = max;
410     range.likely = max < likely ? min : likely;
411     range.unlikely = max;
412   }
413 
414   /* Adjust result upward to reflect the RANGE of values the specified
415      width or precision is known to be in.  */
416   fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
417 					    tree = NULL_TREE,
418 					    unsigned = 0, unsigned = 0);
419 
420   /* Return the maximum number of decimal digits a value of TYPE
421      formats as on output.  */
422   static unsigned type_max_digits (tree, int);
423 
424   /* The range a directive's argument is in.  */
425   tree argmin, argmax;
426 
427   /* The starting offset into the destination of the formatted function
428      call of the %s argument that points into (aliases with) the same
429      destination array.  */
430   HOST_WIDE_INT dst_offset;
431 
432   /* The minimum and maximum number of bytes that a directive
433      results in on output for an argument in the range above.  */
434   result_range range;
435 
436   /* Non-nul when the argument of a string directive is not a nul
437      terminated string.  */
438   tree nonstr;
439 
440   /* True when the range above is obtained from a known value of
441      a directive's argument or its bounds and not the result of
442      heuristics that depend on warning levels.  */
443   bool knownrange;
444 
445   /* True for a directive that may fail (such as wide character
446      directives).  */
447   bool mayfail;
448 
449   /* True when the argument is a null pointer.  */
450   bool nullp;
451 };
452 
453 /* Adjust result upward to reflect the range ADJUST of values the
454    specified width or precision is known to be in.  When non-null,
455    TYPE denotes the type of the directive whose result is being
456    adjusted, BASE gives the base of the directive (octal, decimal,
457    or hex), and ADJ denotes the additional adjustment to the LIKELY
458    counter that may need to be added when ADJUST is a range.  */
459 
460 fmtresult&
adjust_for_width_or_precision(const HOST_WIDE_INT adjust[2],tree type,unsigned base,unsigned adj)461 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
462 					  tree type /* = NULL_TREE */,
463 					  unsigned base /* = 0 */,
464 					  unsigned adj /* = 0 */)
465 {
466   bool minadjusted = false;
467 
468   /* Adjust the minimum and likely counters.  */
469   if (adjust[0] >= 0)
470     {
471       if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
472 	{
473 	  range.min = adjust[0];
474 	  minadjusted = true;
475 	}
476 
477       /* Adjust the likely counter.  */
478       if (range.likely < range.min)
479 	range.likely = range.min;
480     }
481   else if (adjust[0] == target_int_min ()
482 	   && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
483     knownrange = false;
484 
485   /* Adjust the maximum counter.  */
486   if (adjust[1] > 0)
487     {
488       if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
489 	{
490 	  range.max = adjust[1];
491 
492 	  /* Set KNOWNRANGE if both the minimum and maximum have been
493 	     adjusted.  Otherwise leave it at what it was before.  */
494 	  knownrange = minadjusted;
495 	}
496     }
497 
498   if (warn_level > 1 && type)
499     {
500       /* For large non-constant width or precision whose range spans
501 	 the maximum number of digits produced by the directive for
502 	 any argument, set the likely number of bytes to be at most
503 	 the number digits plus other adjustment determined by the
504 	 caller (one for sign or two for the hexadecimal "0x"
505 	 prefix).  */
506       unsigned dirdigs = type_max_digits (type, base);
507       if (adjust[0] < dirdigs && dirdigs < adjust[1]
508 	  && range.likely < dirdigs)
509 	range.likely = dirdigs + adj;
510     }
511   else if (range.likely < (range.min ? range.min : 1))
512     {
513       /* Conservatively, set LIKELY to at least MIN but no less than
514 	 1 unless MAX is zero.  */
515       range.likely = (range.min
516 		      ? range.min
517 		      : range.max && (range.max < HOST_WIDE_INT_MAX
518 				      || warn_level > 1) ? 1 : 0);
519     }
520 
521   /* Finally adjust the unlikely counter to be at least as large as
522      the maximum.  */
523   if (range.unlikely < range.max)
524     range.unlikely = range.max;
525 
526   return *this;
527 }
528 
529 /* Return the maximum number of digits a value of TYPE formats in
530    BASE on output, not counting base prefix .  */
531 
532 unsigned
type_max_digits(tree type,int base)533 fmtresult::type_max_digits (tree type, int base)
534 {
535   unsigned prec = TYPE_PRECISION (type);
536   switch (base)
537     {
538     case 8:
539       return (prec + 2) / 3;
540     case 10:
541       /* Decimal approximation: yields 3, 5, 10, and 20 for precision
542 	 of 8, 16, 32, and 64 bits.  */
543       return prec * 301 / 1000 + 1;
544     case 16:
545       return prec / 4;
546     }
547 
548   gcc_unreachable ();
549 }
550 
551 static bool
552 get_int_range (tree, gimple *, HOST_WIDE_INT *, HOST_WIDE_INT *,
553 	       bool, HOST_WIDE_INT, range_query *);
554 
555 struct call_info;
556 
557 /* Description of a format directive.  A directive is either a plain
558    string or a conversion specification that starts with '%'.  */
559 
560 struct directive
561 {
directive__anon347ca6030111::directive562   directive (const call_info *inf, unsigned dno)
563     : info (inf), dirno (dno), argno (), beg (), len (), flags (),
564     width (), prec (),  modifier (), specifier (), arg (), fmtfunc ()
565   { }
566 
567   /* Reference to the info structure describing the call that this
568      directive is a part of.  */
569   const call_info *info;
570 
571   /* The 1-based directive number (for debugging).  */
572   unsigned dirno;
573 
574   /* The zero-based argument number of the directive's argument ARG in
575      the function's argument list.  */
576   unsigned argno;
577 
578   /* The first character of the directive and its length.  */
579   const char *beg;
580   size_t len;
581 
582   /* A bitmap of flags, one for each character.  */
583   unsigned flags[256 / sizeof (int)];
584 
585   /* The range of values of the specified width, or -1 if not specified.  */
586   HOST_WIDE_INT width[2];
587   /* The range of values of the specified precision, or -1 if not
588      specified.  */
589   HOST_WIDE_INT prec[2];
590 
591   /* Length modifier.  */
592   format_lengths modifier;
593 
594   /* Format specifier character.  */
595   char specifier;
596 
597   /* The argument of the directive or null when the directive doesn't
598      take one or when none is available (such as for vararg functions).  */
599   tree arg;
600 
601   /* Format conversion function that given a directive and an argument
602      returns the formatting result.  */
603   fmtresult (*fmtfunc) (const directive &, tree, pointer_query &);
604 
605   /* Return True when the format flag CHR has been used.  */
get_flag__anon347ca6030111::directive606   bool get_flag (char chr) const
607   {
608     unsigned char c = chr & 0xff;
609     return (flags[c / (CHAR_BIT * sizeof *flags)]
610 	    & (1U << (c % (CHAR_BIT * sizeof *flags))));
611   }
612 
613   /* Make a record of the format flag CHR having been used.  */
set_flag__anon347ca6030111::directive614   void set_flag (char chr)
615   {
616     unsigned char c = chr & 0xff;
617     flags[c / (CHAR_BIT * sizeof *flags)]
618       |= (1U << (c % (CHAR_BIT * sizeof *flags)));
619   }
620 
621   /* Reset the format flag CHR.  */
clear_flag__anon347ca6030111::directive622   void clear_flag (char chr)
623   {
624     unsigned char c = chr & 0xff;
625     flags[c / (CHAR_BIT * sizeof *flags)]
626       &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
627   }
628 
629   /* Set both bounds of the width range to VAL.  */
set_width__anon347ca6030111::directive630   void set_width (HOST_WIDE_INT val)
631   {
632     width[0] = width[1] = val;
633   }
634 
635   /* Set the width range according to ARG, with both bounds being
636      no less than 0.  For a constant ARG set both bounds to its value
637      or 0, whichever is greater.  For a non-constant ARG in some range
638      set width to its range adjusting each bound to -1 if it's less.
639      For an indeterminate ARG set width to [0, INT_MAX].  */
640   void set_width (tree arg, range_query *);
641 
642   /* Set both bounds of the precision range to VAL.  */
set_precision__anon347ca6030111::directive643   void set_precision (HOST_WIDE_INT val)
644   {
645     prec[0] = prec[1] = val;
646   }
647 
648   /* Set the precision range according to ARG, with both bounds being
649      no less than -1.  For a constant ARG set both bounds to its value
650      or -1 whichever is greater.  For a non-constant ARG in some range
651      set precision to its range adjusting each bound to -1 if it's less.
652      For an indeterminate ARG set precision to [-1, INT_MAX].  */
653   void set_precision (tree arg, range_query *query);
654 
655   /* Return true if both width and precision are known to be
656      either constant or in some range, false otherwise.  */
known_width_and_precision__anon347ca6030111::directive657   bool known_width_and_precision () const
658   {
659     return ((width[1] < 0
660 	     || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
661 	    && (prec[1] < 0
662 		|| (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
663   }
664 };
665 
666 /* The result of a call to a formatted function.  */
667 
668 struct format_result
669 {
format_result__anon347ca6030111::format_result670   format_result ()
671     : range (), aliases (), alias_count (), knownrange (), posunder4k (),
672     floating (), warned () { /* No-op.  */ }
673 
~format_result__anon347ca6030111::format_result674   ~format_result ()
675   {
676     XDELETEVEC (aliases);
677   }
678 
679   /* Range of characters written by the formatted function.
680      Setting the minimum to HOST_WIDE_INT_MAX disables all
681      length tracking for the remainder of the format string.  */
682   result_range range;
683 
684   struct alias_info
685   {
686     directive dir;          /* The directive that aliases the destination.  */
687     HOST_WIDE_INT offset;   /* The offset at which it aliases it.  */
688     result_range range;     /* The raw result of the directive.  */
689   };
690 
691   /* An array of directives whose pointer argument aliases a part
692      of the destination object of the formatted function.  */
693   alias_info *aliases;
694   unsigned alias_count;
695 
696   /* True when the range above is obtained from known values of
697      directive arguments, or bounds on the amount of output such
698      as width and precision, and not the result of  heuristics that
699      depend on warning levels.  It's used to issue stricter diagnostics
700      in cases where strings of unknown lengths are bounded by the arrays
701      they are determined to refer to.  KNOWNRANGE must not be used for
702      the return value optimization.  */
703   bool knownrange;
704 
705   /* True if no individual directive could fail or result in more than
706      4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
707      greater).  Implementations are not required to handle directives
708      that produce more than 4K bytes (leading to undefined behavior)
709      and so when one is found it disables the return value optimization.
710      Similarly, directives that can fail (such as wide character
711      directives) disable the optimization.  */
712   bool posunder4k;
713 
714   /* True when a floating point directive has been seen in the format
715      string.  */
716   bool floating;
717 
718   /* True when an intermediate result has caused a warning.  Used to
719      avoid issuing duplicate warnings while finishing the processing
720      of a call.  WARNED also disables the return value optimization.  */
721   bool warned;
722 
723   /* Preincrement the number of output characters by 1.  */
operator ++__anon347ca6030111::format_result724   format_result& operator++ ()
725   {
726     return *this += 1;
727   }
728 
729   /* Postincrement the number of output characters by 1.  */
operator ++__anon347ca6030111::format_result730   format_result operator++ (int)
731   {
732     format_result prev (*this);
733     *this += 1;
734     return prev;
735   }
736 
737   /* Increment the number of output characters by N.  */
738   format_result& operator+= (unsigned HOST_WIDE_INT);
739 
740   /* Add a directive to the sequence of those with potentially aliasing
741      arguments.  */
742   void append_alias (const directive &, HOST_WIDE_INT, const result_range &);
743 
744 private:
745   /* Not copyable or assignable.  */
746   format_result (format_result&);
747   void operator= (format_result&);
748 };
749 
750 format_result&
operator +=(unsigned HOST_WIDE_INT n)751 format_result::operator+= (unsigned HOST_WIDE_INT n)
752 {
753   gcc_assert (n < HOST_WIDE_INT_MAX);
754 
755   if (range.min < HOST_WIDE_INT_MAX)
756     range.min += n;
757 
758   if (range.max < HOST_WIDE_INT_MAX)
759     range.max += n;
760 
761   if (range.likely < HOST_WIDE_INT_MAX)
762     range.likely += n;
763 
764   if (range.unlikely < HOST_WIDE_INT_MAX)
765     range.unlikely += n;
766 
767   return *this;
768 }
769 
770 void
append_alias(const directive & d,HOST_WIDE_INT off,const result_range & resrng)771 format_result::append_alias (const directive &d, HOST_WIDE_INT off,
772 			     const result_range &resrng)
773 {
774   unsigned cnt = alias_count + 1;
775   alias_info *ar = XNEWVEC (alias_info, cnt);
776 
777   for (unsigned i = 0; i != alias_count; ++i)
778     ar[i] = aliases[i];
779 
780   ar[alias_count].dir = d;
781   ar[alias_count].offset = off;
782   ar[alias_count].range = resrng;
783 
784   XDELETEVEC (aliases);
785 
786   alias_count = cnt;
787   aliases = ar;
788 }
789 
/* Return the number of digits X has in BASE, i.e., one plus the
   integer logarithm of X in BASE.  The result for zero is 1.  */

static int
ilog (unsigned HOST_WIDE_INT x, int base)
{
  int ndigits = 1;

  for (x /= base; x; x /= base)
    ++ndigits;

  return ndigits;
}
803 
804 /* Return the number of bytes resulting from converting into a string
805    the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
806    PLUS indicates whether 1 for a plus sign should be added for positive
807    numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
808    ('0x') prefix should be added for nonzero numbers.  Return -1 if X cannot
809    be represented.  */
810 
811 static HOST_WIDE_INT
tree_digits(tree x,int base,HOST_WIDE_INT prec,bool plus,bool prefix)812 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
813 {
814   unsigned HOST_WIDE_INT absval;
815 
816   HOST_WIDE_INT res;
817 
818   if (TYPE_UNSIGNED (TREE_TYPE (x)))
819     {
820       if (tree_fits_uhwi_p (x))
821 	{
822 	  absval = tree_to_uhwi (x);
823 	  res = plus;
824 	}
825       else
826 	return -1;
827     }
828   else
829     {
830       if (tree_fits_shwi_p (x))
831 	{
832 	  HOST_WIDE_INT i = tree_to_shwi (x);
833          if (HOST_WIDE_INT_MIN == i)
834            {
835              /* Avoid undefined behavior due to negating a minimum.  */
836              absval = HOST_WIDE_INT_MAX;
837              res = 1;
838            }
839          else if (i < 0)
840 	   {
841 	     absval = -i;
842 	     res = 1;
843 	   }
844 	 else
845 	   {
846 	     absval = i;
847 	     res = plus;
848 	   }
849 	}
850       else
851 	return -1;
852     }
853 
854   int ndigs = ilog (absval, base);
855 
856   res += prec < ndigs ? ndigs : prec;
857 
858   /* Adjust a non-zero value for the base prefix, either hexadecimal,
859      or, unless precision has resulted in a leading zero, also octal.  */
860   if (prefix && absval && (base == 16 || prec <= ndigs))
861     {
862       if (base == 8)
863 	res += 1;
864       else if (base == 16)
865 	res += 2;
866     }
867 
868   return res;
869 }
870 
871 /* Description of a call to a formatted function.  */
872 
873 struct call_info
874 {
875   /* Function call statement.  */
876   gimple *callstmt;
877 
878   /* Function called.  */
879   tree func;
880 
881   /* Called built-in function code.  */
882   built_in_function fncode;
883 
884   /* The "origin" of the destination pointer argument, which is either
885      the DECL of the destination buffer being written into or a pointer
886      that points to it, plus some offset.  */
887   tree dst_origin;
888 
889   /* For a destination pointing to a struct array member, the offset of
890      the member.  */
891   HOST_WIDE_INT dst_field;
892 
893   /* The offset into the destination buffer.  */
894   HOST_WIDE_INT dst_offset;
895 
896   /* Format argument and format string extracted from it.  */
897   tree format;
898   const char *fmtstr;
899 
900   /* The location of the format argument.  */
901   location_t fmtloc;
902 
903   /* The destination object size for __builtin___xxx_chk functions
904      typically determined by __builtin_object_size, or -1 if unknown.  */
905   unsigned HOST_WIDE_INT objsize;
906 
907   /* Number of the first variable argument.  */
908   unsigned HOST_WIDE_INT argidx;
909 
910   /* True for functions like snprintf that specify the size of
911      the destination, false for others like sprintf that don't.  */
912   bool bounded;
913 
914   /* True for bounded functions like snprintf that specify a zero-size
915      buffer as a request to compute the size of output without actually
916      writing any.  NOWRITE is cleared in response to the %n directive
917      which has side-effects similar to writing output.  */
918   bool nowrite;
919 
920   /* Return true if the called function's return value is used.  */
retval_used__anon347ca6030111::call_info921   bool retval_used () const
922   {
923     return gimple_get_lhs (callstmt);
924   }
925 
926   /* Return the warning option corresponding to the called function.  */
warnopt__anon347ca6030111::call_info927   opt_code warnopt () const
928   {
929     return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
930   }
931 
932   /* Return true for calls to file formatted functions.  */
is_file_func__anon347ca6030111::call_info933   bool is_file_func () const
934   {
935     return (fncode == BUILT_IN_FPRINTF
936 	    || fncode == BUILT_IN_FPRINTF_CHK
937 	    || fncode == BUILT_IN_FPRINTF_UNLOCKED
938 	    || fncode == BUILT_IN_VFPRINTF
939 	    || fncode == BUILT_IN_VFPRINTF_CHK);
940   }
941 
942   /* Return true for calls to string formatted functions.  */
is_string_func__anon347ca6030111::call_info943   bool is_string_func () const
944   {
945     return (fncode == BUILT_IN_SPRINTF
946 	    || fncode == BUILT_IN_SPRINTF_CHK
947 	    || fncode == BUILT_IN_SNPRINTF
948 	    || fncode == BUILT_IN_SNPRINTF_CHK
949 	    || fncode == BUILT_IN_VSPRINTF
950 	    || fncode == BUILT_IN_VSPRINTF_CHK
951 	    || fncode == BUILT_IN_VSNPRINTF
952 	    || fncode == BUILT_IN_VSNPRINTF_CHK);
953   }
954 };
955 
956 void
set_width(tree arg,range_query * query)957 directive::set_width (tree arg, range_query *query)
958 {
959   get_int_range (arg, info->callstmt, width, width + 1, true, 0, query);
960 }
961 
962 void
set_precision(tree arg,range_query * query)963 directive::set_precision (tree arg, range_query *query)
964 {
965   get_int_range (arg, info->callstmt, prec, prec + 1, false, -1, query);
966 }
967 
968 /* Return the result of formatting a no-op directive (such as '%n').  */
969 
970 static fmtresult
format_none(const directive &,tree,pointer_query &)971 format_none (const directive &, tree, pointer_query &)
972 {
973   fmtresult res (0);
974   return res;
975 }
976 
977 /* Return the result of formatting the '%%' directive.  */
978 
979 static fmtresult
format_percent(const directive &,tree,pointer_query &)980 format_percent (const directive &, tree, pointer_query &)
981 {
982   fmtresult res (1);
983   return res;
984 }
985 
986 
987 /* Compute intmax_type_node and uintmax_type_node similarly to how
988    tree.cc builds size_type_node.  */
989 
990 static void
build_intmax_type_nodes(tree * pintmax,tree * puintmax)991 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
992 {
993   if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
994     {
995       *pintmax = integer_type_node;
996       *puintmax = unsigned_type_node;
997     }
998   else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
999     {
1000       *pintmax = long_integer_type_node;
1001       *puintmax = long_unsigned_type_node;
1002     }
1003   else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1004     {
1005       *pintmax = long_long_integer_type_node;
1006       *puintmax = long_long_unsigned_type_node;
1007     }
1008   else
1009     {
1010       for (int i = 0; i < NUM_INT_N_ENTS; i++)
1011 	if (int_n_enabled_p[i])
1012 	  {
1013 	    char name[50], altname[50];
1014 	    sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1015 	    sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
1016 
1017 	    if (strcmp (name, UINTMAX_TYPE) == 0
1018 		|| strcmp (altname, UINTMAX_TYPE) == 0)
1019 	      {
1020 	        *pintmax = int_n_trees[i].signed_type;
1021 	        *puintmax = int_n_trees[i].unsigned_type;
1022 		return;
1023 	      }
1024 	  }
1025       gcc_unreachable ();
1026     }
1027 }
1028 
1029 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1030    in and that is representable in type int.
1031    Return true when the range is a subrange of that of int.
1032    When ARG is null it is as if it had the full range of int.
1033    When ABSOLUTE is true the range reflects the absolute value of
1034    the argument.  When ABSOLUTE is false, negative bounds of
1035    the determined range are replaced with NEGBOUND.  */
1036 
1037 static bool
get_int_range(tree arg,gimple * stmt,HOST_WIDE_INT * pmin,HOST_WIDE_INT * pmax,bool absolute,HOST_WIDE_INT negbound,range_query * query)1038 get_int_range (tree arg, gimple *stmt,
1039 	       HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1040 	       bool absolute, HOST_WIDE_INT negbound,
1041 	       range_query *query)
1042 {
1043   /* The type of the result.  */
1044   const_tree type = integer_type_node;
1045 
1046   bool knownrange = false;
1047 
1048   if (!arg)
1049     {
1050       *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1051       *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1052     }
1053   else if (TREE_CODE (arg) == INTEGER_CST
1054 	   && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1055     {
1056       /* For a constant argument return its value adjusted as specified
1057 	 by NEGATIVE and NEGBOUND and return true to indicate that the
1058 	 result is known.  */
1059       *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1060       *pmax = *pmin;
1061       knownrange = true;
1062     }
1063   else
1064     {
1065       /* True if the argument's range cannot be determined.  */
1066       bool unknown = true;
1067 
1068       tree argtype = TREE_TYPE (arg);
1069 
1070       /* Ignore invalid arguments with greater precision that that
1071 	 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1072 	 They will have been detected and diagnosed by -Wformat and
1073 	 so it's not important to complicate this code to try to deal
1074 	 with them again.  */
1075       if (TREE_CODE (arg) == SSA_NAME
1076 	  && INTEGRAL_TYPE_P (argtype)
1077 	  && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1078 	{
1079 	  /* Try to determine the range of values of the integer argument.  */
1080 	  value_range vr;
1081 	  query->range_of_expr (vr, arg, stmt);
1082 
1083 	  if (!vr.undefined_p () && !vr.varying_p ())
1084 	    {
1085 	      HOST_WIDE_INT type_min
1086 		= (TYPE_UNSIGNED (argtype)
1087 		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1088 		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1089 
1090 	      HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1091 
1092 	      tree type = TREE_TYPE (arg);
1093 	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
1094 	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
1095 	      *pmin = TREE_INT_CST_LOW (tmin);
1096 	      *pmax = TREE_INT_CST_LOW (tmax);
1097 
1098 	      if (*pmin < *pmax)
1099 		{
1100 		  /* Return true if the adjusted range is a subrange of
1101 		     the full range of the argument's type.  *PMAX may
1102 		     be less than *PMIN when the argument is unsigned
1103 		     and its upper bound is in excess of TYPE_MAX.  In
1104 		     that (invalid) case disregard the range and use that
1105 		     of the expected type instead.  */
1106 		  knownrange = type_min < *pmin || *pmax < type_max;
1107 
1108 		  unknown = false;
1109 		}
1110 	    }
1111 	}
1112 
1113       /* Handle an argument with an unknown range as if none had been
1114 	 provided.  */
1115       if (unknown)
1116 	return get_int_range (NULL_TREE, NULL, pmin, pmax, absolute,
1117 			      negbound, query);
1118     }
1119 
1120   /* Adjust each bound as specified by ABSOLUTE and NEGBOUND.  */
1121   if (absolute)
1122     {
1123       if (*pmin < 0)
1124 	{
1125 	  if (*pmin == *pmax)
1126 	    *pmin = *pmax = -*pmin;
1127 	  else
1128 	    {
1129 	      /* Make sure signed overlow is avoided.  */
1130 	      gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1131 
1132 	      HOST_WIDE_INT tmp = -*pmin;
1133 	      *pmin = 0;
1134 	      if (*pmax < tmp)
1135 		*pmax = tmp;
1136 	    }
1137 	}
1138     }
1139   else if (*pmin < negbound)
1140     *pmin = negbound;
1141 
1142   return knownrange;
1143 }
1144 
1145 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1146    argument, due to the conversion from either *ARGMIN or *ARGMAX to
1147    the type of the directive's formal argument it's possible for both
1148    to result in the same number of bytes or a range of bytes that's
1149    less than the number of bytes that would result from formatting
1150    some other value in the range [*ARGMIN, *ARGMAX].  This can be
1151    determined by checking for the actual argument being in the range
1152    of the type of the directive.  If it isn't it must be assumed to
1153    take on the full range of the directive's type.
1154    Return true when the range has been adjusted to the full range
1155    of DIRTYPE, and false otherwise.  */
1156 
1157 static bool
adjust_range_for_overflow(tree dirtype,tree * argmin,tree * argmax)1158 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1159 {
1160   tree argtype = TREE_TYPE (*argmin);
1161   unsigned argprec = TYPE_PRECISION (argtype);
1162   unsigned dirprec = TYPE_PRECISION (dirtype);
1163 
1164   /* If the actual argument and the directive's argument have the same
1165      precision and sign there can be no overflow and so there is nothing
1166      to adjust.  */
1167   if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1168     return false;
1169 
1170   /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1171      branch in the extract_range_from_unary_expr function in tree-vrp.cc.  */
1172 
1173   if (TREE_CODE (*argmin) == INTEGER_CST
1174       && TREE_CODE (*argmax) == INTEGER_CST
1175       && (dirprec >= argprec
1176 	  || integer_zerop (int_const_binop (RSHIFT_EXPR,
1177 					     int_const_binop (MINUS_EXPR,
1178 							      *argmax,
1179 							      *argmin),
1180 					     size_int (dirprec)))))
1181     {
1182       *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1183       *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1184 
1185       /* If *ARGMIN is still less than *ARGMAX the conversion above
1186 	 is safe.  Otherwise, it has overflowed and would be unsafe.  */
1187       if (tree_int_cst_le (*argmin, *argmax))
1188 	return false;
1189     }
1190 
1191   *argmin = TYPE_MIN_VALUE (dirtype);
1192   *argmax = TYPE_MAX_VALUE (dirtype);
1193   return true;
1194 }
1195 
1196 /* Return a range representing the minimum and maximum number of bytes
1197    that the format directive DIR will output for any argument given
1198    the WIDTH and PRECISION (extracted from DIR).  This function is
1199    used when the directive argument or its value isn't known.  */
1200 
1201 static fmtresult
format_integer(const directive & dir,tree arg,pointer_query & ptr_qry)1202 format_integer (const directive &dir, tree arg, pointer_query &ptr_qry)
1203 {
1204   tree intmax_type_node;
1205   tree uintmax_type_node;
1206 
1207   /* Base to format the number in.  */
1208   int base;
1209 
1210   /* True when a conversion is preceded by a prefix indicating the base
1211      of the argument (octal or hexadecimal).  */
1212   bool maybebase = dir.get_flag ('#');
1213 
1214   /* True when a signed conversion is preceded by a sign or space.  */
1215   bool maybesign = false;
1216 
1217   /* True for signed conversions (i.e., 'd' and 'i').  */
1218   bool sign = false;
1219 
1220   switch (dir.specifier)
1221     {
1222     case 'd':
1223     case 'i':
1224       /* Space and '+' are  only meaningful for signed conversions.  */
1225       maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1226       sign = true;
1227       base = 10;
1228       break;
1229     case 'u':
1230       base = 10;
1231       break;
1232     case 'o':
1233       base = 8;
1234       break;
1235     case 'X':
1236     case 'x':
1237       base = 16;
1238       break;
1239     default:
1240       gcc_unreachable ();
1241     }
1242 
1243   /* The type of the "formal" argument expected by the directive.  */
1244   tree dirtype = NULL_TREE;
1245 
1246   /* Determine the expected type of the argument from the length
1247      modifier.  */
1248   switch (dir.modifier)
1249     {
1250     case FMT_LEN_none:
1251       if (dir.specifier == 'p')
1252 	dirtype = ptr_type_node;
1253       else
1254 	dirtype = sign ? integer_type_node : unsigned_type_node;
1255       break;
1256 
1257     case FMT_LEN_h:
1258       dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1259       break;
1260 
1261     case FMT_LEN_hh:
1262       dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1263       break;
1264 
1265     case FMT_LEN_l:
1266       dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1267       break;
1268 
1269     case FMT_LEN_L:
1270     case FMT_LEN_ll:
1271       dirtype = (sign
1272 		 ? long_long_integer_type_node
1273 		 : long_long_unsigned_type_node);
1274       break;
1275 
1276     case FMT_LEN_z:
1277       dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1278       break;
1279 
1280     case FMT_LEN_t:
1281       dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1282       break;
1283 
1284     case FMT_LEN_j:
1285       build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1286       dirtype = sign ? intmax_type_node : uintmax_type_node;
1287       break;
1288 
1289     default:
1290       return fmtresult ();
1291     }
1292 
1293   /* The type of the argument to the directive, either deduced from
1294      the actual non-constant argument if one is known, or from
1295      the directive itself when none has been provided because it's
1296      a va_list.  */
1297   tree argtype = NULL_TREE;
1298 
1299   if (!arg)
1300     {
1301       /* When the argument has not been provided, use the type of
1302 	 the directive's argument as an approximation.  This will
1303 	 result in false positives for directives like %i with
1304 	 arguments with smaller precision (such as short or char).  */
1305       argtype = dirtype;
1306     }
1307   else if (TREE_CODE (arg) == INTEGER_CST)
1308     {
1309       /* When a constant argument has been provided use its value
1310 	 rather than type to determine the length of the output.  */
1311       fmtresult res;
1312 
1313       if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1314 	{
1315 	  /* As a special case, a precision of zero with a zero argument
1316 	     results in zero bytes except in base 8 when the '#' flag is
1317 	     specified, and for signed conversions in base 8 and 10 when
1318 	     either the space or '+' flag has been specified and it results
1319 	     in just one byte (with width having the normal effect).  This
1320 	     must extend to the case of a specified precision with
1321 	     an unknown value because it can be zero.  */
1322 	  res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1323 	  if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1324 	    {
1325 	      res.range.max = 1;
1326 	      res.range.likely = 1;
1327 	    }
1328 	  else
1329 	    {
1330 	      res.range.max = res.range.min;
1331 	      res.range.likely = res.range.min;
1332 	    }
1333 	}
1334       else
1335 	{
1336 	  /* Convert the argument to the type of the directive.  */
1337 	  arg = fold_convert (dirtype, arg);
1338 
1339 	  res.range.min = tree_digits (arg, base, dir.prec[0],
1340 				       maybesign, maybebase);
1341 	  if (dir.prec[0] == dir.prec[1])
1342 	    res.range.max = res.range.min;
1343 	  else
1344 	    res.range.max = tree_digits (arg, base, dir.prec[1],
1345 					 maybesign, maybebase);
1346 	  res.range.likely = res.range.min;
1347 	  res.knownrange = true;
1348 	}
1349 
1350       res.range.unlikely = res.range.max;
1351 
1352       /* Bump up the counters if WIDTH is greater than LEN.  */
1353       res.adjust_for_width_or_precision (dir.width, dirtype, base,
1354 					 (sign | maybebase) + (base == 16));
1355       /* Bump up the counters again if PRECision is greater still.  */
1356       res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1357 					 (sign | maybebase) + (base == 16));
1358 
1359       return res;
1360     }
1361   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1362 	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1363     /* Determine the type of the provided non-constant argument.  */
1364     argtype = TREE_TYPE (arg);
1365   else
1366     /* Don't bother with invalid arguments since they likely would
1367        have already been diagnosed, and disable any further checking
1368        of the format string by returning [-1, -1].  */
1369     return fmtresult ();
1370 
1371   fmtresult res;
1372 
1373   /* Using either the range the non-constant argument is in, or its
1374      type (either "formal" or actual), create a range of values that
1375      constrain the length of output given the warning level.  */
1376   tree argmin = NULL_TREE;
1377   tree argmax = NULL_TREE;
1378 
1379   if (arg
1380       && TREE_CODE (arg) == SSA_NAME
1381       && INTEGRAL_TYPE_P (argtype))
1382     {
1383       /* Try to determine the range of values of the integer argument
1384 	 (range information is not available for pointers).  */
1385       value_range vr;
1386       ptr_qry.rvals->range_of_expr (vr, arg, dir.info->callstmt);
1387 
1388       if (!vr.varying_p () && !vr.undefined_p ())
1389 	{
1390 	  argmin = wide_int_to_tree (TREE_TYPE (arg), vr.lower_bound ());
1391 	  argmax = wide_int_to_tree (TREE_TYPE (arg), vr.upper_bound ());
1392 
1393 	  /* Set KNOWNRANGE if the argument is in a known subrange
1394 	     of the directive's type and neither width nor precision
1395 	     is unknown.  (KNOWNRANGE may be reset below).  */
1396 	  res.knownrange
1397 	    = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1398 		|| !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1399 	       && dir.known_width_and_precision ());
1400 
1401 	  res.argmin = argmin;
1402 	  res.argmax = argmax;
1403 	}
1404       else
1405 	{
1406 	  /* The argument here may be the result of promoting the actual
1407 	     argument to int.  Try to determine the type of the actual
1408 	     argument before promotion and narrow down its range that
1409 	     way.  */
1410 	  gimple *def = SSA_NAME_DEF_STMT (arg);
1411 	  if (is_gimple_assign (def))
1412 	    {
1413 	      tree_code code = gimple_assign_rhs_code (def);
1414 	      if (code == INTEGER_CST)
1415 		{
1416 		  arg = gimple_assign_rhs1 (def);
1417 		  return format_integer (dir, arg, ptr_qry);
1418 		}
1419 
1420 	      if (code == NOP_EXPR)
1421 		{
1422 		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1423 		  if (INTEGRAL_TYPE_P (type)
1424 		      || TREE_CODE (type) == POINTER_TYPE)
1425 		    argtype = type;
1426 		}
1427 	    }
1428 	}
1429     }
1430 
1431   if (!argmin)
1432     {
1433       if (TREE_CODE (argtype) == POINTER_TYPE)
1434 	{
1435 	  argmin = build_int_cst (pointer_sized_int_node, 0);
1436 	  argmax = build_all_ones_cst (pointer_sized_int_node);
1437 	}
1438       else
1439 	{
1440 	  argmin = TYPE_MIN_VALUE (argtype);
1441 	  argmax = TYPE_MAX_VALUE (argtype);
1442 	}
1443     }
1444 
1445   /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1446      of the directive.  If it has been cleared then since ARGMIN and/or
1447      ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1448      ARGMAX in the result to include in diagnostics.  */
1449   if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1450     {
1451       res.knownrange = false;
1452       res.argmin = argmin;
1453       res.argmax = argmax;
1454     }
1455 
1456   /* Recursively compute the minimum and maximum from the known range.  */
1457   if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1458     {
1459       /* For unsigned conversions/directives or signed when
1460 	 the minimum is positive, use the minimum and maximum to compute
1461 	 the shortest and longest output, respectively.  */
1462       res.range.min = format_integer (dir, argmin, ptr_qry).range.min;
1463       res.range.max = format_integer (dir, argmax, ptr_qry).range.max;
1464     }
1465   else if (tree_int_cst_sgn (argmax) < 0)
1466     {
1467       /* For signed conversions/directives if maximum is negative,
1468 	 use the minimum as the longest output and maximum as the
1469 	 shortest output.  */
1470       res.range.min = format_integer (dir, argmax, ptr_qry).range.min;
1471       res.range.max = format_integer (dir, argmin, ptr_qry).range.max;
1472     }
1473   else
1474     {
1475       /* Otherwise, 0 is inside of the range and minimum negative.  Use 0
1476 	 as the shortest output and for the longest output compute the
1477 	 length of the output of both minimum and maximum and pick the
1478 	 longer.  */
1479       unsigned HOST_WIDE_INT max1
1480 	= format_integer (dir, argmin, ptr_qry).range.max;
1481       unsigned HOST_WIDE_INT max2
1482 	= format_integer (dir, argmax, ptr_qry).range.max;
1483       res.range.min
1484 	= format_integer (dir, integer_zero_node, ptr_qry).range.min;
1485       res.range.max = MAX (max1, max2);
1486     }
1487 
1488   /* If the range is known, use the maximum as the likely length.  */
1489   if (res.knownrange)
1490     res.range.likely = res.range.max;
1491   else
1492     {
1493       /* Otherwise, use the minimum.  Except for the case where for %#x or
1494          %#o the minimum is just for a single value in the range (0) and
1495          for all other values it is something longer, like 0x1 or 01.
1496 	  Use the length for value 1 in that case instead as the likely
1497 	  length.  */
1498       res.range.likely = res.range.min;
1499       if (maybebase
1500 	  && base != 10
1501 	  && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1502 	{
1503 	  if (res.range.min == 1)
1504 	    res.range.likely += base == 8 ? 1 : 2;
1505 	  else if (res.range.min == 2
1506 		   && base == 16
1507 		   && (dir.width[0] == 2 || dir.prec[0] == 2))
1508 	    ++res.range.likely;
1509 	}
1510     }
1511 
1512   res.range.unlikely = res.range.max;
1513   res.adjust_for_width_or_precision (dir.width, dirtype, base,
1514 				     (sign | maybebase) + (base == 16));
1515   res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1516 				     (sign | maybebase) + (base == 16));
1517 
1518   return res;
1519 }
1520 
1521 /* Return the number of bytes that a format directive consisting of FLAGS,
1522    PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1523    would result for argument X under ideal conditions (i.e., if PREC
1524    weren't excessive).  MPFR 3.1 allocates large amounts of memory for
1525    values of PREC with large magnitude and can fail (see MPFR bug #21056).
1526    This function works around those problems.  */
1527 
1528 static unsigned HOST_WIDE_INT
get_mpfr_format_length(mpfr_ptr x,const char * flags,HOST_WIDE_INT prec,char spec,char rndspec)1529 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1530 			char spec, char rndspec)
1531 {
1532   char fmtstr[40];
1533 
1534   HOST_WIDE_INT len = strlen (flags);
1535 
1536   fmtstr[0] = '%';
1537   memcpy (fmtstr + 1, flags, len);
1538   memcpy (fmtstr + 1 + len, ".*R", 3);
1539   fmtstr[len + 4] = rndspec;
1540   fmtstr[len + 5] = spec;
1541   fmtstr[len + 6] = '\0';
1542 
1543   spec = TOUPPER (spec);
1544   if (spec == 'E' || spec == 'F')
1545     {
1546       /* For %e, specify the precision explicitly since mpfr_sprintf
1547 	 does its own thing just to be different (see MPFR bug 21088).  */
1548       if (prec < 0)
1549 	prec = 6;
1550     }
1551   else
1552     {
1553       /* Avoid passing negative precisions with larger magnitude to MPFR
1554 	 to avoid exposing its bugs.  (A negative precision is supposed
1555 	 to be ignored.)  */
1556       if (prec < 0)
1557 	prec = -1;
1558     }
1559 
1560   HOST_WIDE_INT p = prec;
1561 
1562   if (spec == 'G' && !strchr (flags, '#'))
1563     {
1564       /* For G/g without the pound flag, precision gives the maximum number
1565 	 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1566 	 a 128 bit IEEE extended precision, 4932.  Using twice as much here
1567 	 should be more than sufficient for any real format.  */
1568       if ((IEEE_MAX_10_EXP * 2) < prec)
1569 	prec = IEEE_MAX_10_EXP * 2;
1570       p = prec;
1571     }
1572   else
1573     {
1574       /* Cap precision arbitrarily at 1KB and add the difference
1575 	 (if any) to the MPFR result.  */
1576       if (prec > 1024)
1577 	p = 1024;
1578     }
1579 
1580   len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1581 
1582   /* Handle the unlikely (impossible?) error by returning more than
1583      the maximum dictated by the function's return type.  */
1584   if (len < 0)
1585     return target_dir_max () + 1;
1586 
1587   /* Adjust the return value by the difference.  */
1588   if (p < prec)
1589     len += prec - p;
1590 
1591   return len;
1592 }
1593 
1594 /* Return the number of bytes to format using the format specifier
1595    SPEC and the precision PREC the largest value in the real floating
1596    TYPE.  */
1597 
1598 static unsigned HOST_WIDE_INT
format_floating_max(tree type,char spec,HOST_WIDE_INT prec)1599 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1600 {
1601   machine_mode mode = TYPE_MODE (type);
1602 
1603   /* IBM Extended mode.  */
1604   if (MODE_COMPOSITE_P (mode))
1605     mode = DFmode;
1606 
1607   /* Get the real type format description for the target.  */
1608   const real_format *rfmt = REAL_MODE_FORMAT (mode);
1609   REAL_VALUE_TYPE rv;
1610 
1611   real_maxval (&rv, 0, mode);
1612 
1613   /* Convert the GCC real value representation with the precision
1614      of the real type to the mpfr_t format with the GCC default
1615      round-to-nearest mode.  */
1616   mpfr_t x;
1617   mpfr_init2 (x, rfmt->p);
1618   mpfr_from_real (x, &rv, MPFR_RNDN);
1619 
1620   /* Return a value one greater to account for the leading minus sign.  */
1621   unsigned HOST_WIDE_INT r
1622     = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1623   mpfr_clear (x);
1624   return r;
1625 }
1626 
1627 /* Return a range representing the minimum and maximum number of bytes
1628    that the directive DIR will output for any argument.  PREC gives
1629    the adjusted precision range to account for negative precisions
1630    meaning the default 6.  This function is used when the directive
1631    argument or its value isn't known.  */
1632 
1633 static fmtresult
format_floating(const directive & dir,const HOST_WIDE_INT prec[2])1634 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1635 {
1636   tree type;
1637 
1638   switch (dir.modifier)
1639     {
1640     case FMT_LEN_l:
1641     case FMT_LEN_none:
1642       type = double_type_node;
1643       break;
1644 
1645     case FMT_LEN_L:
1646       type = long_double_type_node;
1647       break;
1648 
1649     case FMT_LEN_ll:
1650       type = long_double_type_node;
1651       break;
1652 
1653     default:
1654       return fmtresult ();
1655     }
1656 
1657   /* The minimum and maximum number of bytes produced by the directive.  */
1658   fmtresult res;
1659 
1660   /* The minimum output as determined by flags.  It's always at least 1.
1661      When plus or space are set the output is preceded by either a sign
1662      or a space.  */
1663   unsigned flagmin = (1 /* for the first digit */
1664 		      + (dir.get_flag ('+') | dir.get_flag (' ')));
1665 
1666   /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1667      for the plus sign/space with the '+' and ' ' flags, respectively,
1668      unless reduced below.  */
1669   res.range.min = 2 + flagmin;
1670 
1671   /* When the pound flag is set the decimal point is included in output
1672      regardless of precision.  Whether or not a decimal point is included
1673      otherwise depends on the specification and precision.  */
1674   bool radix = dir.get_flag ('#');
1675 
1676   switch (dir.specifier)
1677     {
1678     case 'A':
1679     case 'a':
1680       {
1681 	HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1682 	if (dir.prec[0] <= 0)
1683 	  minprec = 0;
1684 	else if (dir.prec[0] > 0)
1685 	  minprec = dir.prec[0] + !radix /* decimal point */;
1686 
1687 	res.range.likely = (2 /* 0x */
1688 			    + flagmin
1689 			    + radix
1690 			    + minprec
1691 			    + 3 /* p+0 */);
1692 
1693 	res.range.max = format_floating_max (type, 'a', prec[1]);
1694 
1695 	/* The unlikely maximum accounts for the longest multibyte
1696 	   decimal point character.  */
1697 	res.range.unlikely = res.range.max;
1698 	if (dir.prec[1] > 0)
1699 	  res.range.unlikely += target_mb_len_max () - 1;
1700 
1701 	break;
1702       }
1703 
1704     case 'E':
1705     case 'e':
1706       {
1707 	/* Minimum output attributable to precision and, when it's
1708 	   non-zero, decimal point.  */
1709 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1710 
1711 	/* The likely minimum output is "[-+]1.234567e+00" regardless
1712 	   of the value of the actual argument.  */
1713 	res.range.likely = (flagmin
1714 			    + radix
1715 			    + minprec
1716 			    + 2 /* e+ */ + 2);
1717 
1718 	res.range.max = format_floating_max (type, 'e', prec[1]);
1719 
1720 	/* The unlikely maximum accounts for the longest multibyte
1721 	   decimal point character.  */
1722 	if (dir.prec[0] != dir.prec[1]
1723 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1724 	  res.range.unlikely = res.range.max + target_mb_len_max () -1;
1725 	else
1726 	  res.range.unlikely = res.range.max;
1727 	break;
1728       }
1729 
1730     case 'F':
1731     case 'f':
1732       {
1733 	/* Minimum output attributable to precision and, when it's non-zero,
1734 	   decimal point.  */
1735 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1736 
1737 	/* For finite numbers (i.e., not infinity or NaN) the lower bound
1738 	   when precision isn't specified is 8 bytes ("1.23456" since
1739 	   precision is taken to be 6).  When precision is zero, the lower
1740 	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
1741 	   than zero, then the lower bound is 2 plus precision (plus flags).
1742 	   But in all cases, the lower bound is no greater than 3.  */
1743 	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1744 	if (min < res.range.min)
1745 	  res.range.min = min;
1746 
1747 	/* Compute the upper bound for -TYPE_MAX.  */
1748 	res.range.max = format_floating_max (type, 'f', prec[1]);
1749 
1750 	/* The minimum output with unknown precision is a single byte
1751 	   (e.g., "0") but the more likely output is 3 bytes ("0.0").  */
1752 	if (dir.prec[0] < 0 && dir.prec[1] > 0)
1753 	  res.range.likely = 3;
1754 	else
1755 	  res.range.likely = min;
1756 
1757 	/* The unlikely maximum accounts for the longest multibyte
1758 	   decimal point character.  */
1759 	if (dir.prec[0] != dir.prec[1]
1760 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1761 	  res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1762 	break;
1763       }
1764 
1765     case 'G':
1766     case 'g':
1767       {
1768 	/* The %g output depends on precision and the exponent of
1769 	   the argument.  Since the value of the argument isn't known
1770 	   the lower bound on the range of bytes (not counting flags
1771 	   or width) is 1 plus radix (i.e., either "0" or "0." for
1772 	   "%g" and "%#g", respectively, with a zero argument).  */
1773 	unsigned HOST_WIDE_INT min = flagmin + radix;
1774 	if (min < res.range.min)
1775 	  res.range.min = min;
1776 
1777 	char spec = 'g';
1778 	HOST_WIDE_INT maxprec = dir.prec[1];
1779 	if (radix && maxprec)
1780 	  {
1781 	    /* When the pound flag (radix) is set, trailing zeros aren't
1782 	       trimmed and so the longest output is the same as for %e,
1783 	       except with precision minus 1 (as specified in C11).  */
1784 	    spec = 'e';
1785 	    if (maxprec > 0)
1786 	      --maxprec;
1787 	    else if (maxprec < 0)
1788 	      maxprec = 5;
1789 	  }
1790 	else
1791 	  maxprec = prec[1];
1792 
1793 	res.range.max = format_floating_max (type, spec, maxprec);
1794 
1795 	/* The likely output is either the maximum computed above
1796 	   minus 1 (assuming the maximum is positive) when precision
1797 	   is known (or unspecified), or the same minimum as for %e
1798 	   (which is computed for a non-negative argument).  Unlike
1799 	   for the other specifiers above the likely output isn't
1800 	   the minimum because for %g that's 1 which is unlikely.  */
1801 	if (dir.prec[1] < 0
1802 	    || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1803 	  res.range.likely = res.range.max - 1;
1804 	else
1805 	  {
1806 	    HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1807 	    res.range.likely = (flagmin
1808 				+ radix
1809 				+ minprec
1810 				+ 2 /* e+ */ + 2);
1811 	  }
1812 
1813 	/* The unlikely maximum accounts for the longest multibyte
1814 	   decimal point character.  */
1815 	res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1816 	break;
1817       }
1818 
1819     default:
1820       return fmtresult ();
1821     }
1822 
1823   /* Bump up the byte counters if WIDTH is greater.  */
1824   res.adjust_for_width_or_precision (dir.width);
1825   return res;
1826 }
1827 
1828 /* Return a range representing the minimum and maximum number of bytes
1829    that the directive DIR will write on output for the floating argument
1830    ARG.  */
1831 
1832 static fmtresult
format_floating(const directive & dir,tree arg,pointer_query &)1833 format_floating (const directive &dir, tree arg, pointer_query &)
1834 {
1835   HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1836   tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1837 	       ? long_double_type_node : double_type_node);
1838 
1839   /* For an indeterminate precision the lower bound must be assumed
1840      to be zero.  */
1841   if (TOUPPER (dir.specifier) == 'A')
1842     {
1843       /* Get the number of fractional decimal digits needed to represent
1844 	 the argument without a loss of accuracy.  */
1845       unsigned fmtprec
1846 	= REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1847 
1848       /* The precision of the IEEE 754 double format is 53.
1849 	 The precision of all other GCC binary double formats
1850 	 is 56 or less.  */
1851       unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1852 
1853       /* For %a, leave the minimum precision unspecified to let
1854 	 MFPR trim trailing zeros (as it and many other systems
1855 	 including Glibc happen to do) and set the maximum
1856 	 precision to reflect what it would be with trailing zeros
1857 	 present (as Solaris and derived systems do).  */
1858       if (dir.prec[1] < 0)
1859 	{
1860 	  /* Both bounds are negative implies that precision has
1861 	     not been specified.  */
1862 	  prec[0] = maxprec;
1863 	  prec[1] = -1;
1864 	}
1865       else if (dir.prec[0] < 0)
1866 	{
1867 	  /* With a negative lower bound and a non-negative upper
1868 	     bound set the minimum precision to zero and the maximum
1869 	     to the greater of the maximum precision (i.e., with
1870 	     trailing zeros present) and the specified upper bound.  */
1871 	  prec[0] = 0;
1872 	  prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1873 	}
1874     }
1875   else if (dir.prec[0] < 0)
1876     {
1877       if (dir.prec[1] < 0)
1878 	{
1879 	  /* A precision in a strictly negative range is ignored and
1880 	     the default of 6 is used instead.  */
1881 	  prec[0] = prec[1] = 6;
1882 	}
1883       else
1884 	{
1885 	  /* For a precision in a partly negative range, the lower bound
1886 	     must be assumed to be zero and the new upper bound is the
1887 	     greater of 6 (the default precision used when the specified
1888 	     precision is negative) and the upper bound of the specified
1889 	     range.  */
1890 	  prec[0] = 0;
1891 	  prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1892 	}
1893     }
1894 
1895   if (!arg
1896       || TREE_CODE (arg) != REAL_CST
1897       || !useless_type_conversion_p (type, TREE_TYPE (arg)))
1898     return format_floating (dir, prec);
1899 
1900   /* The minimum and maximum number of bytes produced by the directive.  */
1901   fmtresult res;
1902 
1903   /* Get the real type format description for the target.  */
1904   const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1905   const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1906 
1907   if (!real_isfinite (rvp))
1908     {
1909       /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
1910 	 and "[-]nan" with the choice being implementation-defined
1911 	 but not locale dependent.  */
1912       bool sign = dir.get_flag ('+') || real_isneg (rvp);
1913       res.range.min = 3 + sign;
1914 
1915       res.range.likely = res.range.min;
1916       res.range.max = res.range.min;
1917       /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
1918 	 For NaN, the C/POSIX standards specify two formats:
1919 	   "[-/+]nan"
1920 	 and
1921 	   "[-/+]nan(n-char-sequence)"
1922 	 No known printf implementation outputs the latter format but AIX
1923 	 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
1924 	 so the unlikely maximum reflects that.  */
1925       res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);
1926 
1927       /* The range for infinity and NaN is known unless either width
1928 	 or precision is unknown.  Width has the same effect regardless
1929 	 of whether the argument is finite.  Precision is either ignored
1930 	 (e.g., Glibc) or can have an effect on the short vs long format
1931 	 such as inf/infinity (e.g., Solaris).  */
1932       res.knownrange = dir.known_width_and_precision ();
1933 
1934       /* Adjust the range for width but ignore precision.  */
1935       res.adjust_for_width_or_precision (dir.width);
1936 
1937       return res;
1938     }
1939 
1940   char fmtstr [40];
1941   char *pfmt = fmtstr;
1942 
1943   /* Append flags.  */
1944   for (const char *pf = "-+ #0"; *pf; ++pf)
1945     if (dir.get_flag (*pf))
1946       *pfmt++ = *pf;
1947 
1948   *pfmt = '\0';
1949 
1950   {
1951     /* Set up an array to easily iterate over.  */
1952     unsigned HOST_WIDE_INT* const minmax[] = {
1953       &res.range.min, &res.range.max
1954     };
1955 
1956     for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1957       {
1958 	/* Convert the GCC real value representation with the precision
1959 	   of the real type to the mpfr_t format rounding down in the
1960 	   first iteration that computes the minimum and up in the second
1961 	   that computes the maximum.  This order is arbitrary because
1962 	   rounding in either direction can result in longer output.  */
1963 	mpfr_t mpfrval;
1964 	mpfr_init2 (mpfrval, rfmt->p);
1965 	mpfr_from_real (mpfrval, rvp, i ? MPFR_RNDU : MPFR_RNDD);
1966 
1967 	/* Use the MPFR rounding specifier to round down in the first
1968 	   iteration and then up.  In most but not all cases this will
1969 	   result in the same number of bytes.  */
1970 	char rndspec = "DU"[i];
1971 
1972 	/* Format it and store the result in the corresponding member
1973 	   of the result struct.  */
1974 	*minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
1975 					     dir.specifier, rndspec);
1976 	mpfr_clear (mpfrval);
1977       }
1978   }
1979 
1980   /* Make sure the minimum is less than the maximum (MPFR rounding
1981      in the call to mpfr_snprintf can result in the reverse.  */
1982   if (res.range.max < res.range.min)
1983     {
1984       unsigned HOST_WIDE_INT tmp = res.range.min;
1985       res.range.min = res.range.max;
1986       res.range.max = tmp;
1987     }
1988 
1989   /* The range is known unless either width or precision is unknown.  */
1990   res.knownrange = dir.known_width_and_precision ();
1991 
1992   /* For the same floating point constant, unless width or precision
1993      is unknown, use the longer output as the likely maximum since
1994      with round to nearest either is equally likely.  Otherwise, when
1995      precision is unknown, use the greater of the minimum and 3 as
1996      the likely output (for "0.0" since zero precision is unlikely).  */
1997   if (res.knownrange)
1998     res.range.likely = res.range.max;
1999   else if (res.range.min < 3
2000 	   && dir.prec[0] < 0
2001 	   && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
2002     res.range.likely = 3;
2003   else
2004     res.range.likely = res.range.min;
2005 
2006   res.range.unlikely = res.range.max;
2007 
2008   if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2009     {
2010       /* Unless the precision is zero output longer than 2 bytes may
2011 	 include the decimal point which must be a single character
2012 	 up to MB_LEN_MAX in length.  This is overly conservative
2013 	 since in some conversions some constants result in no decimal
2014 	 point (e.g., in %g).  */
2015       res.range.unlikely += target_mb_len_max () - 1;
2016     }
2017 
2018   res.adjust_for_width_or_precision (dir.width);
2019   return res;
2020 }
2021 
2022 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2023    strings referenced by the expression STR, or (-1, -1) when not known.
2024    Used by the format_string function below.  */
2025 
2026 static fmtresult
get_string_length(tree str,gimple * stmt,unsigned HOST_WIDE_INT max_size,unsigned eltsize,pointer_query & ptr_qry)2027 get_string_length (tree str, gimple *stmt, unsigned HOST_WIDE_INT max_size,
2028 		   unsigned eltsize, pointer_query &ptr_qry)
2029 {
2030   if (!str)
2031     return fmtresult ();
2032 
2033   /* Try to determine the dynamic string length first.
2034      Set MAXBOUND to an arbitrary non-null non-integer node as a request
2035      to have it set to the length of the longest string in a PHI.  */
2036   c_strlen_data lendata = { };
2037   lendata.maxbound = str;
2038   if (eltsize == 1)
2039     get_range_strlen_dynamic (str, stmt, &lendata, ptr_qry);
2040   else
2041     {
2042       /* Determine the length of the shortest and longest string referenced
2043 	 by STR.  Strings of unknown lengths are bounded by the sizes of
2044 	 arrays that subexpressions of STR may refer to.  Pointers that
2045 	 aren't known to point any such arrays result in LENDATA.MAXLEN
2046 	 set to SIZE_MAX.  */
2047       get_range_strlen (str, &lendata, eltsize);
2048     }
2049 
2050   /* If LENDATA.MAXBOUND is not equal to .MINLEN it corresponds to the bound
2051      of the largest array STR refers to, if known, or it's set to SIZE_MAX
2052      otherwise.  */
2053 
2054   /* Return the default result when nothing is known about the string.  */
2055   if ((lendata.maxbound && !tree_fits_uhwi_p (lendata.maxbound))
2056       || !tree_fits_uhwi_p (lendata.maxlen))
2057     {
2058       fmtresult res;
2059       res.nonstr = lendata.decl;
2060       return res;
2061     }
2062 
2063   unsigned HOST_WIDE_INT lenmax = tree_to_uhwi (max_object_size ()) - 2;
2064   if (integer_zerop (lendata.minlen)
2065       && (!lendata.maxbound || lenmax <= tree_to_uhwi (lendata.maxbound))
2066       && lenmax <= tree_to_uhwi (lendata.maxlen))
2067     {
2068       if (max_size > 0 && max_size < HOST_WIDE_INT_MAX)
2069 	{
2070 	  /* Adjust the conservative unknown/unbounded result if MAX_SIZE
2071 	     is valid.  Set UNLIKELY to maximum in case MAX_SIZE refers
2072 	     to a subobject.
2073 	     TODO: This is overly conservative.  Set UNLIKELY to the size
2074 	     of the outermost enclosing declared object.  */
2075 	  fmtresult res (0, max_size - 1);
2076 	  res.nonstr = lendata.decl;
2077 	  res.range.likely = res.range.max;
2078 	  res.range.unlikely = HOST_WIDE_INT_MAX;
2079 	  return res;
2080 	}
2081 
2082       fmtresult res;
2083       res.nonstr = lendata.decl;
2084       return res;
2085     }
2086 
2087   /* The minimum length of the string.  */
2088   HOST_WIDE_INT min
2089     = (tree_fits_uhwi_p (lendata.minlen)
2090        ? tree_to_uhwi (lendata.minlen)
2091        : 0);
2092 
2093   /* The maximum length of the string; initially set to MAXBOUND which
2094      may be less than MAXLEN, but may be adjusted up below.  */
2095   HOST_WIDE_INT max
2096     = (lendata.maxbound && tree_fits_uhwi_p (lendata.maxbound)
2097        ? tree_to_uhwi (lendata.maxbound)
2098        : HOST_WIDE_INT_M1U);
2099 
2100   /* True if either the maximum length is unknown or (conservatively)
2101      the array bound is less than the maximum length.  That can happen
2102      when the length of the string is unknown but the array in which
2103      the string is stored is a member of a struct.  The warning uses
2104      the size of the member as the upper bound but the optimization
2105      doesn't.  The optimization could still use the size of
2106      enclosing object as the upper bound but that's not done here.  */
2107   const bool unbounded
2108     = (integer_all_onesp (lendata.maxlen)
2109        || (lendata.maxbound
2110 	   && tree_int_cst_lt (lendata.maxbound, lendata.maxlen)));
2111 
2112   /* Set the max/likely counters to unbounded when a minimum is known
2113      but the maximum length isn't bounded.  This implies that STR is
2114      a conditional expression involving a string of known length and
2115      an expression of unknown/unbounded length.  */
2116   if (min
2117       && (unsigned HOST_WIDE_INT)min < HOST_WIDE_INT_M1U
2118       && unbounded)
2119     max = HOST_WIDE_INT_M1U;
2120 
2121   /* get_range_strlen() returns the target value of SIZE_MAX for
2122      strings of unknown length.  Bump it up to HOST_WIDE_INT_M1U
2123      which may be bigger.  */
2124   if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2125     min = HOST_WIDE_INT_M1U;
2126   if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2127     max = HOST_WIDE_INT_M1U;
2128 
2129   fmtresult res (min, max);
2130   res.nonstr = lendata.decl;
2131 
2132   /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2133      by STR are known to be bounded (though not necessarily by their
2134      actual length but perhaps by their maximum possible length).  */
2135   if (res.range.max < target_int_max ())
2136     {
2137       res.knownrange = true;
2138       /* When the length of the longest string is known and not
2139 	 excessive use it as the likely length of the string(s).  */
2140       res.range.likely = res.range.max;
2141     }
2142   else
2143     {
2144       /* When the upper bound is unknown (it can be zero or excessive)
2145 	 set the likely length to the greater of 1.  If MAXBOUND is
2146 	 known, also reset the length of the lower bound to zero.  */
2147       res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2148       if (lendata.maxbound && !integer_all_onesp (lendata.maxbound))
2149 	res.range.min = 0;
2150     }
2151 
2152   res.range.unlikely = unbounded ? HOST_WIDE_INT_MAX : res.range.max;
2153 
2154   return res;
2155 }
2156 
2157 /* Return the minimum and maximum number of characters formatted
2158    by the '%c' format directives and its wide character form for
2159    the argument ARG.  ARG can be null (for functions such as
2160    vsprinf).  */
2161 
2162 static fmtresult
format_character(const directive & dir,tree arg,pointer_query & ptr_qry)2163 format_character (const directive &dir, tree arg, pointer_query &ptr_qry)
2164 {
2165   fmtresult res;
2166 
2167   res.knownrange = true;
2168 
2169   if (dir.specifier == 'C' || dir.modifier == FMT_LEN_l)
2170     {
2171       /* A wide character can result in as few as zero bytes.  */
2172       res.range.min = 0;
2173 
2174       HOST_WIDE_INT min, max;
2175       if (get_int_range (arg, dir.info->callstmt, &min, &max, false, 0,
2176 			 ptr_qry.rvals))
2177 	{
2178 	  if (min == 0 && max == 0)
2179 	    {
2180 	      /* In strict reading of older ISO C or POSIX, this required
2181 		 no characters to be emitted.  ISO C23 changes that, so
2182 		 does POSIX, to match what has been implemented in most of the
2183 		 implementations, namely emitting a single NUL character.
2184 		 Let's use 0 for minimum and 1 for all the other values.  */
2185 	      res.range.max = 1;
2186 	      res.range.likely = res.range.unlikely = 1;
2187 	    }
2188 	  else if (min >= 0 && min < 128)
2189 	    {
2190 	      /* Be conservative if the target execution character set
2191 		 is not a 1-to-1 mapping to the source character set or
2192 		 if the source set is not ASCII.  */
2193 	      bool one_2_one_ascii
2194 		= (target_to_host_charmap[0] == 1
2195 		   && target_to_host ('a') == 97);
2196 
2197 	      /* A wide character in the ASCII range most likely results
2198 		 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
2199 	      res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();
2200 	      res.range.likely = 1;
2201 	      res.range.unlikely = target_mb_len_max ();
2202 	      res.mayfail = !one_2_one_ascii;
2203 	    }
2204 	  else
2205 	    {
2206 	      /* A wide character outside the ASCII range likely results
2207 		 in up to two bytes, and only unlikely in up to MB_LEN_MAX.  */
2208 	      res.range.max = target_mb_len_max ();
2209 	      res.range.likely = 2;
2210 	      res.range.unlikely = res.range.max;
2211 	      /* Converting such a character may fail.  */
2212 	      res.mayfail = true;
2213 	    }
2214 	}
2215       else
2216 	{
2217 	  /* An unknown wide character is treated the same as a wide
2218 	     character outside the ASCII range.  */
2219 	  res.range.max = target_mb_len_max ();
2220 	  res.range.likely = 2;
2221 	  res.range.unlikely = res.range.max;
2222 	  res.mayfail = true;
2223 	}
2224     }
2225   else
2226     {
2227       /* A plain '%c' directive.  Its output is exactly 1.  */
2228       res.range.min = res.range.max = 1;
2229       res.range.likely = res.range.unlikely = 1;
2230     }
2231 
2232   /* Bump up the byte counters if WIDTH is greater.  */
2233   return res.adjust_for_width_or_precision (dir.width);
2234 }
2235 
2236 /* If TYPE is an array or struct or union, increment *FLDOFF by the starting
2237    offset of the member that *OFF points into if one can be determined and
2238    set *FLDSIZE to its size in bytes and decrement *OFF by the same.
2239    Otherwise do nothing.  */
2240 
2241 static void
set_aggregate_size_and_offset(tree type,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * fldsize,HOST_WIDE_INT * off)2242 set_aggregate_size_and_offset (tree type, HOST_WIDE_INT *fldoff,
2243 			       HOST_WIDE_INT *fldsize, HOST_WIDE_INT *off)
2244 {
2245   /* The byte offset of the most basic struct member the byte
2246      offset *OFF corresponds to, or for a (multidimensional)
2247      array member, the byte offset of the array element.  */
2248   if (TREE_CODE (type) == ARRAY_TYPE
2249       && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
2250     {
2251       HOST_WIDE_INT index = 0, arrsize = 0;
2252       if (array_elt_at_offset (type, *off, &index, &arrsize))
2253 	{
2254 	  *fldoff += index;
2255 	  *fldsize = arrsize;
2256 	}
2257       /* Otherwise leave *FLDOFF et al. unchanged.  */
2258     }
2259   else if (RECORD_OR_UNION_TYPE_P (type))
2260     {
2261       HOST_WIDE_INT index = 0;
2262       tree sub = field_at_offset (type, NULL_TREE, *off, &index);
2263       if (sub)
2264 	{
2265 	  tree subsize = DECL_SIZE_UNIT (sub);
2266 	  if (*fldsize < HOST_WIDE_INT_MAX
2267 	      && subsize
2268 	      && tree_fits_uhwi_p (subsize))
2269 	    *fldsize = tree_to_uhwi (subsize);
2270 	  else
2271 	    *fldsize = HOST_WIDE_INT_MAX;
2272 	  *fldoff += index;
2273 	  *off -= index;
2274 	}
2275       /* Otherwise leave *FLDOFF et al. unchanged.  */
2276     }
2277 }
2278 
2279 /* For an expression X of pointer type, recursively try to find its origin
2280    (either object DECL or pointer such as PARM_DECL) Y and return such a Y.
2281    When X refers to an array element or struct member, set *FLDOFF to
2282    the offset of the element or member from the beginning of the "most
2283    derived" object and *FLDSIZE to its size.  When nonnull, set *OFF to
2284    the overall offset from the beginning of the object so that
2285    *FLDOFF <= *OFF.  */
2286 
2287 static tree
get_origin_and_offset_r(tree x,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * fldsize,HOST_WIDE_INT * off)2288 get_origin_and_offset_r (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *fldsize,
2289 			 HOST_WIDE_INT *off)
2290 {
2291   HOST_WIDE_INT sizebuf = -1;
2292   if (!fldsize)
2293     fldsize = &sizebuf;
2294 
2295   if (DECL_P (x))
2296     {
2297       /* Set the size if it hasn't been set yet.  */
2298       if (tree size = DECL_SIZE_UNIT (x))
2299 	if (*fldsize < 0 && tree_fits_shwi_p (size))
2300 	  *fldsize = tree_to_shwi (size);
2301       return x;
2302     }
2303 
2304   switch (TREE_CODE (x))
2305     {
2306     case ADDR_EXPR:
2307       x = TREE_OPERAND (x, 0);
2308       return get_origin_and_offset_r (x, fldoff, fldsize, off);
2309 
2310     case ARRAY_REF:
2311       {
2312 	tree sub = TREE_OPERAND (x, 1);
2313 	unsigned HOST_WIDE_INT idx =
2314 	  tree_fits_uhwi_p (sub) ? tree_to_uhwi (sub) : HOST_WIDE_INT_MAX;
2315 
2316 	tree elsz = array_ref_element_size (x);
2317 	unsigned HOST_WIDE_INT elbytes =
2318 	  tree_fits_shwi_p (elsz) ? tree_to_shwi (elsz) : HOST_WIDE_INT_MAX;
2319 
2320 	unsigned HOST_WIDE_INT byteoff = idx * elbytes;
2321 
2322 	if (byteoff < HOST_WIDE_INT_MAX
2323 	    && elbytes < HOST_WIDE_INT_MAX
2324 	    && (elbytes == 0 || byteoff / elbytes == idx))
2325 	  {
2326 	    /* For in-bounds constant offsets into constant-sized arrays
2327 	       bump up *OFF, and for what's likely arrays or structs of
2328 	       arrays, also *FLDOFF, as necessary.  */
2329 	    if (off)
2330 	      *off += byteoff;
2331 	    if (elbytes > 1)
2332 	      *fldoff += byteoff;
2333 	  }
2334 	else
2335 	  *fldoff = HOST_WIDE_INT_MAX;
2336 
2337 	x = TREE_OPERAND (x, 0);
2338 	return get_origin_and_offset_r (x, fldoff, fldsize, off);
2339       }
2340 
2341     case MEM_REF:
2342       if (off)
2343 	{
2344 	  tree offset = TREE_OPERAND (x, 1);
2345 	  *off = (tree_fits_uhwi_p (offset)
2346 		  ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
2347 	}
2348 
2349       x = TREE_OPERAND (x, 0);
2350 
2351       if (off)
2352 	{
2353 	  tree xtype
2354 	    = (TREE_CODE (x) == ADDR_EXPR
2355 	       ? TREE_TYPE (TREE_OPERAND (x, 0)) : TREE_TYPE (TREE_TYPE (x)));
2356 
2357 	  set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
2358 	}
2359 
2360       return get_origin_and_offset_r (x, fldoff, fldsize, nullptr);
2361 
2362     case COMPONENT_REF:
2363       {
2364 	tree foff = component_ref_field_offset (x);
2365 	tree fld = TREE_OPERAND (x, 1);
2366 	if (!tree_fits_shwi_p (foff)
2367 	    || !tree_fits_shwi_p (DECL_FIELD_BIT_OFFSET (fld)))
2368 	  return x;
2369 	*fldoff += (tree_to_shwi (foff)
2370 		    + (tree_to_shwi (DECL_FIELD_BIT_OFFSET (fld))
2371 		       / BITS_PER_UNIT));
2372 
2373 	get_origin_and_offset_r (fld, fldoff, fldsize, off);
2374 	x = TREE_OPERAND (x, 0);
2375 	return get_origin_and_offset_r (x, fldoff, nullptr, off);
2376       }
2377 
2378     case SSA_NAME:
2379       {
2380 	gimple *def = SSA_NAME_DEF_STMT (x);
2381 	if (is_gimple_assign (def))
2382 	  {
2383 	    tree_code code = gimple_assign_rhs_code (def);
2384 	    if (code == ADDR_EXPR)
2385 	      {
2386 		x = gimple_assign_rhs1 (def);
2387 		return get_origin_and_offset_r (x, fldoff, fldsize, off);
2388 	      }
2389 
2390 	    if (code == POINTER_PLUS_EXPR)
2391 	      {
2392 		tree offset = gimple_assign_rhs2 (def);
2393 		if (off && tree_fits_uhwi_p (offset))
2394 		  *off = tree_to_uhwi (offset);
2395 
2396 		x = gimple_assign_rhs1 (def);
2397 		x = get_origin_and_offset_r (x, fldoff, fldsize, off);
2398 		if (off && !tree_fits_uhwi_p (offset))
2399 		  *off = HOST_WIDE_INT_MAX;
2400 		if (off)
2401 		  {
2402 		    tree xtype = TREE_TYPE (x);
2403 		    set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
2404 		  }
2405 		return x;
2406 	      }
2407 	    else if (code == VAR_DECL)
2408 	      {
2409 		x = gimple_assign_rhs1 (def);
2410 		return get_origin_and_offset_r (x, fldoff, fldsize, off);
2411 	      }
2412 	  }
2413 	else if (gimple_nop_p (def) && SSA_NAME_VAR (x))
2414 	  x = SSA_NAME_VAR (x);
2415 
2416 	tree xtype = TREE_TYPE (x);
2417 	if (POINTER_TYPE_P (xtype))
2418 	  xtype = TREE_TYPE (xtype);
2419 
2420 	if (off)
2421 	  set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
2422       }
2423 
2424     default:
2425       break;
2426     }
2427 
2428   return x;
2429 }
2430 
2431 /* Nonrecursive version of the above.
2432    The function never returns null unless X is null to begin with.  */
2433 
2434 static tree
get_origin_and_offset(tree x,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * off,HOST_WIDE_INT * fldsize=nullptr)2435 get_origin_and_offset (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *off,
2436 		       HOST_WIDE_INT *fldsize = nullptr)
2437 {
2438   if (!x)
2439     return NULL_TREE;
2440 
2441   HOST_WIDE_INT sizebuf;
2442   if (!fldsize)
2443     fldsize = &sizebuf;
2444 
2445   /* Invalidate *FLDSIZE.  */
2446   *fldsize = -1;
2447   *fldoff = *off = 0;
2448 
2449   return get_origin_and_offset_r (x, fldoff, fldsize, off);
2450 }
2451 
2452 /* If ARG refers to the same (sub)object or array element as described
2453    by DST and DST_FLD, return the byte offset into the struct member or
2454    array element referenced by ARG and set *ARG_SIZE to the size of
2455    the (sub)object.  Otherwise return HOST_WIDE_INT_MIN to indicate
2456    that ARG and DST do not refer to the same object.  */
2457 
2458 static HOST_WIDE_INT
alias_offset(tree arg,HOST_WIDE_INT * arg_size,tree dst,HOST_WIDE_INT dst_fld)2459 alias_offset (tree arg, HOST_WIDE_INT *arg_size,
2460 	      tree dst, HOST_WIDE_INT dst_fld)
2461 {
2462   /* See if the argument refers to the same base object as the destination
2463      of the formatted function call, and if so, try to determine if they
2464      can alias.  */
2465   if (!arg || !dst || !ptr_derefs_may_alias_p (arg, dst))
2466     return HOST_WIDE_INT_MIN;
2467 
2468   /* The two arguments may refer to the same object.  If they both refer
2469      to a struct member, see if the members are one and the same.  If so,
2470      return the offset into the member.  */
2471   HOST_WIDE_INT arg_off = 0, arg_fld = 0;
2472 
2473   tree arg_orig = get_origin_and_offset (arg, &arg_fld, &arg_off, arg_size);
2474 
2475   if (arg_orig == dst && arg_fld == dst_fld)
2476     return arg_off;
2477 
2478   return HOST_WIDE_INT_MIN;
2479 }
2480 
2481 /* Return the minimum and maximum number of characters formatted
2482    by the '%s' format directive and its wide character form for
2483    the argument ARG.  ARG can be null (for functions such as
2484    vsprintf).  */
2485 
2486 static fmtresult
format_string(const directive & dir,tree arg,pointer_query & ptr_qry)2487 format_string (const directive &dir, tree arg, pointer_query &ptr_qry)
2488 {
2489   fmtresult res;
2490 
2491   /* The size of the (sub)object ARG refers to.  Used to adjust
2492      the conservative get_string_length() result.  */
2493   HOST_WIDE_INT arg_size = 0;
2494 
2495   if (warn_restrict)
2496     {
2497       /* See if ARG might alias the destination of the call with
2498 	 DST_ORIGIN and DST_FIELD.  If so, store the starting offset
2499 	 so that the overlap can be determined for certain later,
2500 	 when the amount of output of the call (including subsequent
2501 	 directives) has been computed.  Otherwise, store HWI_MIN.  */
2502       res.dst_offset = alias_offset (arg, &arg_size, dir.info->dst_origin,
2503 				     dir.info->dst_field);
2504       if (res.dst_offset >= 0 && res.dst_offset <= arg_size)
2505 	arg_size -= res.dst_offset;
2506       else
2507 	arg_size = 0;
2508     }
2509 
2510   /* Compute the range the argument's length can be in.  */
2511   int count_by = 1;
2512   if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2513     {
2514       /* Get a node for a C type that will be the same size
2515 	 as a wchar_t on the target.  */
2516       tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2517 
2518       /* Now that we have a suitable node, get the number of
2519 	 bytes it occupies.  */
2520       count_by = int_size_in_bytes (node);
2521       gcc_checking_assert (count_by == 2 || count_by == 4);
2522     }
2523 
2524   fmtresult slen =
2525     get_string_length (arg, dir.info->callstmt, arg_size, count_by, ptr_qry);
2526   if (slen.range.min == slen.range.max
2527       && slen.range.min < HOST_WIDE_INT_MAX)
2528     {
2529       /* The argument is either a string constant or it refers
2530 	 to one of a number of strings of the same length.  */
2531 
2532       /* A '%s' directive with a string argument with constant length.  */
2533       res.range = slen.range;
2534 
2535       if (dir.specifier == 'S'
2536 	  || dir.modifier == FMT_LEN_l)
2537 	{
2538 	  /* In the worst case the length of output of a wide string S
2539 	     is bounded by MB_LEN_MAX * wcslen (S).  */
2540 	  res.range.max *= target_mb_len_max ();
2541 	  res.range.unlikely = res.range.max;
2542 	  /* It's likely that the total length is not more that
2543 	     2 * wcslen (S).*/
2544 	  res.range.likely = res.range.min * 2;
2545 
2546 	  if (dir.prec[1] >= 0
2547 	      && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2548 	    {
2549 	      res.range.max = dir.prec[1];
2550 	      res.range.likely = dir.prec[1];
2551 	      res.range.unlikely = dir.prec[1];
2552 	    }
2553 
2554 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2555 	    res.range.min = 0;
2556 	  else if (dir.prec[0] >= 0)
2557 	    res.range.likely = dir.prec[0];
2558 
2559 	  /* Even a non-empty wide character string need not convert into
2560 	     any bytes.  */
2561 	  res.range.min = 0;
2562 
2563 	  /* A non-empty wide character conversion may fail.  */
2564 	  if (slen.range.max > 0)
2565 	    res.mayfail = true;
2566 	}
2567       else
2568 	{
2569 	  res.knownrange = true;
2570 
2571 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2572 	    res.range.min = 0;
2573 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2574 	    res.range.min = dir.prec[0];
2575 
2576 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2577 	    {
2578 	      res.range.max = dir.prec[1];
2579 	      res.range.likely = dir.prec[1];
2580 	      res.range.unlikely = dir.prec[1];
2581 	    }
2582 	}
2583     }
2584   else if (arg && integer_zerop (arg))
2585     {
2586       /* Handle null pointer argument.  */
2587 
2588       fmtresult res (0);
2589       res.nullp = true;
2590       return res;
2591     }
2592   else
2593     {
2594       /* For a '%s' and '%ls' directive with a non-constant string (either
2595 	 one of a number of strings of known length or an unknown string)
2596 	 the minimum number of characters is lesser of PRECISION[0] and
2597 	 the length of the shortest known string or zero, and the maximum
2598 	 is the lesser of the length of the longest known string or
2599 	 PTRDIFF_MAX and PRECISION[1].  The likely length is either
2600 	 the minimum at level 1 and the greater of the minimum and 1
2601 	 at level 2.  This result is adjusted upward for width (if it's
2602 	 specified).  */
2603 
2604       if (dir.specifier == 'S'
2605 	  || dir.modifier == FMT_LEN_l)
2606 	{
2607 	  /* A wide character converts to as few as zero bytes.  */
2608 	  slen.range.min = 0;
2609 	  if (slen.range.max < target_int_max ())
2610 	    slen.range.max *= target_mb_len_max ();
2611 
2612 	  if (slen.range.likely < target_int_max ())
2613 	    slen.range.likely *= 2;
2614 
2615 	  if (slen.range.likely < target_int_max ())
2616 	    slen.range.unlikely *= target_mb_len_max ();
2617 
2618 	  /* A non-empty wide character conversion may fail.  */
2619 	  if (slen.range.max > 0)
2620 	    res.mayfail = true;
2621 	}
2622 
2623       res.range = slen.range;
2624 
2625       if (dir.prec[0] >= 0)
2626 	{
2627 	  /* Adjust the minimum to zero if the string length is unknown,
2628 	     or at most the lower bound of the precision otherwise.  */
2629 	  if (slen.range.min >= target_int_max ())
2630 	    res.range.min = 0;
2631 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2632 	    res.range.min = dir.prec[0];
2633 
2634 	  /* Make both maxima no greater than the upper bound of precision.  */
2635 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2636 	      || slen.range.max >= target_int_max ())
2637 	    {
2638 	      res.range.max = dir.prec[1];
2639 	      res.range.unlikely = dir.prec[1];
2640 	    }
2641 
2642 	  /* If precision is constant, set the likely counter to the lesser
2643 	     of it and the maximum string length.  Otherwise, if the lower
2644 	     bound of precision is greater than zero, set the likely counter
2645 	     to the minimum.  Otherwise set it to zero or one based on
2646 	     the warning level.  */
2647 	  if (dir.prec[0] == dir.prec[1])
2648 	    res.range.likely
2649 	      = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2650 		 ? dir.prec[0] : slen.range.max);
2651 	  else if (dir.prec[0] > 0)
2652 	    res.range.likely = res.range.min;
2653 	  else
2654 	    res.range.likely = warn_level > 1;
2655 	}
2656       else if (dir.prec[1] >= 0)
2657 	{
2658 	  res.range.min = 0;
2659 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2660 	    res.range.max = dir.prec[1];
2661 	  res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2662 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.unlikely)
2663 	    res.range.unlikely = dir.prec[1];
2664 	}
2665       else if (slen.range.min >= target_int_max ())
2666 	{
2667 	  res.range.min = 0;
2668 	  res.range.max = HOST_WIDE_INT_MAX;
2669 	  /* At level 1 strings of unknown length are assumed to be
2670 	     empty, while at level 2 they are assumed to be one byte
2671 	     long.  */
2672 	  res.range.likely = warn_level > 1;
2673 	  res.range.unlikely = HOST_WIDE_INT_MAX;
2674 	}
2675       else
2676 	{
2677 	  /* A string of unknown length unconstrained by precision is
2678 	     assumed to be empty at level 1 and just one character long
2679 	     at higher levels.  */
2680 	  if (res.range.likely >= target_int_max ())
2681 	    res.range.likely = warn_level > 1;
2682 	}
2683     }
2684 
2685   /* If the argument isn't a nul-terminated string and the number
2686      of bytes on output isn't bounded by precision, set NONSTR.  */
2687   if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2688     res.nonstr = slen.nonstr;
2689 
2690   /* Bump up the byte counters if WIDTH is greater.  */
2691   return res.adjust_for_width_or_precision (dir.width);
2692 }
2693 
2694 /* Format plain string (part of the format string itself).  */
2695 
2696 static fmtresult
format_plain(const directive & dir,tree,pointer_query &)2697 format_plain (const directive &dir, tree, pointer_query &)
2698 {
2699   fmtresult res (dir.len);
2700   return res;
2701 }
2702 
2703 /* Return true if the RESULT of a directive in a call describe by INFO
2704    should be diagnosed given the AVAILable space in the destination.  */
2705 
2706 static bool
should_warn_p(const call_info & info,const result_range & avail,const result_range & result)2707 should_warn_p (const call_info &info,
2708 	       const result_range &avail, const result_range &result)
2709 {
2710   if (result.max <= avail.min)
2711     {
2712       /* The least amount of space remaining in the destination is big
2713 	 enough for the longest output.  */
2714       return false;
2715     }
2716 
2717   if (info.bounded)
2718     {
2719       if (warn_format_trunc == 1 && result.min <= avail.max
2720 	  && info.retval_used ())
2721 	{
2722 	  /* The likely amount of space remaining in the destination is big
2723 	     enough for the least output and the return value is used.  */
2724 	  return false;
2725 	}
2726 
2727       if (warn_format_trunc == 1 && result.likely <= avail.likely
2728 	  && !info.retval_used ())
2729 	{
2730 	  /* The likely amount of space remaining in the destination is big
2731 	     enough for the likely output and the return value is unused.  */
2732 	  return false;
2733 	}
2734 
2735       if (warn_format_trunc == 2
2736 	  && result.likely <= avail.min
2737 	  && (result.max <= avail.min
2738 	      || result.max > HOST_WIDE_INT_MAX))
2739 	{
2740 	  /* The minimum amount of space remaining in the destination is big
2741 	     enough for the longest output.  */
2742 	  return false;
2743 	}
2744     }
2745   else
2746     {
2747       if (warn_level == 1 && result.likely <= avail.likely)
2748 	{
2749 	  /* The likely amount of space remaining in the destination is big
2750 	     enough for the likely output.  */
2751 	  return false;
2752 	}
2753 
2754       if (warn_level == 2
2755 	  && result.likely <= avail.min
2756 	  && (result.max <= avail.min
2757 	      || result.max > HOST_WIDE_INT_MAX))
2758 	{
2759 	  /* The minimum amount of space remaining in the destination is big
2760 	     enough for the longest output.  */
2761 	  return false;
2762 	}
2763     }
2764 
2765   return true;
2766 }
2767 
2768 /* At format string location describe by DIRLOC in a call described
2769    by INFO, issue a warning for a directive DIR whose output may be
2770    in excess of the available space AVAIL_RANGE in the destination
2771    given the formatting result FMTRES.  This function does nothing
2772    except decide whether to issue a warning for a possible write
2773    past the end or truncation and, if so, format the warning.
2774    Return true if a warning has been issued.  */
2775 
2776 static bool
maybe_warn(substring_loc & dirloc,location_t argloc,const call_info & info,const result_range & avail_range,const result_range & res,const directive & dir)2777 maybe_warn (substring_loc &dirloc, location_t argloc,
2778 	    const call_info &info,
2779 	    const result_range &avail_range, const result_range &res,
2780 	    const directive &dir)
2781 {
2782   if (!should_warn_p (info, avail_range, res))
2783     return false;
2784 
2785   /* A warning will definitely be issued below.  */
2786 
2787   /* The maximum byte count to reference in the warning.  Larger counts
2788      imply that the upper bound is unknown (and could be anywhere between
2789      RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2790      than "between N and X" where X is some huge number.  */
2791   unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2792 
2793   /* True when there is enough room in the destination for the least
2794      amount of a directive's output but not enough for its likely or
2795      maximum output.  */
2796   bool maybe = (res.min <= avail_range.max
2797 		&& (avail_range.min < res.likely
2798 		    || (res.max < HOST_WIDE_INT_MAX
2799 			&& avail_range.min < res.max)));
2800 
2801   /* Buffer for the directive in the host character set (used when
2802      the source character set is different).  */
2803   char hostdir[32];
2804 
2805   if (avail_range.min == avail_range.max)
2806     {
2807       /* The size of the destination region is exact.  */
2808       unsigned HOST_WIDE_INT navail = avail_range.max;
2809 
2810       if (target_to_host (*dir.beg) != '%')
2811 	{
2812 	  /* For plain character directives (i.e., the format string itself)
2813 	     but not others, point the caret at the first character that's
2814 	     past the end of the destination.  */
2815 	  if (navail < dir.len)
2816 	    dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2817 	}
2818 
2819       if (*dir.beg == '\0')
2820 	{
2821 	  /* This is the terminating nul.  */
2822 	  gcc_assert (res.min == 1 && res.min == res.max);
2823 
2824 	  return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2825 			  info.bounded
2826 			  ? (maybe
2827 			     ? G_("%qE output may be truncated before the "
2828 				  "last format character")
2829 			     : G_("%qE output truncated before the last "
2830 				  "format character"))
2831 			  : (maybe
2832 			     ? G_("%qE may write a terminating nul past the "
2833 				  "end of the destination")
2834 			     : G_("%qE writing a terminating nul past the "
2835 				  "end of the destination")),
2836 			  info.func);
2837 	}
2838 
2839       if (res.min == res.max)
2840 	{
2841 	  const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2842 	  if (!info.bounded)
2843 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2844 			      "%<%.*s%> directive writing %wu byte into a "
2845 			      "region of size %wu",
2846 			      "%<%.*s%> directive writing %wu bytes into a "
2847 			      "region of size %wu",
2848 			      (int) dir.len, d, res.min, navail);
2849 	  else if (maybe)
2850 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2851 			      "%<%.*s%> directive output may be truncated "
2852 			      "writing %wu byte into a region of size %wu",
2853 			      "%<%.*s%> directive output may be truncated "
2854 			      "writing %wu bytes into a region of size %wu",
2855 			      (int) dir.len, d, res.min, navail);
2856 	  else
2857 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2858 			      "%<%.*s%> directive output truncated writing "
2859 			      "%wu byte into a region of size %wu",
2860 			      "%<%.*s%> directive output truncated writing "
2861 			      "%wu bytes into a region of size %wu",
2862 			      (int) dir.len, d, res.min, navail);
2863 	}
2864       if (res.min == 0 && res.max < maxbytes)
2865 	return fmtwarn (dirloc, argloc, NULL,
2866 			info.warnopt (),
2867 			info.bounded
2868 			? (maybe
2869 			   ? G_("%<%.*s%> directive output may be truncated "
2870 				"writing up to %wu bytes into a region of "
2871 				"size %wu")
2872 			   : G_("%<%.*s%> directive output truncated writing "
2873 				"up to %wu bytes into a region of size %wu"))
2874 			: G_("%<%.*s%> directive writing up to %wu bytes "
2875 			     "into a region of size %wu"), (int) dir.len,
2876 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2877 			res.max, navail);
2878 
2879       if (res.min == 0 && maxbytes <= res.max)
2880 	/* This is a special case to avoid issuing the potentially
2881 	   confusing warning:
2882 	     writing 0 or more bytes into a region of size 0.  */
2883 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2884 			info.bounded
2885 			? (maybe
2886 			   ? G_("%<%.*s%> directive output may be truncated "
2887 				"writing likely %wu or more bytes into a "
2888 				"region of size %wu")
2889 			   : G_("%<%.*s%> directive output truncated writing "
2890 				"likely %wu or more bytes into a region of "
2891 				"size %wu"))
2892 			: G_("%<%.*s%> directive writing likely %wu or more "
2893 			     "bytes into a region of size %wu"), (int) dir.len,
2894 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2895 			res.likely, navail);
2896 
2897       if (res.max < maxbytes)
2898 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2899 			info.bounded
2900 			? (maybe
2901 			   ? G_("%<%.*s%> directive output may be truncated "
2902 				"writing between %wu and %wu bytes into a "
2903 				"region of size %wu")
2904 			   : G_("%<%.*s%> directive output truncated "
2905 				"writing between %wu and %wu bytes into a "
2906 				"region of size %wu"))
2907 			: G_("%<%.*s%> directive writing between %wu and "
2908 			     "%wu bytes into a region of size %wu"),
2909 			(int) dir.len,
2910 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2911 			res.min, res.max, navail);
2912 
2913       return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2914 		      info.bounded
2915 		      ? (maybe
2916 			 ? G_("%<%.*s%> directive output may be truncated "
2917 			      "writing %wu or more bytes into a region of "
2918 			      "size %wu")
2919 			 : G_("%<%.*s%> directive output truncated writing "
2920 			      "%wu or more bytes into a region of size %wu"))
2921 		      : G_("%<%.*s%> directive writing %wu or more bytes "
2922 			   "into a region of size %wu"), (int) dir.len,
2923 		      target_to_host (hostdir, sizeof hostdir, dir.beg),
2924 		      res.min, navail);
2925     }
2926 
2927   /* The size of the destination region is a range.  */
2928 
2929   if (target_to_host (*dir.beg) != '%')
2930     {
2931       unsigned HOST_WIDE_INT navail = avail_range.max;
2932 
2933       /* For plain character directives (i.e., the format string itself)
2934 	 but not others, point the caret at the first character that's
2935 	 past the end of the destination.  */
2936       if (navail < dir.len)
2937 	dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2938     }
2939 
2940   if (*dir.beg == '\0')
2941     {
2942       gcc_assert (res.min == 1 && res.min == res.max);
2943 
2944       return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2945 		      info.bounded
2946 		      ? (maybe
2947 			 ? G_("%qE output may be truncated before the last "
2948 			      "format character")
2949 			 : G_("%qE output truncated before the last format "
2950 			      "character"))
2951 		      : (maybe
2952 			 ? G_("%qE may write a terminating nul past the end "
2953 			      "of the destination")
2954 			 : G_("%qE writing a terminating nul past the end "
2955 			      "of the destination")), info.func);
2956     }
2957 
2958   if (res.min == res.max)
2959     {
2960       const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2961       if (!info.bounded)
2962 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2963 			  "%<%.*s%> directive writing %wu byte into a region "
2964 			  "of size between %wu and %wu",
2965 			  "%<%.*s%> directive writing %wu bytes into a region "
2966 			  "of size between %wu and %wu", (int) dir.len, d,
2967 			  res.min, avail_range.min, avail_range.max);
2968       else if (maybe)
2969 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2970 			  "%<%.*s%> directive output may be truncated writing "
2971 			  "%wu byte into a region of size between %wu and %wu",
2972 			  "%<%.*s%> directive output may be truncated writing "
2973 			  "%wu bytes into a region of size between %wu and "
2974 			  "%wu", (int) dir.len, d, res.min, avail_range.min,
2975 			  avail_range.max);
2976       else
2977 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2978 			  "%<%.*s%> directive output truncated writing %wu "
2979 			  "byte into a region of size between %wu and %wu",
2980 			  "%<%.*s%> directive output truncated writing %wu "
2981 			  "bytes into a region of size between %wu and %wu",
2982 			  (int) dir.len, d, res.min, avail_range.min,
2983 			  avail_range.max);
2984     }
2985 
2986   if (res.min == 0 && res.max < maxbytes)
2987     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2988 		    info.bounded
2989 		    ? (maybe
2990 		       ? G_("%<%.*s%> directive output may be truncated "
2991 			    "writing up to %wu bytes into a region of size "
2992 			    "between %wu and %wu")
2993 		       : G_("%<%.*s%> directive output truncated writing "
2994 			    "up to %wu bytes into a region of size between "
2995 			    "%wu and %wu"))
2996 		    : G_("%<%.*s%> directive writing up to %wu bytes "
2997 			 "into a region of size between %wu and %wu"),
2998 		    (int) dir.len,
2999 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
3000 		    res.max, avail_range.min, avail_range.max);
3001 
3002   if (res.min == 0 && maxbytes <= res.max)
3003     /* This is a special case to avoid issuing the potentially confusing
3004        warning:
3005 	 writing 0 or more bytes into a region of size between 0 and N.  */
3006     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3007 		    info.bounded
3008 		    ? (maybe
3009 		       ? G_("%<%.*s%> directive output may be truncated "
3010 			    "writing likely %wu or more bytes into a region "
3011 			    "of size between %wu and %wu")
3012 		       : G_("%<%.*s%> directive output truncated writing "
3013 			    "likely %wu or more bytes into a region of size "
3014 			    "between %wu and %wu"))
3015 		    : G_("%<%.*s%> directive writing likely %wu or more bytes "
3016 			 "into a region of size between %wu and %wu"),
3017 		    (int) dir.len,
3018 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
3019 		    res.likely, avail_range.min, avail_range.max);
3020 
3021   if (res.max < maxbytes)
3022     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3023 		    info.bounded
3024 		    ? (maybe
3025 		       ? G_("%<%.*s%> directive output may be truncated "
3026 			    "writing between %wu and %wu bytes into a region "
3027 			    "of size between %wu and %wu")
3028 		       : G_("%<%.*s%> directive output truncated writing "
3029 			    "between %wu and %wu bytes into a region of size "
3030 			    "between %wu and %wu"))
3031 		    : G_("%<%.*s%> directive writing between %wu and "
3032 			 "%wu bytes into a region of size between %wu and "
3033 			 "%wu"), (int) dir.len,
3034 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
3035 		    res.min, res.max, avail_range.min, avail_range.max);
3036 
3037   return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3038 		  info.bounded
3039 		  ? (maybe
3040 		     ? G_("%<%.*s%> directive output may be truncated writing "
3041 			  "%wu or more bytes into a region of size between "
3042 			  "%wu and %wu")
3043 		     : G_("%<%.*s%> directive output truncated writing "
3044 			  "%wu or more bytes into a region of size between "
3045 			  "%wu and %wu"))
3046 		  : G_("%<%.*s%> directive writing %wu or more bytes "
3047 		       "into a region of size between %wu and %wu"),
3048 		  (int) dir.len,
3049 		  target_to_host (hostdir, sizeof hostdir, dir.beg),
3050 		  res.min, avail_range.min, avail_range.max);
3051 }
3052 
3053 /* Given the formatting result described by RES and NAVAIL, the number
3054    of available bytes in the destination, return the range of bytes
3055    remaining in the destination.  */
3056 
3057 static inline result_range
bytes_remaining(unsigned HOST_WIDE_INT navail,const format_result & res)3058 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
3059 {
3060   result_range range;
3061 
3062   if (HOST_WIDE_INT_MAX <= navail)
3063     {
3064       range.min = range.max = range.likely = range.unlikely = navail;
3065       return range;
3066     }
3067 
3068   /* The lower bound of the available range is the available size
3069      minus the maximum output size, and the upper bound is the size
3070      minus the minimum.  */
3071   range.max = res.range.min < navail ? navail - res.range.min : 0;
3072 
3073   range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
3074 
3075   if (res.range.max < HOST_WIDE_INT_MAX)
3076     range.min = res.range.max < navail ? navail - res.range.max : 0;
3077   else
3078     range.min = range.likely;
3079 
3080   range.unlikely = (res.range.unlikely < navail
3081 		    ? navail - res.range.unlikely : 0);
3082 
3083   return range;
3084 }
3085 
3086 /* Compute the length of the output resulting from the directive DIR
3087    in a call described by INFO and update the overall result of the call
3088    in *RES.  Return true if the directive has been handled.  */
3089 
3090 static bool
format_directive(const call_info & info,format_result * res,const directive & dir,pointer_query & ptr_qry)3091 format_directive (const call_info &info,
3092 		  format_result *res, const directive &dir,
3093 		  pointer_query &ptr_qry)
3094 {
3095   /* Offset of the beginning of the directive from the beginning
3096      of the format string.  */
3097   size_t offset = dir.beg - info.fmtstr;
3098   size_t start = offset;
3099   size_t length = offset + dir.len - !!dir.len;
3100 
3101   /* Create a location for the whole directive from the % to the format
3102      specifier.  */
3103   substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3104 			offset, start, length);
3105 
3106   /* Also get the location of the argument if possible.
3107      This doesn't work for integer literals or function calls.  */
3108   location_t argloc = UNKNOWN_LOCATION;
3109   if (dir.arg)
3110     argloc = EXPR_LOCATION (dir.arg);
3111 
3112   /* Bail when there is no function to compute the output length,
3113      or when minimum length checking has been disabled.   */
3114   if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
3115     return false;
3116 
3117   /* Compute the range of lengths of the formatted output.  */
3118   fmtresult fmtres = dir.fmtfunc (dir, dir.arg, ptr_qry);
3119 
3120   /* Record whether the output of all directives is known to be
3121      bounded by some maximum, implying that their arguments are
3122      either known exactly or determined to be in a known range
3123      or, for strings, limited by the upper bounds of the arrays
3124      they refer to.  */
3125   res->knownrange &= fmtres.knownrange;
3126 
3127   if (!fmtres.knownrange)
3128     {
3129       /* Only when the range is known, check it against the host value
3130 	 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
3131 	 INT_MAX precision, which is the longest possible output of any
3132 	 single directive).  That's the largest valid byte count (though
3133 	 not valid call to a printf-like function because it can never
3134 	 return such a count).  Otherwise, the range doesn't correspond
3135 	 to known values of the argument.  */
3136       if (fmtres.range.max > target_dir_max ())
3137 	{
3138 	  /* Normalize the MAX counter to avoid having to deal with it
3139 	     later.  The counter can be less than HOST_WIDE_INT_M1U
3140 	     when compiling for an ILP32 target on an LP64 host.  */
3141 	  fmtres.range.max = HOST_WIDE_INT_M1U;
3142 	  /* Disable exact and maximum length checking after a failure
3143 	     to determine the maximum number of characters (for example
3144 	     for wide characters or wide character strings) but continue
3145 	     tracking the minimum number of characters.  */
3146 	  res->range.max = HOST_WIDE_INT_M1U;
3147 	}
3148 
3149       if (fmtres.range.min > target_dir_max ())
3150 	{
3151 	  /* Disable exact length checking after a failure to determine
3152 	     even the minimum number of characters (it shouldn't happen
3153 	     except in an error) but keep tracking the minimum and maximum
3154 	     number of characters.  */
3155 	  return true;
3156 	}
3157     }
3158 
3159   /* Buffer for the directive in the host character set (used when
3160      the source character set is different).  */
3161   char hostdir[32];
3162 
3163   int dirlen = dir.len;
3164 
3165   if (fmtres.nullp)
3166     {
3167       fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3168 	       "%<%.*s%> directive argument is null",
3169 	       dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));
3170 
3171       /* Don't bother processing the rest of the format string.  */
3172       res->warned = true;
3173       res->range.min = HOST_WIDE_INT_M1U;
3174       res->range.max = HOST_WIDE_INT_M1U;
3175       return false;
3176     }
3177 
3178   /* Compute the number of available bytes in the destination.  There
3179      must always be at least one byte of space for the terminating
3180      NUL that's appended after the format string has been processed.  */
3181   result_range avail_range = bytes_remaining (info.objsize, *res);
3182 
3183   /* If the argument aliases a part of the destination of the formatted
3184      call at offset FMTRES.DST_OFFSET append the directive and its result
3185      to the set of aliases for later processing.  */
3186   if (fmtres.dst_offset != HOST_WIDE_INT_MIN)
3187     res->append_alias (dir, fmtres.dst_offset, fmtres.range);
3188 
3189   bool warned = res->warned;
3190 
3191   if (!warned)
3192     warned = maybe_warn (dirloc, argloc, info, avail_range,
3193 			 fmtres.range, dir);
3194 
3195   /* Bump up the total maximum if it isn't too big.  */
3196   if (res->range.max < HOST_WIDE_INT_MAX
3197       && fmtres.range.max < HOST_WIDE_INT_MAX)
3198     res->range.max += fmtres.range.max;
3199 
3200   /* Raise the total unlikely maximum by the larger of the maximum
3201      and the unlikely maximum.  */
3202   unsigned HOST_WIDE_INT save = res->range.unlikely;
3203   if (fmtres.range.max < fmtres.range.unlikely)
3204     res->range.unlikely += fmtres.range.unlikely;
3205   else
3206     res->range.unlikely += fmtres.range.max;
3207 
3208   if (res->range.unlikely < save)
3209     res->range.unlikely = HOST_WIDE_INT_M1U;
3210 
3211   res->range.min += fmtres.range.min;
3212   res->range.likely += fmtres.range.likely;
3213 
3214   /* Has the minimum directive output length exceeded the maximum
3215      of 4095 bytes required to be supported?  */
3216   bool minunder4k = fmtres.range.min < 4096;
3217   bool maxunder4k = fmtres.range.max < 4096;
3218   /* Clear POSUNDER4K in the overall result if the maximum has exceeded
3219      the 4k (this is necessary to avoid the return value optimization
3220      that may not be safe in the maximum case).  */
3221   if (!maxunder4k)
3222     res->posunder4k = false;
3223   /* Also clear POSUNDER4K if the directive may fail.  */
3224   if (fmtres.mayfail)
3225     res->posunder4k = false;
3226 
3227   if (!warned
3228       /* Only warn at level 2.  */
3229       && warn_level > 1
3230       /* Only warn for string functions.  */
3231       && info.is_string_func ()
3232       && (!minunder4k
3233 	  || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
3234     {
3235       /* The directive output may be longer than the maximum required
3236 	 to be handled by an implementation according to 7.21.6.1, p15
3237 	 of C11.  Warn on this only at level 2 but remember this and
3238 	 prevent folding the return value when done.  This allows for
3239 	 the possibility of the actual libc call failing due to ENOMEM
3240 	 (like Glibc does with very large precision or width).
3241 	 Issue the "may exceed" warning only for string functions and
3242 	 not for fprintf or printf.  */
3243 
3244       if (fmtres.range.min == fmtres.range.max)
3245 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3246 			  "%<%.*s%> directive output of %wu bytes exceeds "
3247 			  "minimum required size of 4095", dirlen,
3248 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3249 			  fmtres.range.min);
3250       else if (!minunder4k)
3251 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3252 			  "%<%.*s%> directive output between %wu and %wu "
3253 			  "bytes exceeds minimum required size of 4095",
3254 			  dirlen,
3255 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3256 			  fmtres.range.min, fmtres.range.max);
3257       else if (!info.retval_used () && info.is_string_func ())
3258 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3259 			  "%<%.*s%> directive output between %wu and %wu "
3260 			  "bytes may exceed minimum required size of "
3261 			  "4095",
3262 			  dirlen,
3263 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3264 			  fmtres.range.min, fmtres.range.max);
3265     }
3266 
3267   /* Has the likely and maximum directive output exceeded INT_MAX?  */
3268   bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
3269   /* Don't consider the maximum to be in excess when it's the result
3270      of a string of unknown length (i.e., whose maximum has been set
3271      to be greater than or equal to HOST_WIDE_INT_MAX.  */
3272   bool maxximax = (*dir.beg
3273 		   && res->range.max > target_int_max ()
3274 		   && res->range.max < HOST_WIDE_INT_MAX);
3275 
3276   if (!warned
3277       /* Warn for the likely output size at level 1.  */
3278       && (likelyximax
3279 	  /* But only warn for the maximum at level 2.  */
3280 	  || (warn_level > 1
3281 	      && maxximax
3282 	      && fmtres.range.max < HOST_WIDE_INT_MAX)))
3283     {
3284       if (fmtres.range.min > target_int_max ())
3285 	{
3286 	  /* The directive output exceeds INT_MAX bytes.  */
3287 	  if (fmtres.range.min == fmtres.range.max)
3288 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3289 			      "%<%.*s%> directive output of %wu bytes exceeds "
3290 			      "%<INT_MAX%>", dirlen,
3291 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3292 			      fmtres.range.min);
3293 	  else
3294 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3295 			      "%<%.*s%> directive output between %wu and "
3296 			      "%wu bytes exceeds %<INT_MAX%>", dirlen,
3297 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3298 			      fmtres.range.min, fmtres.range.max);
3299 	}
3300       else if (res->range.min > target_int_max ())
3301 	{
3302 	  /* The directive output is under INT_MAX but causes the result
3303 	     to exceed INT_MAX bytes.  */
3304 	  if (fmtres.range.min == fmtres.range.max)
3305 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3306 			      "%<%.*s%> directive output of %wu bytes causes "
3307 			      "result to exceed %<INT_MAX%>", dirlen,
3308 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3309 			      fmtres.range.min);
3310 	  else
3311 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3312 			      "%<%.*s%> directive output between %wu and "
3313 			      "%wu bytes causes result to exceed %<INT_MAX%>",
3314 			      dirlen,
3315 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3316 			      fmtres.range.min, fmtres.range.max);
3317 	}
3318       else if ((!info.retval_used () || !info.bounded)
3319 	       && (info.is_string_func ()))
3320 	/* Warn for calls to string functions that either aren't bounded
3321 	   (sprintf) or whose return value isn't used.  */
3322 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3323 			  "%<%.*s%> directive output between %wu and "
3324 			  "%wu bytes may cause result to exceed "
3325 			  "%<INT_MAX%>", dirlen,
3326 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3327 			  fmtres.range.min, fmtres.range.max);
3328     }
3329 
3330   if (!warned && fmtres.nonstr)
3331     {
3332       warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3333 			"%<%.*s%> directive argument is not a nul-terminated "
3334 			"string",
3335 			dirlen,
3336 			target_to_host (hostdir, sizeof hostdir, dir.beg));
3337       if (warned && DECL_P (fmtres.nonstr))
3338 	inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
3339 		"referenced argument declared here");
3340       return false;
3341     }
3342 
3343   if (warned && fmtres.range.min < fmtres.range.likely
3344       && fmtres.range.likely < fmtres.range.max)
3345     inform_n (info.fmtloc, fmtres.range.likely,
3346 	      "assuming directive output of %wu byte",
3347 	      "assuming directive output of %wu bytes",
3348 	      fmtres.range.likely);
3349 
3350   if (warned && fmtres.argmin)
3351     {
3352       if (fmtres.argmin == fmtres.argmax)
3353 	inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
3354       else if (fmtres.knownrange)
3355 	inform (info.fmtloc, "directive argument in the range [%E, %E]",
3356 		fmtres.argmin, fmtres.argmax);
3357       else
3358 	inform (info.fmtloc,
3359 		"using the range [%E, %E] for directive argument",
3360 		fmtres.argmin, fmtres.argmax);
3361     }
3362 
3363   res->warned |= warned;
3364 
3365   if (!dir.beg[0] && res->warned)
3366     {
3367       location_t callloc = gimple_location (info.callstmt);
3368 
3369       unsigned HOST_WIDE_INT min = res->range.min;
3370       unsigned HOST_WIDE_INT max = res->range.max;
3371 
3372       if (info.objsize < HOST_WIDE_INT_MAX)
3373 	{
3374 	  /* If a warning has been issued for buffer overflow or truncation
3375 	     help the user figure out how big a buffer they need.  */
3376 
3377 	  if (min == max)
3378 	    inform_n (callloc, min,
3379 		      "%qE output %wu byte into a destination of size %wu",
3380 		      "%qE output %wu bytes into a destination of size %wu",
3381 		      info.func, min, info.objsize);
3382 	  else if (max < HOST_WIDE_INT_MAX)
3383 	    inform (callloc,
3384 		    "%qE output between %wu and %wu bytes into "
3385 		    "a destination of size %wu",
3386 		    info.func, min, max, info.objsize);
3387 	  else if (min < res->range.likely && res->range.likely < max)
3388 	    inform (callloc,
3389 		    "%qE output %wu or more bytes (assuming %wu) into "
3390 		    "a destination of size %wu",
3391 		    info.func, min, res->range.likely, info.objsize);
3392 	  else
3393 	    inform (callloc,
3394 		    "%qE output %wu or more bytes into a destination of size "
3395 		    "%wu",
3396 		    info.func, min, info.objsize);
3397 	}
3398       else if (!info.is_string_func ())
3399 	{
3400 	  /* If the warning is for a file function like fprintf
3401 	     of printf with no destination size just print the computed
3402 	     result.  */
3403 	  if (min == max)
3404 	    inform_n (callloc, min,
3405 		      "%qE output %wu byte", "%qE output %wu bytes",
3406 		      info.func, min);
3407 	  else if (max < HOST_WIDE_INT_MAX)
3408 	    inform (callloc,
3409 		    "%qE output between %wu and %wu bytes",
3410 		    info.func, min, max);
3411 	  else if (min < res->range.likely && res->range.likely < max)
3412 	    inform (callloc,
3413 		    "%qE output %wu or more bytes (assuming %wu)",
3414 		    info.func, min, res->range.likely);
3415 	  else
3416 	    inform (callloc,
3417 		    "%qE output %wu or more bytes",
3418 		    info.func, min);
3419 	}
3420     }
3421 
3422   if (dump_file && *dir.beg)
3423     {
3424       fprintf (dump_file,
3425 	       "    Result: "
3426 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3427 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3428 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3429 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3430 	       fmtres.range.min, fmtres.range.likely,
3431 	       fmtres.range.max, fmtres.range.unlikely,
3432 	       res->range.min, res->range.likely,
3433 	       res->range.max, res->range.unlikely);
3434     }
3435 
3436   return true;
3437 }
3438 
3439 /* Parse a format directive in function call described by INFO starting
3440    at STR and populate DIR structure.  Bump up *ARGNO by the number of
3441    arguments extracted for the directive.  Return the length of
3442    the directive.  */
3443 
3444 static size_t
parse_directive(call_info & info,directive & dir,format_result * res,const char * str,unsigned * argno,range_query * query)3445 parse_directive (call_info &info,
3446 		 directive &dir, format_result *res,
3447 		 const char *str, unsigned *argno,
3448 		 range_query *query)
3449 {
3450   const char *pcnt = strchr (str, target_percent);
3451   dir.beg = str;
3452 
3453   if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3454     {
3455       /* This directive is either a plain string or the terminating nul
3456 	 (which isn't really a directive but it simplifies things to
3457 	 handle it as if it were).  */
3458       dir.len = len;
3459       dir.fmtfunc = format_plain;
3460 
3461       if (dump_file)
3462 	{
3463 	  fprintf (dump_file, "  Directive %u at offset "
3464 		   HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3465 		   "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3466 		   dir.dirno,
3467 		   (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3468 		   (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3469 	}
3470 
3471       return len - !*str;
3472     }
3473 
3474   /* Set the directive argument's number to correspond to its position
3475      in the formatted function call's argument list.  */
3476   dir.argno = *argno;
3477 
3478   const char *pf = pcnt + 1;
3479 
3480     /* POSIX numbered argument index or zero when none.  */
3481   HOST_WIDE_INT dollar = 0;
3482 
3483   /* With and precision.  -1 when not specified, HOST_WIDE_INT_MIN
3484      when given by a va_list argument, and a non-negative value
3485      when specified in the format string itself.  */
3486   HOST_WIDE_INT width = -1;
3487   HOST_WIDE_INT precision = -1;
3488 
3489   /* Pointers to the beginning of the width and precision decimal
3490      string (if any) within the directive.  */
3491   const char *pwidth = 0;
3492   const char *pprec = 0;
3493 
3494   /* When the value of the decimal string that specifies width or
3495      precision is out of range, points to the digit that causes
3496      the value to exceed the limit.  */
3497   const char *werange = NULL;
3498   const char *perange = NULL;
3499 
3500   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3501      For vararg functions set to void_node.  */
3502   tree star_width = NULL_TREE;
3503 
3504   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3505      For vararg functions set to void_node.  */
3506   tree star_precision = NULL_TREE;
3507 
3508   if (ISDIGIT (target_to_host (*pf)))
3509     {
3510       /* This could be either a POSIX positional argument, the '0'
3511 	 flag, or a width, depending on what follows.  Store it as
3512 	 width and sort it out later after the next character has
3513 	 been seen.  */
3514       pwidth = pf;
3515       width = target_strtowi (&pf, &werange);
3516     }
3517   else if (target_to_host (*pf) == '*')
3518     {
3519       /* Similarly to the block above, this could be either a POSIX
3520 	 positional argument or a width, depending on what follows.  */
3521       if (*argno < gimple_call_num_args (info.callstmt))
3522 	star_width = gimple_call_arg (info.callstmt, (*argno)++);
3523       else
3524 	star_width = void_node;
3525       ++pf;
3526     }
3527 
3528   if (target_to_host (*pf) == '$')
3529     {
3530       /* Handle the POSIX dollar sign which references the 1-based
3531 	 positional argument number.  */
3532       if (width != -1)
3533 	dollar = width + info.argidx;
3534       else if (star_width
3535 	       && TREE_CODE (star_width) == INTEGER_CST
3536 	       && (TYPE_PRECISION (TREE_TYPE (star_width))
3537 		   <= TYPE_PRECISION (integer_type_node)))
3538 	dollar = width + tree_to_shwi (star_width);
3539 
3540       /* Bail when the numbered argument is out of range (it will
3541 	 have already been diagnosed by -Wformat).  */
3542       if (dollar == 0
3543 	  || dollar == (int)info.argidx
3544 	  || dollar > gimple_call_num_args (info.callstmt))
3545 	return false;
3546 
3547       --dollar;
3548 
3549       star_width = NULL_TREE;
3550       width = -1;
3551       ++pf;
3552     }
3553 
3554   if (dollar || !star_width)
3555     {
3556       if (width != -1)
3557 	{
3558 	  if (width == 0)
3559 	    {
3560 	      /* The '0' that has been interpreted as a width above is
3561 		 actually a flag.  Reset HAVE_WIDTH, set the '0' flag,
3562 		 and continue processing other flags.  */
3563 	      width = -1;
3564 	      dir.set_flag ('0');
3565 	    }
3566 	  else if (!dollar)
3567 	    {
3568 	      /* (Non-zero) width has been seen.  The next character
3569 		 is either a period or a digit.  */
3570 	      goto start_precision;
3571 	    }
3572 	}
3573       /* When either '$' has been seen, or width has not been seen,
3574 	 the next field is the optional flags followed by an optional
3575 	 width.  */
3576       for ( ; ; ) {
3577 	switch (target_to_host (*pf))
3578 	  {
3579 	  case ' ':
3580 	  case '0':
3581 	  case '+':
3582 	  case '-':
3583 	  case '#':
3584 	    dir.set_flag (target_to_host (*pf++));
3585 	    break;
3586 
3587 	  default:
3588 	    goto start_width;
3589 	  }
3590       }
3591 
3592     start_width:
3593       if (ISDIGIT (target_to_host (*pf)))
3594 	{
3595 	  werange = 0;
3596 	  pwidth = pf;
3597 	  width = target_strtowi (&pf, &werange);
3598 	}
3599       else if (target_to_host (*pf) == '*')
3600 	{
3601 	  if (*argno < gimple_call_num_args (info.callstmt))
3602 	    star_width = gimple_call_arg (info.callstmt, (*argno)++);
3603 	  else
3604 	    {
3605 	      /* This is (likely) a va_list.  It could also be an invalid
3606 		 call with insufficient arguments.  */
3607 	      star_width = void_node;
3608 	    }
3609 	  ++pf;
3610 	}
3611       else if (target_to_host (*pf) == '\'')
3612 	{
3613 	  /* The POSIX apostrophe indicating a numeric grouping
3614 	     in the current locale.  Even though it's possible to
3615 	     estimate the upper bound on the size of the output
3616 	     based on the number of digits it probably isn't worth
3617 	     continuing.  */
3618 	  return 0;
3619 	}
3620     }
3621 
3622  start_precision:
3623   if (target_to_host (*pf) == '.')
3624     {
3625       ++pf;
3626 
3627       if (ISDIGIT (target_to_host (*pf)))
3628 	{
3629 	  pprec = pf;
3630 	  precision = target_strtowi (&pf, &perange);
3631 	}
3632       else if (target_to_host (*pf) == '*')
3633 	{
3634 	  if (*argno < gimple_call_num_args (info.callstmt))
3635 	    star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3636 	  else
3637 	    {
3638 	      /* This is (likely) a va_list.  It could also be an invalid
3639 		 call with insufficient arguments.  */
3640 	      star_precision = void_node;
3641 	    }
3642 	  ++pf;
3643 	}
3644       else
3645 	{
3646 	  /* The decimal precision or the asterisk are optional.
3647 	     When neither is specified it's taken to be zero.  */
3648 	  precision = 0;
3649 	}
3650     }
3651 
3652   switch (target_to_host (*pf))
3653     {
3654     case 'h':
3655       if (target_to_host (pf[1]) == 'h')
3656 	{
3657 	  ++pf;
3658 	  dir.modifier = FMT_LEN_hh;
3659 	}
3660       else
3661 	dir.modifier = FMT_LEN_h;
3662       ++pf;
3663       break;
3664 
3665     case 'j':
3666       dir.modifier = FMT_LEN_j;
3667       ++pf;
3668       break;
3669 
3670     case 'L':
3671       dir.modifier = FMT_LEN_L;
3672       ++pf;
3673       break;
3674 
3675     case 'l':
3676       if (target_to_host (pf[1]) == 'l')
3677 	{
3678 	  ++pf;
3679 	  dir.modifier = FMT_LEN_ll;
3680 	}
3681       else
3682 	dir.modifier = FMT_LEN_l;
3683       ++pf;
3684       break;
3685 
3686     case 't':
3687       dir.modifier = FMT_LEN_t;
3688       ++pf;
3689       break;
3690 
3691     case 'z':
3692       dir.modifier = FMT_LEN_z;
3693       ++pf;
3694       break;
3695     }
3696 
3697   switch (target_to_host (*pf))
3698     {
3699       /* Handle a sole '%' character the same as "%%" but since it's
3700 	 undefined prevent the result from being folded.  */
3701     case '\0':
3702       --pf;
3703       res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3704       /* FALLTHRU */
3705     case '%':
3706       dir.fmtfunc = format_percent;
3707       break;
3708 
3709     case 'a':
3710     case 'A':
3711     case 'e':
3712     case 'E':
3713     case 'f':
3714     case 'F':
3715     case 'g':
3716     case 'G':
3717       res->floating = true;
3718       dir.fmtfunc = format_floating;
3719       break;
3720 
3721     case 'd':
3722     case 'i':
3723     case 'o':
3724     case 'u':
3725     case 'x':
3726     case 'X':
3727       dir.fmtfunc = format_integer;
3728       break;
3729 
3730     case 'p':
3731       /* The %p output is implementation-defined.  It's possible
3732 	 to determine this format but due to extensions (especially
3733 	 those of the Linux kernel -- see bug 78512) the first %p
3734 	 in the format string disables any further processing.  */
3735       return false;
3736 
3737     case 'n':
3738       /* %n has side-effects even when nothing is actually printed to
3739 	 any buffer.  */
3740       info.nowrite = false;
3741       dir.fmtfunc = format_none;
3742       break;
3743 
3744     case 'C':
3745     case 'c':
3746       /* POSIX wide character and C/POSIX narrow character.  */
3747       dir.fmtfunc = format_character;
3748       break;
3749 
3750     case 'S':
3751     case 's':
3752       /* POSIX wide string and C/POSIX narrow character string.  */
3753       dir.fmtfunc = format_string;
3754       break;
3755 
3756     default:
3757       /* Unknown conversion specification.  */
3758       return 0;
3759     }
3760 
3761   dir.specifier = target_to_host (*pf++);
3762 
3763   /* Store the length of the format directive.  */
3764   dir.len = pf - pcnt;
3765 
3766   /* Buffer for the directive in the host character set (used when
3767      the source character set is different).  */
3768   char hostdir[32];
3769 
3770   if (star_width)
3771     {
3772       if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3773 	dir.set_width (star_width, query);
3774       else
3775 	{
3776 	  /* Width specified by a va_list takes on the range [0, -INT_MIN]
3777 	     (width is the absolute value of that specified).  */
3778 	  dir.width[0] = 0;
3779 	  dir.width[1] = target_int_max () + 1;
3780 	}
3781     }
3782   else
3783     {
3784       if (width == HOST_WIDE_INT_MAX && werange)
3785 	{
3786 	  size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3787 	  size_t caret = begin + (werange - pcnt);
3788 	  size_t end = pf - info.fmtstr - 1;
3789 
3790 	  /* Create a location for the width part of the directive,
3791 	     pointing the caret at the first out-of-range digit.  */
3792 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3793 				caret, begin, end);
3794 
3795 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3796 		   "%<%.*s%> directive width out of range", (int) dir.len,
3797 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3798 	}
3799 
3800       dir.set_width (width);
3801     }
3802 
3803   if (star_precision)
3804     {
3805       if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3806 	dir.set_precision (star_precision, query);
3807       else
3808 	{
3809 	  /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3810 	     (unlike width, negative precision is ignored).  */
3811 	  dir.prec[0] = -1;
3812 	  dir.prec[1] = target_int_max ();
3813 	}
3814     }
3815   else
3816     {
3817       if (precision == HOST_WIDE_INT_MAX && perange)
3818 	{
3819 	  size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3820 	  size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3821 	  size_t end = pf - info.fmtstr - 2;
3822 
3823 	  /* Create a location for the precision part of the directive,
3824 	     including the leading period, pointing the caret at the first
3825 	     out-of-range digit .  */
3826 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3827 				caret, begin, end);
3828 
3829 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3830 		   "%<%.*s%> directive precision out of range", (int) dir.len,
3831 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3832 	}
3833 
3834       dir.set_precision (precision);
3835     }
3836 
3837   /* Extract the argument if the directive takes one and if it's
3838      available (e.g., the function doesn't take a va_list).  Treat
3839      missing arguments the same as va_list, even though they will
3840      have likely already been diagnosed by -Wformat.  */
3841   if (dir.specifier != '%'
3842       && *argno < gimple_call_num_args (info.callstmt))
3843     dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3844 
3845   if (dump_file)
3846     {
3847       fprintf (dump_file,
3848 	       "  Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3849 	       ": \"%.*s\"",
3850 	       dir.dirno,
3851 	       (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3852 	       (int)dir.len, dir.beg);
3853       if (star_width)
3854 	{
3855 	  if (dir.width[0] == dir.width[1])
3856 	    fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3857 		     dir.width[0]);
3858 	  else
3859 	    fprintf (dump_file,
3860 		     ", width in range [" HOST_WIDE_INT_PRINT_DEC
3861 		     ", " HOST_WIDE_INT_PRINT_DEC "]",
3862 		     dir.width[0], dir.width[1]);
3863 	}
3864 
3865       if (star_precision)
3866 	{
3867 	  if (dir.prec[0] == dir.prec[1])
3868 	    fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3869 		     dir.prec[0]);
3870 	  else
3871 	    fprintf (dump_file,
3872 		     ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3873 		     HOST_WIDE_INT_PRINT_DEC "]",
3874 		     dir.prec[0], dir.prec[1]);
3875 	}
3876       fputc ('\n', dump_file);
3877     }
3878 
3879   return dir.len;
3880 }
3881 
3882 /* Diagnose overlap between destination and %s directive arguments.  */
3883 
3884 static void
maybe_warn_overlap(call_info & info,format_result * res)3885 maybe_warn_overlap (call_info &info, format_result *res)
3886 {
3887   /* Two vectors of 1-based indices corresponding to either certainly
3888      or possibly aliasing arguments.  */
3889   auto_vec<int, 16> aliasarg[2];
3890 
3891   /* Go through the array of potentially aliasing directives and collect
3892      argument numbers of those that do or may overlap the destination
3893      object given the full result.  */
3894   for (unsigned i = 0; i != res->alias_count; ++i)
3895     {
3896       const format_result::alias_info &alias = res->aliases[i];
3897 
3898       enum { possible = -1, none = 0, certain = 1 } overlap = none;
3899 
3900       /* If the precision is zero there is no overlap.  (This only
3901 	 considers %s directives and ignores %n.)  */
3902       if (alias.dir.prec[0] == 0 && alias.dir.prec[1] == 0)
3903 	continue;
3904 
3905       if (alias.offset == HOST_WIDE_INT_MAX
3906 	  || info.dst_offset == HOST_WIDE_INT_MAX)
3907 	overlap = possible;
3908       else if (alias.offset == info.dst_offset)
3909 	overlap = alias.dir.prec[0] == 0 ? possible : certain;
3910       else
3911 	{
3912 	  /* Determine overlap from the range of output and offsets
3913 	     into the same destination as the source, and rule out
3914 	     impossible overlap.  */
3915 	  unsigned HOST_WIDE_INT albeg = alias.offset;
3916 	  unsigned HOST_WIDE_INT dstbeg = info.dst_offset;
3917 
3918 	  unsigned HOST_WIDE_INT alend = albeg + alias.range.min;
3919 	  unsigned HOST_WIDE_INT dstend = dstbeg + res->range.min - 1;
3920 
3921 	  if ((albeg <= dstbeg && alend > dstbeg)
3922 	      || (albeg >= dstbeg && albeg < dstend))
3923 	    overlap = certain;
3924 	  else
3925 	    {
3926 	      alend = albeg + alias.range.max;
3927 	      if (alend < albeg)
3928 		alend = HOST_WIDE_INT_M1U;
3929 
3930 	      dstend = dstbeg + res->range.max - 1;
3931 	      if (dstend < dstbeg)
3932 		dstend = HOST_WIDE_INT_M1U;
3933 
3934 	      if ((albeg >= dstbeg && albeg <= dstend)
3935 		  || (alend >= dstbeg && alend <= dstend))
3936 		overlap = possible;
3937 	    }
3938 	}
3939 
3940       if (overlap == none)
3941 	continue;
3942 
3943       /* Append the 1-based argument number.  */
3944       aliasarg[overlap != certain].safe_push (alias.dir.argno + 1);
3945 
3946       /* Disable any kind of optimization.  */
3947       res->range.unlikely = HOST_WIDE_INT_M1U;
3948     }
3949 
3950   tree arg0 = gimple_call_arg (info.callstmt, 0);
3951   location_t loc = gimple_location (info.callstmt);
3952 
3953   bool aliaswarn = false;
3954 
3955   unsigned ncertain = aliasarg[0].length ();
3956   unsigned npossible = aliasarg[1].length ();
3957   if (ncertain && npossible)
3958     {
3959       /* If there are multiple arguments that overlap, some certainly
3960 	 and some possibly, handle both sets in a single diagnostic.  */
3961       aliaswarn
3962 	= warning_at (loc, OPT_Wrestrict,
3963 		      "%qE arguments %Z and maybe %Z overlap destination "
3964 		      "object %qE",
3965 		      info.func, aliasarg[0].address (), ncertain,
3966 		      aliasarg[1].address (), npossible,
3967 		      info.dst_origin);
3968     }
3969   else if (ncertain)
3970     {
3971       /* There is only one set of two or more arguments and they all
3972 	 certainly overlap the destination.  */
3973       aliaswarn
3974 	= warning_n (loc, OPT_Wrestrict, ncertain,
3975 		     "%qE argument %Z overlaps destination object %qE",
3976 		     "%qE arguments %Z overlap destination object %qE",
3977 		     info.func, aliasarg[0].address (), ncertain,
3978 		     info.dst_origin);
3979     }
3980   else if (npossible)
3981     {
3982       /* There is only one set of two or more arguments and they all
3983 	 may overlap (but need not).  */
3984       aliaswarn
3985 	= warning_n (loc, OPT_Wrestrict, npossible,
3986 		     "%qE argument %Z may overlap destination object %qE",
3987 		     "%qE arguments %Z may overlap destination object %qE",
3988 		     info.func, aliasarg[1].address (), npossible,
3989 		     info.dst_origin);
3990     }
3991 
3992   if (aliaswarn)
3993     {
3994       res->warned = true;
3995 
3996       if (info.dst_origin != arg0)
3997 	{
3998 	  /* If its location is different from the first argument of the call
3999 	     point either at the destination object itself or at the expression
4000 	     that was used to determine the overlap.  */
4001 	  loc = (DECL_P (info.dst_origin)
4002 		 ? DECL_SOURCE_LOCATION (info.dst_origin)
4003 		 : EXPR_LOCATION (info.dst_origin));
4004 	  if (loc != UNKNOWN_LOCATION)
4005 	    inform (loc,
4006 		    "destination object referenced by %<restrict%>-qualified "
4007 		    "argument 1 was declared here");
4008 	}
4009     }
4010 }
4011 
4012 /* Compute the length of the output resulting from the call to a formatted
4013    output function described by INFO and store the result of the call in
4014    *RES.  Issue warnings for detected past the end writes.  Return true
4015    if the complete format string has been processed and *RES can be relied
4016    on, false otherwise (e.g., when a unknown or unhandled directive was seen
4017    that caused the processing to be terminated early).  */
4018 
4019 static bool
compute_format_length(call_info & info,format_result * res,pointer_query & ptr_qry)4020 compute_format_length (call_info &info, format_result *res,
4021 		       pointer_query &ptr_qry)
4022 {
4023   if (dump_file)
4024     {
4025       location_t callloc = gimple_location (info.callstmt);
4026       fprintf (dump_file, "%s:%i: ",
4027 	       LOCATION_FILE (callloc), LOCATION_LINE (callloc));
4028       print_generic_expr (dump_file, info.func, dump_flags);
4029 
4030       fprintf (dump_file,
4031 	       ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
4032 	       ", fmtstr = \"%s\"\n",
4033 	       info.objsize, info.fmtstr);
4034     }
4035 
4036   /* Reset the minimum and maximum byte counters.  */
4037   res->range.min = res->range.max = 0;
4038 
4039   /* No directive has been seen yet so the length of output is bounded
4040      by the known range [0, 0] (with no conversion resulting in a failure
4041      or producing more than 4K bytes) until determined otherwise.  */
4042   res->knownrange = true;
4043   res->floating = false;
4044   res->warned = false;
4045 
4046   /* 1-based directive counter.  */
4047   unsigned dirno = 1;
4048 
4049   /* The variadic argument counter.  */
4050   unsigned argno = info.argidx;
4051 
4052   bool success = true;
4053 
4054   for (const char *pf = info.fmtstr; ; ++dirno)
4055     {
4056       directive dir (&info, dirno);
4057 
4058       size_t n = parse_directive (info, dir, res, pf, &argno, ptr_qry.rvals);
4059 
4060       /* Return failure if the format function fails.  */
4061       if (!format_directive (info, res, dir, ptr_qry))
4062 	return false;
4063 
4064       /* Return success when the directive is zero bytes long and it's
4065 	 the last thing in the format string (i.e., it's the terminating
4066 	 nul, which isn't really a directive but handling it as one makes
4067 	 things simpler).  */
4068       if (!n)
4069 	{
4070 	  success = *pf == '\0';
4071 	  break;
4072 	}
4073 
4074       pf += n;
4075     }
4076 
4077   maybe_warn_overlap (info, res);
4078 
4079   /* The complete format string was processed (with or without warnings).  */
4080   return success;
4081 }
4082 
4083 /* Return the size of the object referenced by the expression DEST in
4084    statement STMT, if available, or the maximum possible size otherwise.  */
4085 
4086 static unsigned HOST_WIDE_INT
get_destination_size(tree dest,gimple * stmt,pointer_query & ptr_qry)4087 get_destination_size (tree dest, gimple *stmt, pointer_query &ptr_qry)
4088 {
4089   /* When there is no destination return the maximum.  */
4090   if (!dest)
4091     return HOST_WIDE_INT_MAX;
4092 
4093   /* Use compute_objsize to determine the size of the destination object.  */
4094   access_ref aref;
4095   if (!ptr_qry.get_ref (dest, stmt, &aref))
4096     return HOST_WIDE_INT_MAX;
4097 
4098   offset_int remsize = aref.size_remaining ();
4099   if (!wi::fits_uhwi_p (remsize))
4100     return HOST_WIDE_INT_MAX;
4101 
4102   return remsize.to_uhwi ();
4103 }
4104 
4105 /* Return true if the call described by INFO with result RES safe to
4106    optimize (i.e., no undefined behavior), and set RETVAL to the range
4107    of its return values.  */
4108 
4109 static bool
is_call_safe(const call_info & info,const format_result & res,bool under4k,unsigned HOST_WIDE_INT retval[2])4110 is_call_safe (const call_info &info,
4111 	      const format_result &res, bool under4k,
4112 	      unsigned HOST_WIDE_INT retval[2])
4113 {
4114   if (under4k && !res.posunder4k)
4115     return false;
4116 
4117   /* The minimum return value.  */
4118   retval[0] = res.range.min;
4119 
4120   /* The maximum return value is in most cases bounded by RES.RANGE.MAX
4121      but in cases involving multibyte characters could be as large as
4122      RES.RANGE.UNLIKELY.  */
4123   retval[1]
4124     = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
4125 
4126   /* Adjust the number of bytes which includes the terminating nul
4127      to reflect the return value of the function which does not.
4128      Because the valid range of the function is [INT_MIN, INT_MAX],
4129      a valid range before the adjustment below is [0, INT_MAX + 1]
4130      (the functions only return negative values on error or undefined
4131      behavior).  */
4132   if (retval[0] <= target_int_max () + 1)
4133     --retval[0];
4134   if (retval[1] <= target_int_max () + 1)
4135     --retval[1];
4136 
4137   /* Avoid the return value optimization when the behavior of the call
4138      is undefined either because any directive may have produced 4K or
4139      more of output, or the return value exceeds INT_MAX, or because
4140      the output overflows the destination object (but leave it enabled
4141      when the function is bounded because then the behavior is well-
4142      defined).  */
4143   if (retval[0] == retval[1]
4144       && (info.bounded || retval[0] < info.objsize)
4145       && retval[0] <= target_int_max ())
4146     return true;
4147 
4148   if ((info.bounded || retval[1] < info.objsize)
4149       && (retval[0] < target_int_max ()
4150 	  && retval[1] < target_int_max ()))
4151     return true;
4152 
4153   if (!under4k && (info.bounded || retval[0] < info.objsize))
4154     return true;
4155 
4156   return false;
4157 }
4158 
4159 /* Given a suitable result RES of a call to a formatted output function
4160    described by INFO, substitute the result for the return value of
4161    the call.  The result is suitable if the number of bytes it represents
4162    is known and exact.  A result that isn't suitable for substitution may
4163    have its range set to the range of return values, if that is known.
4164    Return true if the call is removed and gsi_next should not be performed
4165    in the caller.  */
4166 
4167 static bool
try_substitute_return_value(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4168 try_substitute_return_value (gimple_stmt_iterator *gsi,
4169 			     const call_info &info,
4170 			     const format_result &res)
4171 {
4172   tree lhs = gimple_get_lhs (info.callstmt);
4173 
4174   /* Set to true when the entire call has been removed.  */
4175   bool removed = false;
4176 
4177   /* The minimum and maximum return value.  */
4178   unsigned HOST_WIDE_INT retval[2] = {0};
4179   bool safe = is_call_safe (info, res, true, retval);
4180 
4181   if (safe
4182       && retval[0] == retval[1]
4183       /* Not prepared to handle possibly throwing calls here; they shouldn't
4184 	 appear in non-artificial testcases, except when the __*_chk routines
4185 	 are badly declared.  */
4186       && !stmt_ends_bb_p (info.callstmt))
4187     {
4188       tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
4189 				retval[0]);
4190 
4191       if (lhs == NULL_TREE && info.nowrite)
4192 	{
4193 	  /* Remove the call to the bounded function with a zero size
4194 	     (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs.  */
4195 	  unlink_stmt_vdef (info.callstmt);
4196 	  gsi_remove (gsi, true);
4197 	  removed = true;
4198 	}
4199       else if (info.nowrite)
4200 	{
4201 	  /* Replace the call to the bounded function with a zero size
4202 	     (e.g., snprintf(0, 0, "%i", 123) with the constant result
4203 	     of the function.  */
4204 	  gimplify_and_update_call_from_tree (gsi, cst);
4205 	  gimple *callstmt = gsi_stmt (*gsi);
4206 	  update_stmt (callstmt);
4207 	}
4208       else if (lhs)
4209 	{
4210 	  /* Replace the left-hand side of the call with the constant
4211 	     result of the formatted function.  */
4212 	  gimple_call_set_lhs (info.callstmt, NULL_TREE);
4213 	  gimple *g = gimple_build_assign (lhs, cst);
4214 	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
4215 	  update_stmt (info.callstmt);
4216 	}
4217 
4218       if (dump_file)
4219 	{
4220 	  if (removed)
4221 	    fprintf (dump_file, "  Removing call statement.");
4222 	  else
4223 	    {
4224 	      fprintf (dump_file, "  Substituting ");
4225 	      print_generic_expr (dump_file, cst, dump_flags);
4226 	      fprintf (dump_file, " for %s.\n",
4227 		       info.nowrite ? "statement" : "return value");
4228 	    }
4229 	}
4230     }
4231   else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
4232     {
4233       bool setrange = false;
4234 
4235       if (safe
4236 	  && (info.bounded || retval[1] < info.objsize)
4237 	  && (retval[0] < target_int_max ()
4238 	      && retval[1] < target_int_max ()))
4239 	{
4240 	  /* If the result is in a valid range bounded by the size of
4241 	     the destination set it so that it can be used for subsequent
4242 	     optimizations.  */
4243 	  int prec = TYPE_PRECISION (integer_type_node);
4244 
4245 	  wide_int min = wi::shwi (retval[0], prec);
4246 	  wide_int max = wi::shwi (retval[1], prec);
4247 	  set_range_info (lhs, VR_RANGE, min, max);
4248 
4249 	  setrange = true;
4250 	}
4251 
4252       if (dump_file)
4253 	{
4254 	  const char *inbounds
4255 	    = (retval[0] < info.objsize
4256 	       ? (retval[1] < info.objsize
4257 		  ? "in" : "potentially out-of")
4258 	       : "out-of");
4259 
4260 	  const char *what = setrange ? "Setting" : "Discarding";
4261 	  if (retval[0] != retval[1])
4262 	    fprintf (dump_file,
4263 		     "  %s %s-bounds return value range ["
4264 		     HOST_WIDE_INT_PRINT_UNSIGNED ", "
4265 		     HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
4266 		     what, inbounds, retval[0], retval[1]);
4267 	  else
4268 	    fprintf (dump_file, "  %s %s-bounds return value "
4269 		     HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
4270 		     what, inbounds, retval[0]);
4271 	}
4272     }
4273 
4274   if (dump_file)
4275     fputc ('\n', dump_file);
4276 
4277   return removed;
4278 }
4279 
4280 /* Try to simplify a s{,n}printf call described by INFO with result
4281    RES by replacing it with a simpler and presumably more efficient
4282    call (such as strcpy).  */
4283 
4284 static bool
try_simplify_call(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4285 try_simplify_call (gimple_stmt_iterator *gsi,
4286 		   const call_info &info,
4287 		   const format_result &res)
4288 {
4289   unsigned HOST_WIDE_INT dummy[2];
4290   if (!is_call_safe (info, res, info.retval_used (), dummy))
4291     return false;
4292 
4293   switch (info.fncode)
4294     {
4295     case BUILT_IN_SNPRINTF:
4296       return gimple_fold_builtin_snprintf (gsi);
4297 
4298     case BUILT_IN_SPRINTF:
4299       return gimple_fold_builtin_sprintf (gsi);
4300 
4301     default:
4302       ;
4303     }
4304 
4305   return false;
4306 }
4307 
4308 /* Return the zero-based index of the format string argument of a printf
4309    like function and set *IDX_ARGS to the first format argument.  When
4310    no such index exists return UINT_MAX.  */
4311 
4312 static unsigned
get_user_idx_format(tree fndecl,unsigned * idx_args)4313 get_user_idx_format (tree fndecl, unsigned *idx_args)
4314 {
4315   tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
4316   if (!attrs)
4317     attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
4318 
4319   if (!attrs)
4320     return UINT_MAX;
4321 
4322   attrs = TREE_VALUE (attrs);
4323 
4324   tree archetype = TREE_VALUE (attrs);
4325   if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
4326     return UINT_MAX;
4327 
4328   attrs = TREE_CHAIN (attrs);
4329   tree fmtarg = TREE_VALUE (attrs);
4330 
4331   attrs = TREE_CHAIN (attrs);
4332   tree elliparg = TREE_VALUE (attrs);
4333 
4334   /* Attribute argument indices are 1-based but we use zero-based.  */
4335   *idx_args = tree_to_uhwi (elliparg) - 1;
4336   return tree_to_uhwi (fmtarg) - 1;
4337 }
4338 
4339 }   /* Unnamed namespace.  */
4340 
4341 /* Determine if a GIMPLE call at *GSI is to one of the sprintf-like built-in
4342    functions and if so, handle it.  Return true if the call is removed and
4343    gsi_next should not be performed in the caller.  */
4344 
4345 bool
handle_printf_call(gimple_stmt_iterator * gsi,pointer_query & ptr_qry)4346 handle_printf_call (gimple_stmt_iterator *gsi, pointer_query &ptr_qry)
4347 {
4348   init_target_to_host_charmap ();
4349 
4350   call_info info = call_info ();
4351 
4352   info.callstmt = gsi_stmt (*gsi);
4353   info.func = gimple_call_fndecl (info.callstmt);
4354   if (!info.func)
4355     return false;
4356 
4357   /* Format string argument number (valid for all functions).  */
4358   unsigned idx_format = UINT_MAX;
4359   if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4360     info.fncode = DECL_FUNCTION_CODE (info.func);
4361   else
4362     {
4363       unsigned idx_args;
4364       idx_format = get_user_idx_format (info.func, &idx_args);
4365       if (idx_format == UINT_MAX
4366 	  || idx_format >= gimple_call_num_args (info.callstmt)
4367 	  || idx_args > gimple_call_num_args (info.callstmt)
4368 	  || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt,
4369 							  idx_format))))
4370 	return false;
4371       info.fncode = BUILT_IN_NONE;
4372       info.argidx = idx_args;
4373     }
4374 
4375   /* The size of the destination as in snprintf(dest, size, ...).  */
4376   unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
4377 
4378   /* The size of the destination determined by __builtin_object_size.  */
4379   unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
4380 
4381   /* Zero-based buffer size argument number (snprintf and vsnprintf).  */
4382   unsigned idx_dstsize = UINT_MAX;
4383 
4384   /* Object size argument number (snprintf_chk and vsnprintf_chk).  */
4385   unsigned idx_objsize = UINT_MAX;
4386 
4387   /* Destinaton argument number (valid for sprintf functions only).  */
4388   unsigned idx_dstptr = 0;
4389 
4390   switch (info.fncode)
4391     {
4392     case BUILT_IN_NONE:
4393       // User-defined function with attribute format (printf).
4394       idx_dstptr = -1;
4395       break;
4396 
4397     case BUILT_IN_FPRINTF:
4398       // Signature:
4399       //   __builtin_fprintf (FILE*, format, ...)
4400       idx_format = 1;
4401       info.argidx = 2;
4402       idx_dstptr = -1;
4403       break;
4404 
4405     case BUILT_IN_FPRINTF_CHK:
4406       // Signature:
4407       //   __builtin_fprintf_chk (FILE*, ost, format, ...)
4408       idx_format = 2;
4409       info.argidx = 3;
4410       idx_dstptr = -1;
4411       break;
4412 
4413     case BUILT_IN_FPRINTF_UNLOCKED:
4414       // Signature:
4415       //   __builtin_fprintf_unnlocked (FILE*, format, ...)
4416       idx_format = 1;
4417       info.argidx = 2;
4418       idx_dstptr = -1;
4419       break;
4420 
4421     case BUILT_IN_PRINTF:
4422       // Signature:
4423       //   __builtin_printf (format, ...)
4424       idx_format = 0;
4425       info.argidx = 1;
4426       idx_dstptr = -1;
4427       break;
4428 
4429     case BUILT_IN_PRINTF_CHK:
4430       // Signature:
4431       //   __builtin_printf_chk (ost, format, ...)
4432       idx_format = 1;
4433       info.argidx = 2;
4434       idx_dstptr = -1;
4435       break;
4436 
4437     case BUILT_IN_PRINTF_UNLOCKED:
4438       // Signature:
4439       //   __builtin_printf (format, ...)
4440       idx_format = 0;
4441       info.argidx = 1;
4442       idx_dstptr = -1;
4443       break;
4444 
4445     case BUILT_IN_SPRINTF:
4446       // Signature:
4447       //   __builtin_sprintf (dst, format, ...)
4448       idx_format = 1;
4449       info.argidx = 2;
4450       break;
4451 
4452     case BUILT_IN_SPRINTF_CHK:
4453       // Signature:
4454       //   __builtin___sprintf_chk (dst, ost, objsize, format, ...)
4455       idx_objsize = 2;
4456       idx_format = 3;
4457       info.argidx = 4;
4458       break;
4459 
4460     case BUILT_IN_SNPRINTF:
4461       // Signature:
4462       //   __builtin_snprintf (dst, size, format, ...)
4463       idx_dstsize = 1;
4464       idx_format = 2;
4465       info.argidx = 3;
4466       info.bounded = true;
4467       break;
4468 
4469     case BUILT_IN_SNPRINTF_CHK:
4470       // Signature:
4471       //   __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
4472       idx_dstsize = 1;
4473       idx_objsize = 3;
4474       idx_format = 4;
4475       info.argidx = 5;
4476       info.bounded = true;
4477       break;
4478 
4479     case BUILT_IN_VFPRINTF:
4480       // Signature:
4481       //   __builtin_vprintf (FILE*, format, va_list)
4482       idx_format = 1;
4483       info.argidx = -1;
4484       idx_dstptr = -1;
4485       break;
4486 
4487     case BUILT_IN_VFPRINTF_CHK:
4488       // Signature:
4489       //   __builtin___vfprintf_chk (FILE*, ost, format, va_list)
4490       idx_format = 2;
4491       info.argidx = -1;
4492       idx_dstptr = -1;
4493       break;
4494 
4495     case BUILT_IN_VPRINTF:
4496       // Signature:
4497       //   __builtin_vprintf (format, va_list)
4498       idx_format = 0;
4499       info.argidx = -1;
4500       idx_dstptr = -1;
4501       break;
4502 
4503     case BUILT_IN_VPRINTF_CHK:
4504       // Signature:
4505       //   __builtin___vprintf_chk (ost, format, va_list)
4506       idx_format = 1;
4507       info.argidx = -1;
4508       idx_dstptr = -1;
4509       break;
4510 
4511     case BUILT_IN_VSNPRINTF:
4512       // Signature:
4513       //   __builtin_vsprintf (dst, size, format, va)
4514       idx_dstsize = 1;
4515       idx_format = 2;
4516       info.argidx = -1;
4517       info.bounded = true;
4518       break;
4519 
4520     case BUILT_IN_VSNPRINTF_CHK:
4521       // Signature:
4522       //   __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
4523       idx_dstsize = 1;
4524       idx_objsize = 3;
4525       idx_format = 4;
4526       info.argidx = -1;
4527       info.bounded = true;
4528       break;
4529 
4530     case BUILT_IN_VSPRINTF:
4531       // Signature:
4532       //   __builtin_vsprintf (dst, format, va)
4533       idx_format = 1;
4534       info.argidx = -1;
4535       break;
4536 
4537     case BUILT_IN_VSPRINTF_CHK:
4538       // Signature:
4539       //   __builtin___vsprintf_chk (dst, ost, objsize, format, va)
4540       idx_format = 3;
4541       idx_objsize = 2;
4542       info.argidx = -1;
4543       break;
4544 
4545     default:
4546       return false;
4547     }
4548 
4549   /* Set the global warning level for this function.  */
4550   warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
4551 
4552   /* For all string functions the first argument is a pointer to
4553      the destination.  */
4554   tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
4555 		 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);
4556 
4557   info.format = gimple_call_arg (info.callstmt, idx_format);
4558 
4559   /* True when the destination size is constant as opposed to the lower
4560      or upper bound of a range.  */
4561   bool dstsize_cst_p = true;
4562   bool posunder4k = true;
4563 
4564   if (idx_dstsize == UINT_MAX)
4565     {
4566       /* For non-bounded functions like sprintf, determine the size
4567 	 of the destination from the object or pointer passed to it
4568 	 as the first argument.  */
4569       dstsize = get_destination_size (dstptr, info.callstmt, ptr_qry);
4570     }
4571   else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
4572     {
4573       /* For bounded functions try to get the size argument.  */
4574 
4575       if (TREE_CODE (size) == INTEGER_CST)
4576 	{
4577 	  dstsize = tree_to_uhwi (size);
4578 	  /* No object can be larger than SIZE_MAX bytes (half the address
4579 	     space) on the target.
4580 	     The functions are defined only for output of at most INT_MAX
4581 	     bytes.  Specifying a bound in excess of that limit effectively
4582 	     defeats the bounds checking (and on some implementations such
4583 	     as Solaris cause the function to fail with EINVAL).  */
4584 	  if (dstsize > target_size_max () / 2)
4585 	    {
4586 	      /* Avoid warning if -Wstringop-overflow is specified since
4587 		 it also warns for the same thing though only for the
4588 		 checking built-ins.  */
4589 	      if ((idx_objsize == UINT_MAX
4590 		   || !warn_stringop_overflow))
4591 		warning_at (gimple_location (info.callstmt), info.warnopt (),
4592 			    "specified bound %wu exceeds maximum object size "
4593 			    "%wu",
4594 			    dstsize, target_size_max () / 2);
4595 	      /* POSIX requires snprintf to fail if DSTSIZE is greater
4596 		 than INT_MAX.  Even though not all POSIX implementations
4597 		 conform to the requirement, avoid folding in this case.  */
4598 	      posunder4k = false;
4599 	    }
4600 	  else if (dstsize > target_int_max ())
4601 	    {
4602 	      warning_at (gimple_location (info.callstmt), info.warnopt (),
4603 			  "specified bound %wu exceeds %<INT_MAX%>",
4604 			  dstsize);
4605 	      /* POSIX requires snprintf to fail if DSTSIZE is greater
4606 		 than INT_MAX.  Avoid folding in that case.  */
4607 	      posunder4k = false;
4608 	    }
4609 	}
4610       else if (TREE_CODE (size) == SSA_NAME)
4611 	{
4612 	  /* Try to determine the range of values of the argument
4613 	     and use the greater of the two at level 1 and the smaller
4614 	     of them at level 2.  */
4615 	  value_range vr;
4616 	  ptr_qry.rvals->range_of_expr (vr, size, info.callstmt);
4617 
4618 	  if (!vr.undefined_p ())
4619 	    {
4620 	      tree type = TREE_TYPE (size);
4621 	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
4622 	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
4623 	      unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (tmin);
4624 	      unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (tmax);
4625 	      dstsize = warn_level < 2 ? maxsize : minsize;
4626 
4627 	      if (minsize > target_int_max ())
4628 		warning_at (gimple_location (info.callstmt), info.warnopt (),
4629 			    "specified bound range [%wu, %wu] exceeds "
4630 			    "%<INT_MAX%>",
4631 			    minsize, maxsize);
4632 
4633 	      /* POSIX requires snprintf to fail if DSTSIZE is greater
4634 		 than INT_MAX.  Avoid folding if that's possible.  */
4635 	      if (maxsize > target_int_max ())
4636 		posunder4k = false;
4637 	    }
4638 
4639 	  /* The destination size is not constant.  If the function is
4640 	     bounded (e.g., snprintf) a lower bound of zero doesn't
4641 	     necessarily imply it can be eliminated.  */
4642 	  dstsize_cst_p = false;
4643 	}
4644     }
4645 
4646   if (idx_objsize != UINT_MAX)
4647     if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
4648       if (tree_fits_uhwi_p (size))
4649 	objsize = tree_to_uhwi (size);
4650 
4651   if (info.bounded && !dstsize)
4652     {
4653       /* As a special case, when the explicitly specified destination
4654 	 size argument (to a bounded function like snprintf) is zero
4655 	 it is a request to determine the number of bytes on output
4656 	 without actually producing any.  Pretend the size is
4657 	 unlimited in this case.  */
4658       info.objsize = HOST_WIDE_INT_MAX;
4659       info.nowrite = dstsize_cst_p;
4660     }
4661   else
4662     {
4663       /* For calls to non-bounded functions or to those of bounded
4664 	 functions with a non-zero size, warn if the destination
4665 	 pointer is null.  */
4666       if (dstptr && integer_zerop (dstptr))
4667 	{
4668 	  /* This is diagnosed with -Wformat only when the null is a constant
4669 	     pointer.  The warning here diagnoses instances where the pointer
4670 	     is not constant.  */
4671 	  location_t loc = gimple_location (info.callstmt);
4672 	  warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
4673 		      info.warnopt (), "null destination pointer");
4674 	  return false;
4675 	}
4676 
4677       /* Set the object size to the smaller of the two arguments
4678 	 of both have been specified and they're not equal.  */
4679       info.objsize = dstsize < objsize ? dstsize : objsize;
4680 
4681       if (info.bounded
4682 	  && dstsize < target_size_max () / 2 && objsize < dstsize
4683 	  /* Avoid warning if -Wstringop-overflow is specified since
4684 	     it also warns for the same thing though only for the
4685 	     checking built-ins.  */
4686 	  && (idx_objsize == UINT_MAX
4687 	      || !warn_stringop_overflow))
4688 	{
4689 	  warning_at (gimple_location (info.callstmt), info.warnopt (),
4690 		      "specified bound %wu exceeds the size %wu "
4691 		      "of the destination object", dstsize, objsize);
4692 	}
4693     }
4694 
4695   /* Determine if the format argument may be null and warn if not
4696      and if the argument is null.  */
4697   if (integer_zerop (info.format)
4698       && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4699     {
4700       location_t loc = gimple_location (info.callstmt);
4701       warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4702 		  info.warnopt (), "null format string");
4703       return false;
4704     }
4705 
4706   info.fmtstr = get_format_string (info.format, &info.fmtloc);
4707   if (!info.fmtstr)
4708     return false;
4709 
4710   if (warn_restrict)
4711     {
4712       /* Compute the origin of the destination pointer and its offset
4713 	 from the base object/pointer if possible.  */
4714       info.dst_offset = 0;
4715       info.dst_origin = get_origin_and_offset (dstptr, &info.dst_field,
4716 					       &info.dst_offset);
4717     }
4718 
4719   /* The result is the number of bytes output by the formatted function,
4720      including the terminating NUL.  */
4721   format_result res;
4722 
4723   /* I/O functions with no destination argument (i.e., all forms of fprintf
4724      and printf) may fail under any conditions.  Others (i.e., all forms of
4725      sprintf) may only fail under specific conditions determined for each
4726      directive.  Clear POSUNDER4K for the former set of functions and set
4727      it to true for the latter (it can only be cleared later, but it is
4728      never set to true again).  */
4729   res.posunder4k = posunder4k && dstptr;
4730 
4731   bool success = compute_format_length (info, &res, ptr_qry);
4732   if (res.warned)
4733     suppress_warning (info.callstmt, info.warnopt ());
4734 
4735   /* When optimizing and the printf return value optimization is enabled,
4736      attempt to substitute the computed result for the return value of
4737      the call.  Avoid this optimization when -frounding-math is in effect
4738      and the format string contains a floating point directive.  */
4739   bool call_removed = false;
4740   if (success && optimize > 0)
4741     {
4742       /* Save a copy of the iterator pointing at the call.  The iterator
4743 	 may change to point past the call in try_substitute_return_value
4744 	 but the original value is needed in try_simplify_call.  */
4745       gimple_stmt_iterator gsi_call = *gsi;
4746 
4747       if (flag_printf_return_value
4748 	  && (!flag_rounding_math || !res.floating))
4749 	call_removed = try_substitute_return_value (gsi, info, res);
4750 
4751       if (!call_removed)
4752 	try_simplify_call (&gsi_call, info, res);
4753     }
4754 
4755   return call_removed;
4756 }
4757