1 /* String length optimization
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-fold.h"
38 #include "tree-eh.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-range.h"
63 #include "tree-ssa.h"
64
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
69
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
72
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
75
76 /* String information record. */
77 struct strinfo
78 {
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
81
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
89
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
93
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
142 };
143
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
146
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
154
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
157 {
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
161 };
162
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
165 {
166 struct tree_map_base base;
167 struct stridxlist list;
168 };
169
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
174
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
181
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
184
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
189 {
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
194
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196 static bool get_range_strlen_dynamic (tree, gimple *, c_strlen_data *,
197 bitmap, pointer_query *, unsigned *);
198
199 /* Sets MINMAX to either the constant value or the range VAL is in
200 and returns either the constant value or VAL on success or null
201 when the range couldn't be determined. Uses RVALS or CFUN for
202 range info, whichever is nonnull. */
203
204 tree
get_range(tree val,gimple * stmt,wide_int minmax[2],range_query * rvals)205 get_range (tree val, gimple *stmt, wide_int minmax[2],
206 range_query *rvals /* = NULL */)
207 {
208 if (!rvals)
209 {
210 if (!cfun)
211 /* When called from front ends for global initializers CFUN
212 may be null. */
213 return NULL_TREE;
214
215 rvals = get_range_query (cfun);
216 }
217
218 value_range vr;
219 if (!rvals->range_of_expr (vr, val, stmt))
220 return NULL_TREE;
221
222 value_range_kind rng = vr.kind ();
223 if (rng == VR_RANGE)
224 {
225 /* Only handle straight ranges. */
226 minmax[0] = wi::to_wide (vr.min ());
227 minmax[1] = wi::to_wide (vr.max ());
228 return val;
229 }
230
231 return NULL_TREE;
232 }
233
234 class strlen_pass : public dom_walker
235 {
236 public:
strlen_pass(cdi_direction direction)237 strlen_pass (cdi_direction direction)
238 : dom_walker (direction),
239 ptr_qry (&m_ranger),
240 m_cleanup_cfg (false)
241 {
242 }
243
244 ~strlen_pass ();
245
246 virtual edge before_dom_children (basic_block);
247 virtual void after_dom_children (basic_block);
248
249 bool check_and_optimize_stmt (bool *cleanup_eh);
250 bool check_and_optimize_call (bool *zero_write);
251 bool handle_assign (tree lhs, bool *zero_write);
252 bool handle_store (bool *zero_write);
253 void handle_pointer_plus ();
254 void handle_builtin_strlen ();
255 void handle_builtin_strchr ();
256 void handle_builtin_strcpy (built_in_function);
257 void handle_integral_assign (bool *cleanup_eh);
258 void handle_builtin_stxncpy_strncat (bool append_p);
259 void handle_builtin_memcpy (built_in_function bcode);
260 void handle_builtin_strcat (built_in_function bcode);
261 void handle_builtin_strncat (built_in_function);
262 bool handle_builtin_memset (bool *zero_write);
263 bool handle_builtin_memcmp ();
264 bool handle_builtin_string_cmp ();
265 void handle_alloc_call (built_in_function);
266 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
267 strinfo *si = NULL, bool plus_one = false,
268 bool rawmem = false);
269 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
270 unsigned HOST_WIDE_INT len,
271 strinfo *si = NULL,
272 bool plus_one = false, bool rawmem = false);
273 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
274 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
275 tree arg2, int idx2,
276 unsigned HOST_WIDE_INT bound,
277 unsigned HOST_WIDE_INT len[2],
278 unsigned HOST_WIDE_INT *psize);
279 bool count_nonzero_bytes (tree expr_or_type,
280 gimple *stmt,
281 unsigned lenrange[3], bool *nulterm,
282 bool *allnul, bool *allnonnul);
283 bool count_nonzero_bytes (tree exp,
284 gimple *stmt,
285 unsigned HOST_WIDE_INT offset,
286 unsigned HOST_WIDE_INT nbytes,
287 unsigned lenrange[3], bool *nulterm,
288 bool *allnul, bool *allnonnul,
289 ssa_name_limit_t &snlim);
290 bool count_nonzero_bytes_addr (tree exp,
291 gimple *stmt,
292 unsigned HOST_WIDE_INT offset,
293 unsigned HOST_WIDE_INT nbytes,
294 unsigned lenrange[3], bool *nulterm,
295 bool *allnul, bool *allnonnul,
296 ssa_name_limit_t &snlim);
297 bool get_len_or_size (gimple *stmt, tree arg, int idx,
298 unsigned HOST_WIDE_INT lenrng[2],
299 unsigned HOST_WIDE_INT *size, bool *nulterm);
300
301 gimple_ranger m_ranger;
302
303 /* A pointer_query object to store information about pointers and
304 their targets in. */
305 pointer_query ptr_qry;
306
307 gimple_stmt_iterator m_gsi;
308
309 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
310 execute function. */
311 bool m_cleanup_cfg;
312 };
313
314 /* Return:
315
316 * +1 if SI is known to start with more than OFF nonzero characters.
317
318 * 0 if SI is known to start with exactly OFF nonzero characters.
319
320 * -1 if SI either does not start with OFF nonzero characters
321 or the relationship between the number of leading nonzero
322 characters in SI and OFF is unknown. */
323
324 static int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off)325 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
326 {
327 if (si->nonzero_chars
328 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
329 return compare_tree_int (si->nonzero_chars, off);
330 else
331 return -1;
332 }
333
334 /* Same as above but suitable also for strings with non-constant lengths.
335 Uses RVALS to determine length range. */
336
337 static int
compare_nonzero_chars(strinfo * si,gimple * stmt,unsigned HOST_WIDE_INT off,range_query * rvals)338 compare_nonzero_chars (strinfo *si, gimple *stmt,
339 unsigned HOST_WIDE_INT off,
340 range_query *rvals)
341 {
342 if (!si->nonzero_chars)
343 return -1;
344
345 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
346 return compare_tree_int (si->nonzero_chars, off);
347
348 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
349 return -1;
350
351 value_range vr;
352 if (!rvals->range_of_expr (vr, si->nonzero_chars, stmt))
353 return -1;
354 value_range_kind rng = vr.kind ();
355 if (rng != VR_RANGE)
356 return -1;
357
358 /* If the offset is less than the minimum length or if the bounds
359 of the length range are equal return the result of the comparison
360 same as in the constant case. Otherwise return a conservative
361 result. */
362 int cmpmin = compare_tree_int (vr.min (), off);
363 if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
364 return cmpmin;
365
366 return -1;
367 }
368
369 /* Return true if SI is known to be a zero-length string. */
370
371 static inline bool
zero_length_string_p(strinfo * si)372 zero_length_string_p (strinfo *si)
373 {
374 return si->full_string_p && integer_zerop (si->nonzero_chars);
375 }
376
377 /* Return strinfo vector entry IDX. */
378
379 static inline strinfo *
get_strinfo(int idx)380 get_strinfo (int idx)
381 {
382 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
383 return NULL;
384 return (*stridx_to_strinfo)[idx];
385 }
386
387 /* Get the next strinfo in the chain after SI, or null if none. */
388
389 static inline strinfo *
get_next_strinfo(strinfo * si)390 get_next_strinfo (strinfo *si)
391 {
392 if (si->next == 0)
393 return NULL;
394 strinfo *nextsi = get_strinfo (si->next);
395 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
396 return NULL;
397 return nextsi;
398 }
399
400 /* Helper function for get_stridx. Return the strinfo index of the address
401 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
402 OK to return the index for some X <= &EXP and store &EXP - X in
403 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
404 information. */
405
406 static int
get_addr_stridx(tree exp,gimple * stmt,tree ptr,unsigned HOST_WIDE_INT * offset_out,range_query * rvals=NULL)407 get_addr_stridx (tree exp, gimple *stmt,
408 tree ptr, unsigned HOST_WIDE_INT *offset_out,
409 range_query *rvals = NULL)
410 {
411 HOST_WIDE_INT off;
412 struct stridxlist *list, *last = NULL;
413 tree base;
414
415 if (!decl_to_stridxlist_htab)
416 return 0;
417
418 poly_int64 poff;
419 base = get_addr_base_and_unit_offset (exp, &poff);
420 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
421 return 0;
422
423 list = decl_to_stridxlist_htab->get (base);
424 if (list == NULL)
425 return 0;
426
427 do
428 {
429 if (list->offset == off)
430 {
431 if (offset_out)
432 *offset_out = 0;
433 return list->idx;
434 }
435 if (list->offset > off)
436 return 0;
437 last = list;
438 list = list->next;
439 }
440 while (list);
441
442 if ((offset_out || ptr) && last && last->idx > 0)
443 {
444 unsigned HOST_WIDE_INT rel_off
445 = (unsigned HOST_WIDE_INT) off - last->offset;
446 strinfo *si = get_strinfo (last->idx);
447 if (si && compare_nonzero_chars (si, stmt, rel_off, rvals) >= 0)
448 {
449 if (offset_out)
450 {
451 *offset_out = rel_off;
452 return last->idx;
453 }
454 else
455 return get_stridx_plus_constant (si, rel_off, ptr);
456 }
457 }
458 return 0;
459 }
460
461 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
462 to a known strinfo with an offset and OFFRNG is non-null, sets
463 both elements of the OFFRNG array to the range of the offset and
464 returns the index of the known strinfo. In this case the result
465 must not be used in for functions that modify the string.
466 When nonnull, uses RVALS to determine range information. */
467
468 static int
get_stridx(tree exp,gimple * stmt,wide_int offrng[2]=NULL,range_query * rvals=NULL)469 get_stridx (tree exp, gimple *stmt,
470 wide_int offrng[2] = NULL, range_query *rvals = NULL)
471 {
472 if (offrng)
473 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
474
475 if (TREE_CODE (exp) == SSA_NAME)
476 {
477 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
478 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
479
480 tree e = exp;
481 int last_idx = 0;
482 HOST_WIDE_INT offset = 0;
483 /* Follow a chain of at most 5 assignments. */
484 for (int i = 0; i < 5; i++)
485 {
486 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
487 if (!is_gimple_assign (def_stmt))
488 return last_idx;
489
490 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
491 tree ptr, off;
492
493 if (rhs_code == ADDR_EXPR)
494 {
495 /* Handle indices/offsets into VLAs which are implemented
496 as pointers to arrays. */
497 ptr = gimple_assign_rhs1 (def_stmt);
498 ptr = TREE_OPERAND (ptr, 0);
499
500 /* Handle also VLAs of types larger than char. */
501 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
502 {
503 if (TREE_CODE (ptr) == ARRAY_REF)
504 {
505 off = TREE_OPERAND (ptr, 1);
506 ptr = TREE_OPERAND (ptr, 0);
507 if (!integer_onep (eltsize))
508 {
509 /* Scale the array index by the size of the element
510 type in the rare case that it's greater than
511 the typical 1 for char, making sure both operands
512 have the same type. */
513 eltsize = fold_convert (ssizetype, eltsize);
514 off = fold_convert (ssizetype, off);
515 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
516 }
517 }
518 else
519 off = integer_zero_node;
520 }
521 else
522 return 0;
523
524 if (TREE_CODE (ptr) != MEM_REF)
525 return 0;
526
527 /* Add the MEM_REF byte offset. */
528 tree mem_off = TREE_OPERAND (ptr, 1);
529 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
530 ptr = TREE_OPERAND (ptr, 0);
531 }
532 else if (rhs_code == POINTER_PLUS_EXPR)
533 {
534 ptr = gimple_assign_rhs1 (def_stmt);
535 off = gimple_assign_rhs2 (def_stmt);
536 }
537 else
538 return 0;
539
540 if (TREE_CODE (ptr) != SSA_NAME)
541 return 0;
542
543 if (!tree_fits_shwi_p (off))
544 {
545 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
546 if (offrng)
547 {
548 /* Only when requested by setting OFFRNG to non-null,
549 return the index corresponding to the SSA_NAME.
550 Do this irrespective of the whether the offset
551 is known. */
552 if (get_range (off, def_stmt, offrng, rvals))
553 {
554 /* When the offset range is known, increment it
555 it by the constant offset computed in prior
556 iterations and store it in the OFFRNG array. */
557 offrng[0] += offset;
558 offrng[1] += offset;
559 }
560 else
561 {
562 /* When the offset range cannot be determined
563 store [0, SIZE_MAX] and let the caller decide
564 if the offset matters. */
565 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
566 offrng[0] = wi::zero (offrng[1].get_precision ());
567 }
568 return idx;
569 }
570 return 0;
571 }
572
573 HOST_WIDE_INT this_off = tree_to_shwi (off);
574 if (offrng)
575 {
576 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
577 offrng[1] += offrng[0];
578 }
579
580 if (this_off < 0)
581 return last_idx;
582
583 offset = (unsigned HOST_WIDE_INT) offset + this_off;
584 if (offset < 0)
585 return last_idx;
586
587 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
588 {
589 strinfo *si = get_strinfo (idx);
590 if (si)
591 {
592 if (compare_nonzero_chars (si, offset) >= 0)
593 return get_stridx_plus_constant (si, offset, exp);
594
595 if (offrng)
596 last_idx = idx;
597 }
598 }
599 e = ptr;
600 }
601
602 return last_idx;
603 }
604
605 if (TREE_CODE (exp) == ADDR_EXPR)
606 {
607 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, exp, NULL);
608 if (idx != 0)
609 return idx;
610 }
611
612 const char *p = c_getstr (exp);
613 if (p)
614 return ~(int) strlen (p);
615
616 return 0;
617 }
618
619 /* Return true if strinfo vector is shared with the immediate dominator. */
620
621 static inline bool
strinfo_shared(void)622 strinfo_shared (void)
623 {
624 return vec_safe_length (stridx_to_strinfo)
625 && (*stridx_to_strinfo)[0] != NULL;
626 }
627
628 /* Unshare strinfo vector that is shared with the immediate dominator. */
629
630 static void
unshare_strinfo_vec(void)631 unshare_strinfo_vec (void)
632 {
633 strinfo *si;
634 unsigned int i = 0;
635
636 gcc_assert (strinfo_shared ());
637 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
638 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
639 if (si != NULL)
640 si->refcount++;
641 (*stridx_to_strinfo)[0] = NULL;
642 }
643
644 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
645 Return a pointer to the location where the string index can
646 be stored (if 0) or is stored, or NULL if this can't be tracked. */
647
648 static int *
addr_stridxptr(tree exp)649 addr_stridxptr (tree exp)
650 {
651 HOST_WIDE_INT off;
652
653 poly_int64 poff;
654 tree base = get_addr_base_and_unit_offset (exp, &poff);
655 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
656 return NULL;
657
658 if (!decl_to_stridxlist_htab)
659 {
660 decl_to_stridxlist_htab
661 = new hash_map<tree_decl_hash, stridxlist> (64);
662 gcc_obstack_init (&stridx_obstack);
663 }
664
665 bool existed;
666 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
667 if (existed)
668 {
669 int i;
670 stridxlist *before = NULL;
671 for (i = 0; i < 32; i++)
672 {
673 if (list->offset == off)
674 return &list->idx;
675 if (list->offset > off && before == NULL)
676 before = list;
677 if (list->next == NULL)
678 break;
679 list = list->next;
680 }
681 if (i == 32)
682 return NULL;
683 if (before)
684 {
685 list = before;
686 before = XOBNEW (&stridx_obstack, struct stridxlist);
687 *before = *list;
688 list->next = before;
689 list->offset = off;
690 list->idx = 0;
691 return &list->idx;
692 }
693 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
694 list = list->next;
695 }
696
697 list->next = NULL;
698 list->offset = off;
699 list->idx = 0;
700 return &list->idx;
701 }
702
703 /* Create a new string index, or return 0 if reached limit. */
704
705 static int
new_stridx(tree exp)706 new_stridx (tree exp)
707 {
708 int idx;
709 if (max_stridx >= param_max_tracked_strlens)
710 return 0;
711 if (TREE_CODE (exp) == SSA_NAME)
712 {
713 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
714 return 0;
715 idx = max_stridx++;
716 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
717 return idx;
718 }
719 if (TREE_CODE (exp) == ADDR_EXPR)
720 {
721 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
722 if (pidx != NULL)
723 {
724 gcc_assert (*pidx == 0);
725 *pidx = max_stridx++;
726 return *pidx;
727 }
728 }
729 return 0;
730 }
731
732 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
733
734 static int
new_addr_stridx(tree exp)735 new_addr_stridx (tree exp)
736 {
737 int *pidx;
738 if (max_stridx >= param_max_tracked_strlens)
739 return 0;
740 pidx = addr_stridxptr (exp);
741 if (pidx != NULL)
742 {
743 gcc_assert (*pidx == 0);
744 *pidx = max_stridx++;
745 return *pidx;
746 }
747 return 0;
748 }
749
750 /* Create a new strinfo. */
751
752 static strinfo *
new_strinfo(tree ptr,int idx,tree nonzero_chars,bool full_string_p)753 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
754 {
755 strinfo *si = strinfo_pool.allocate ();
756 si->nonzero_chars = nonzero_chars;
757 STRIP_USELESS_TYPE_CONVERSION (ptr);
758 si->ptr = ptr;
759 si->stmt = NULL;
760 si->alloc = NULL;
761 si->endptr = NULL_TREE;
762 si->refcount = 1;
763 si->idx = idx;
764 si->first = 0;
765 si->prev = 0;
766 si->next = 0;
767 si->writable = false;
768 si->dont_invalidate = false;
769 si->full_string_p = full_string_p;
770 return si;
771 }
772
773 /* Decrease strinfo refcount and free it if not referenced anymore. */
774
775 static inline void
free_strinfo(strinfo * si)776 free_strinfo (strinfo *si)
777 {
778 if (si && --si->refcount == 0)
779 strinfo_pool.remove (si);
780 }
781
782 /* Set strinfo in the vector entry IDX to SI. */
783
784 static inline void
set_strinfo(int idx,strinfo * si)785 set_strinfo (int idx, strinfo *si)
786 {
787 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
788 unshare_strinfo_vec ();
789 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
790 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
791 (*stridx_to_strinfo)[idx] = si;
792 }
793
794 /* Return the first strinfo in the related strinfo chain
795 if all strinfos in between belong to the chain, otherwise NULL. */
796
797 static strinfo *
verify_related_strinfos(strinfo * origsi)798 verify_related_strinfos (strinfo *origsi)
799 {
800 strinfo *si = origsi, *psi;
801
802 if (origsi->first == 0)
803 return NULL;
804 for (; si->prev; si = psi)
805 {
806 if (si->first != origsi->first)
807 return NULL;
808 psi = get_strinfo (si->prev);
809 if (psi == NULL)
810 return NULL;
811 if (psi->next != si->idx)
812 return NULL;
813 }
814 if (si->idx != si->first)
815 return NULL;
816 return si;
817 }
818
819 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
820 Use LOC for folding. */
821
822 static void
set_endptr_and_length(location_t loc,strinfo * si,tree endptr)823 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
824 {
825 si->endptr = endptr;
826 si->stmt = NULL;
827 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
828 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
829 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
830 end_as_size, start_as_size);
831 si->full_string_p = true;
832 }
833
834 /* Return the string length, or NULL if it can't be computed.
835 The length may but need not be constant. Instead, it might be
836 the result of a strlen() call. */
837
838 static tree
get_string_length(strinfo * si)839 get_string_length (strinfo *si)
840 {
841 /* If the length has already been computed return it if it's exact
842 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
843 null if it isn't. */
844 if (si->nonzero_chars)
845 return si->full_string_p ? si->nonzero_chars : NULL;
846
847 /* If the string is the result of one of the built-in calls below
848 attempt to compute the length from the call statement. */
849 if (si->stmt)
850 {
851 gimple *stmt = si->stmt, *lenstmt;
852 tree callee, lhs, fn, tem;
853 location_t loc;
854 gimple_stmt_iterator gsi;
855
856 gcc_assert (is_gimple_call (stmt));
857 callee = gimple_call_fndecl (stmt);
858 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
859 lhs = gimple_call_lhs (stmt);
860 /* unshare_strinfo is intentionally not called here. The (delayed)
861 transformation of strcpy or strcat into stpcpy is done at the place
862 of the former strcpy/strcat call and so can affect all the strinfos
863 with the same stmt. If they were unshared before and transformation
864 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
865 just compute the right length. */
866 switch (DECL_FUNCTION_CODE (callee))
867 {
868 case BUILT_IN_STRCAT:
869 case BUILT_IN_STRCAT_CHK:
870 gsi = gsi_for_stmt (stmt);
871 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
872 gcc_assert (lhs == NULL_TREE);
873 tem = unshare_expr (gimple_call_arg (stmt, 0));
874 lenstmt = gimple_build_call (fn, 1, tem);
875 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
876 gimple_call_set_lhs (lenstmt, lhs);
877 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
878 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
879 tem = gimple_call_arg (stmt, 0);
880 if (!ptrofftype_p (TREE_TYPE (lhs)))
881 {
882 lhs = convert_to_ptrofftype (lhs);
883 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
884 true, GSI_SAME_STMT);
885 }
886 lenstmt = gimple_build_assign
887 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
888 POINTER_PLUS_EXPR,tem, lhs);
889 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
890 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
891 lhs = NULL_TREE;
892 /* FALLTHRU */
893 case BUILT_IN_STRCPY:
894 case BUILT_IN_STRCPY_CHK:
895 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
896 if (gimple_call_num_args (stmt) == 2)
897 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
898 else
899 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
900 gcc_assert (lhs == NULL_TREE);
901 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
902 {
903 fprintf (dump_file, "Optimizing: ");
904 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
905 }
906 gimple_call_set_fndecl (stmt, fn);
907 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
908 gimple_call_set_lhs (stmt, lhs);
909 update_stmt (stmt);
910 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
911 {
912 fprintf (dump_file, "into: ");
913 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
914 }
915 /* FALLTHRU */
916 case BUILT_IN_STPCPY:
917 case BUILT_IN_STPCPY_CHK:
918 gcc_assert (lhs != NULL_TREE);
919 loc = gimple_location (stmt);
920 set_endptr_and_length (loc, si, lhs);
921 for (strinfo *chainsi = verify_related_strinfos (si);
922 chainsi != NULL;
923 chainsi = get_next_strinfo (chainsi))
924 if (chainsi->nonzero_chars == NULL)
925 set_endptr_and_length (loc, chainsi, lhs);
926 break;
927 case BUILT_IN_ALLOCA:
928 case BUILT_IN_ALLOCA_WITH_ALIGN:
929 case BUILT_IN_MALLOC:
930 break;
931 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
932 default:
933 gcc_unreachable ();
934 break;
935 }
936 }
937
938 return si->nonzero_chars;
939 }
940
941 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
942 points to the valuation engine used to calculate ranges, and is
943 used to dump strlen range for non-constant results. */
944
945 DEBUG_FUNCTION void
dump_strlen_info(FILE * fp,gimple * stmt,range_query * rvals)946 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
947 {
948 if (stmt)
949 {
950 fprintf (fp, "\nDumping strlen pass data after ");
951 print_gimple_expr (fp, stmt, TDF_LINENO);
952 fputc ('\n', fp);
953 }
954 else
955 fprintf (fp, "\nDumping strlen pass data\n");
956
957 fprintf (fp, "max_stridx = %i\n", max_stridx);
958 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
959 ssa_ver_to_stridx.length ());
960 fprintf (fp, "stridx_to_strinfo");
961 if (stridx_to_strinfo)
962 {
963 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
964 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
965 {
966 if (strinfo *si = (*stridx_to_strinfo)[i])
967 {
968 if (!si->idx)
969 continue;
970 fprintf (fp, " idx = %i", si->idx);
971 if (si->ptr)
972 {
973 fprintf (fp, ", ptr = ");
974 print_generic_expr (fp, si->ptr);
975 }
976
977 if (si->nonzero_chars)
978 {
979 fprintf (fp, ", nonzero_chars = ");
980 print_generic_expr (fp, si->nonzero_chars);
981 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
982 {
983 value_range_kind rng = VR_UNDEFINED;
984 wide_int min, max;
985 if (rvals)
986 {
987 value_range vr;
988 rvals->range_of_expr (vr, si->nonzero_chars,
989 si->stmt);
990 rng = vr.kind ();
991 if (range_int_cst_p (&vr))
992 {
993 min = wi::to_wide (vr.min ());
994 max = wi::to_wide (vr.max ());
995 }
996 else
997 rng = VR_UNDEFINED;
998 }
999 else
1000 {
1001 value_range vr;
1002 get_range_query (cfun)
1003 ->range_of_expr (vr, si->nonzero_chars);
1004 rng = vr.kind ();
1005 if (!vr.undefined_p ())
1006 {
1007 min = wi::to_wide (vr.min ());
1008 max = wi::to_wide (vr.max ());
1009 }
1010 }
1011
1012 if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
1013 {
1014 fprintf (fp, " %s[%llu, %llu]",
1015 rng == VR_RANGE ? "" : "~",
1016 (long long) min.to_uhwi (),
1017 (long long) max.to_uhwi ());
1018 }
1019 }
1020 }
1021
1022 fprintf (fp, ", refcount = %i", si->refcount);
1023 if (si->stmt)
1024 {
1025 fprintf (fp, ", stmt = ");
1026 print_gimple_expr (fp, si->stmt, 0);
1027 }
1028 if (si->alloc)
1029 {
1030 fprintf (fp, ", alloc = ");
1031 print_gimple_expr (fp, si->alloc, 0);
1032 }
1033 if (si->writable)
1034 fprintf (fp, ", writable");
1035 if (si->dont_invalidate)
1036 fprintf (fp, ", dont_invalidate");
1037 if (si->full_string_p)
1038 fprintf (fp, ", full_string_p");
1039 if (strinfo *next = get_next_strinfo (si))
1040 {
1041 fprintf (fp, ", {");
1042 do
1043 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1044 while ((next = get_next_strinfo (next)));
1045 fprintf (fp, "}");
1046 }
1047 fputs ("\n", fp);
1048 }
1049 }
1050 }
1051 else
1052 fprintf (fp, " = null\n");
1053
1054 fprintf (fp, "decl_to_stridxlist_htab");
1055 if (decl_to_stridxlist_htab)
1056 {
1057 fputs ("\n", fp);
1058 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1059 for (iter_t it = decl_to_stridxlist_htab->begin ();
1060 it != decl_to_stridxlist_htab->end (); ++it)
1061 {
1062 tree decl = (*it).first;
1063 stridxlist *list = &(*it).second;
1064 fprintf (fp, " decl = ");
1065 print_generic_expr (fp, decl);
1066 if (list)
1067 {
1068 fprintf (fp, ", offsets = {");
1069 for (; list; list = list->next)
1070 fprintf (fp, "%lli%s", (long long) list->offset,
1071 list->next ? ", " : "");
1072 fputs ("}", fp);
1073 }
1074 fputs ("\n", fp);
1075 }
1076 }
1077 else
1078 fprintf (fp, " = null\n");
1079
1080 if (laststmt.stmt)
1081 {
1082 fprintf (fp, "laststmt = ");
1083 print_gimple_expr (fp, laststmt.stmt, 0);
1084 fprintf (fp, ", len = ");
1085 print_generic_expr (fp, laststmt.len);
1086 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1087 }
1088 }
1089
1090 /* Helper of get_range_strlen_dynamic(). See below. */
1091
1092 static bool
get_range_strlen_phi(tree src,gphi * phi,c_strlen_data * pdata,bitmap visited,pointer_query * ptr_qry,unsigned * pssa_def_max)1093 get_range_strlen_phi (tree src, gphi *phi,
1094 c_strlen_data *pdata, bitmap visited,
1095 pointer_query *ptr_qry, unsigned *pssa_def_max)
1096 {
1097 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (src)))
1098 return true;
1099
1100 if (*pssa_def_max == 0)
1101 return false;
1102
1103 --*pssa_def_max;
1104
1105 /* Iterate over the PHI arguments and determine the minimum and maximum
1106 length/size of each and incorporate them into the overall result. */
1107 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1108 {
1109 tree arg = gimple_phi_arg_def (phi, i);
1110 if (arg == gimple_phi_result (phi))
1111 continue;
1112
1113 c_strlen_data argdata = { };
1114 if (!get_range_strlen_dynamic (arg, phi, &argdata, visited, ptr_qry,
1115 pssa_def_max))
1116 {
1117 pdata->maxlen = build_all_ones_cst (size_type_node);
1118 continue;
1119 }
1120
1121 /* Set the DECL of an unterminated array this argument refers to
1122 if one hasn't been found yet. */
1123 if (!pdata->decl && argdata.decl)
1124 pdata->decl = argdata.decl;
1125
1126 if (!argdata.minlen
1127 || (integer_zerop (argdata.minlen)
1128 && (!argdata.maxbound
1129 || integer_all_onesp (argdata.maxbound))
1130 && integer_all_onesp (argdata.maxlen)))
1131 {
1132 /* Set the upper bound of the length to unbounded. */
1133 pdata->maxlen = build_all_ones_cst (size_type_node);
1134 continue;
1135 }
1136
1137 /* Adjust the minimum and maximum length determined so far and
1138 the upper bound on the array size. */
1139 if (TREE_CODE (argdata.minlen) == INTEGER_CST
1140 && (!pdata->minlen
1141 || tree_int_cst_lt (argdata.minlen, pdata->minlen)))
1142 pdata->minlen = argdata.minlen;
1143
1144 if (TREE_CODE (argdata.maxlen) == INTEGER_CST
1145 && (!pdata->maxlen
1146 || (argdata.maxlen
1147 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen))))
1148 pdata->maxlen = argdata.maxlen;
1149
1150 if (!pdata->maxbound
1151 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1152 || (argdata.maxbound
1153 && tree_int_cst_lt (pdata->maxbound, argdata.maxbound)
1154 && !integer_all_onesp (argdata.maxbound)))
1155 pdata->maxbound = argdata.maxbound;
1156 }
1157
1158 return true;
1159 }
1160
1161 /* Return the maximum possible length of the string PTR that's less
1162 than MAXLEN given the size of the object of subobject it points
1163 to at the given STMT. MAXLEN is the maximum length of the string
1164 determined so far. Return null when no such maximum can be
1165 determined. */
1166
1167 static tree
get_maxbound(tree ptr,gimple * stmt,offset_int maxlen,pointer_query * ptr_qry)1168 get_maxbound (tree ptr, gimple *stmt, offset_int maxlen,
1169 pointer_query *ptr_qry)
1170 {
1171 access_ref aref;
1172 if (!ptr_qry->get_ref (ptr, stmt, &aref))
1173 return NULL_TREE;
1174
1175 offset_int sizrem = aref.size_remaining ();
1176 if (sizrem <= 0)
1177 return NULL_TREE;
1178
1179 if (sizrem < maxlen)
1180 maxlen = sizrem - 1;
1181
1182 /* Try to determine the maximum from the subobject at the offset.
1183 This handles MEM [&some-struct, member-offset] that's often
1184 the result of folding COMPONENT_REF [some-struct, member]. */
1185 tree reftype = TREE_TYPE (aref.ref);
1186 if (!RECORD_OR_UNION_TYPE_P (reftype)
1187 || aref.offrng[0] != aref.offrng[1]
1188 || !wi::fits_shwi_p (aref.offrng[0]))
1189 return wide_int_to_tree (size_type_node, maxlen);
1190
1191 HOST_WIDE_INT off = aref.offrng[0].to_shwi ();
1192 tree fld = field_at_offset (reftype, NULL_TREE, off);
1193 if (!fld || !DECL_SIZE_UNIT (fld))
1194 return wide_int_to_tree (size_type_node, maxlen);
1195
1196 offset_int size = wi::to_offset (DECL_SIZE_UNIT (fld));
1197 if (maxlen < size)
1198 return wide_int_to_tree (size_type_node, maxlen);
1199
1200 return wide_int_to_tree (size_type_node, size - 1);
1201 }
1202
1203 /* Attempt to determine the length of the string SRC. On success, store
1204 the length in *PDATA and return true. Otherwise, return false.
1205 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1206 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1207 assignment limit used to prevent runaway recursion. */
1208
1209 static bool
get_range_strlen_dynamic(tree src,gimple * stmt,c_strlen_data * pdata,bitmap visited,pointer_query * ptr_qry,unsigned * pssa_def_max)1210 get_range_strlen_dynamic (tree src, gimple *stmt,
1211 c_strlen_data *pdata, bitmap visited,
1212 pointer_query *ptr_qry, unsigned *pssa_def_max)
1213 {
1214 int idx = get_stridx (src, stmt);
1215 if (!idx)
1216 {
1217 if (TREE_CODE (src) == SSA_NAME)
1218 {
1219 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1220 if (gphi *phi = dyn_cast<gphi *>(def_stmt))
1221 return get_range_strlen_phi (src, phi, pdata, visited, ptr_qry,
1222 pssa_def_max);
1223 }
1224
1225 /* Return success regardless of the result and handle *PDATA
1226 in the caller. */
1227 get_range_strlen (src, pdata, 1);
1228 return true;
1229 }
1230
1231 if (idx < 0)
1232 {
1233 /* SRC is a string of constant length. */
1234 pdata->minlen = build_int_cst (size_type_node, ~idx);
1235 pdata->maxlen = pdata->minlen;
1236 pdata->maxbound = pdata->maxlen;
1237 return true;
1238 }
1239
1240 if (strinfo *si = get_strinfo (idx))
1241 {
1242 pdata->minlen = get_string_length (si);
1243 if (!pdata->minlen && si->nonzero_chars)
1244 {
1245 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1246 pdata->minlen = si->nonzero_chars;
1247 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1248 {
1249 value_range vr;
1250 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1251 if (range_int_cst_p (&vr))
1252 {
1253 pdata->minlen = vr.min ();
1254 pdata->maxlen = vr.max ();
1255 }
1256 else
1257 pdata->minlen = build_zero_cst (size_type_node);
1258 }
1259 else
1260 pdata->minlen = build_zero_cst (size_type_node);
1261
1262 tree base = si->ptr;
1263 if (TREE_CODE (base) == ADDR_EXPR)
1264 base = TREE_OPERAND (base, 0);
1265
1266 HOST_WIDE_INT off;
1267 poly_int64 poff;
1268 base = get_addr_base_and_unit_offset (base, &poff);
1269 if (base
1270 && DECL_P (base)
1271 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1272 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1273 && poff.is_constant (&off))
1274 {
1275 tree basetype = TREE_TYPE (base);
1276 tree size = TYPE_SIZE_UNIT (basetype);
1277 if (TREE_CODE (size) == INTEGER_CST)
1278 {
1279 ++off; /* Increment for the terminating nul. */
1280 tree toffset = build_int_cst (size_type_node, off);
1281 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1282 toffset);
1283 pdata->maxbound = pdata->maxlen;
1284 }
1285 else
1286 pdata->maxlen = build_all_ones_cst (size_type_node);
1287 }
1288 else
1289 pdata->maxlen = build_all_ones_cst (size_type_node);
1290 }
1291 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1292 {
1293 value_range vr;
1294 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1295 if (range_int_cst_p (&vr))
1296 {
1297 pdata->minlen = vr.min ();
1298 pdata->maxlen = vr.max ();
1299 offset_int max = offset_int::from (vr.upper_bound (0), SIGNED);
1300 if (tree maxbound = get_maxbound (si->ptr, stmt, max, ptr_qry))
1301 pdata->maxbound = maxbound;
1302 else
1303 pdata->maxbound = pdata->maxlen;
1304 }
1305 else
1306 {
1307 pdata->minlen = build_zero_cst (size_type_node);
1308 pdata->maxlen = build_all_ones_cst (size_type_node);
1309 }
1310 }
1311 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1312 {
1313 pdata->maxlen = pdata->minlen;
1314 pdata->maxbound = pdata->minlen;
1315 }
1316 else
1317 {
1318 /* For PDATA->MINLEN that's a non-constant expression such
1319 as PLUS_EXPR whose value range is unknown, set the bounds
1320 to zero and SIZE_MAX. */
1321 pdata->minlen = build_zero_cst (size_type_node);
1322 pdata->maxlen = build_all_ones_cst (size_type_node);
1323 }
1324
1325 return true;
1326 }
1327
1328 return false;
1329 }
1330
1331 /* Analogous to get_range_strlen but for dynamically created strings,
1332 i.e., those created by calls to strcpy as opposed to just string
1333 constants.
1334 Try to obtain the range of the lengths of the string(s) referenced
1335 by SRC, or the size of the largest array SRC refers to if the range
1336 of lengths cannot be determined, and store all in *PDATA. RVALS
1337 points to the valuation engine used to calculate ranges. */
1338
1339 void
get_range_strlen_dynamic(tree src,gimple * stmt,c_strlen_data * pdata,pointer_query & ptr_qry)1340 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1341 pointer_query &ptr_qry)
1342 {
1343 auto_bitmap visited;
1344 tree maxbound = pdata->maxbound;
1345
1346 unsigned limit = param_ssa_name_def_chain_limit;
1347 if (!get_range_strlen_dynamic (src, stmt, pdata, visited, &ptr_qry, &limit))
1348 {
1349 /* On failure extend the length range to an impossible maximum
1350 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1351 members can stay unchanged regardless. */
1352 pdata->minlen = ssize_int (0);
1353 pdata->maxlen = build_all_ones_cst (size_type_node);
1354 }
1355 else if (!pdata->minlen)
1356 pdata->minlen = ssize_int (0);
1357
1358 /* If it's unchanged from it initial non-null value, set the conservative
1359 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1360 if (maxbound && pdata->maxbound == maxbound)
1361 pdata->maxbound = build_all_ones_cst (size_type_node);
1362 }
1363
1364 /* Invalidate string length information for strings whose length might
1365 change due to stores in STMT, except those marked DONT_INVALIDATE.
1366 For string-modifying statements, ZERO_WRITE is set when the statement
1367 wrote only zeros.
1368 Returns true if any STRIDX_TO_STRINFO entries were considered
1369 for invalidation. */
1370
1371 static bool
maybe_invalidate(gimple * stmt,bool zero_write=false)1372 maybe_invalidate (gimple *stmt, bool zero_write = false)
1373 {
1374 if (dump_file && (dump_flags & TDF_DETAILS))
1375 {
1376 fprintf (dump_file, "%s called for ", __func__);
1377 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1378 }
1379
1380 strinfo *si;
1381 bool nonempty = false;
1382
1383 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1384 {
1385 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1386 continue;
1387
1388 nonempty = true;
1389
1390 /* Unconditionally reset DONT_INVALIDATE. */
1391 bool dont_invalidate = si->dont_invalidate;
1392 si->dont_invalidate = false;
1393
1394 if (dont_invalidate)
1395 continue;
1396
1397 ao_ref r;
1398 tree size = si->nonzero_chars;
1399 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1400 /* Include the terminating nul in the size of the string
1401 to consider when determining possible clobber. But do not
1402 add it to 'size' since we don't know whether it would
1403 actually fit the allocated area. */
1404 if (known_size_p (r.size))
1405 {
1406 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1407 r.max_size += BITS_PER_UNIT;
1408 else
1409 r.max_size = -1;
1410 }
1411 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1412 {
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 {
1415 fputs (" statement may clobber object ", dump_file);
1416 print_generic_expr (dump_file, si->ptr);
1417 if (size && tree_fits_uhwi_p (size))
1418 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1419 " bytes in size", tree_to_uhwi (size));
1420 fputc ('\n', dump_file);
1421 }
1422
1423 set_strinfo (i, NULL);
1424 free_strinfo (si);
1425 continue;
1426 }
1427
1428 if (size
1429 && !zero_write
1430 && si->stmt
1431 && is_gimple_call (si->stmt)
1432 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1433 == BUILT_IN_CALLOC))
1434 {
1435 /* If the clobber test above considered the length of
1436 the string (including the nul), then for (potentially)
1437 non-zero writes that might modify storage allocated by
1438 calloc consider the whole object and if it might be
1439 clobbered by the statement reset the statement. */
1440 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1441 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1442 si->stmt = NULL;
1443 }
1444 }
1445
1446 if (dump_file && (dump_flags & TDF_DETAILS))
1447 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1448
1449 return nonempty;
1450 }
1451
1452 /* Unshare strinfo record SI, if it has refcount > 1 or
1453 if stridx_to_strinfo vector is shared with some other
1454 bbs. */
1455
1456 static strinfo *
unshare_strinfo(strinfo * si)1457 unshare_strinfo (strinfo *si)
1458 {
1459 strinfo *nsi;
1460
1461 if (si->refcount == 1 && !strinfo_shared ())
1462 return si;
1463
1464 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1465 nsi->stmt = si->stmt;
1466 nsi->alloc = si->alloc;
1467 nsi->endptr = si->endptr;
1468 nsi->first = si->first;
1469 nsi->prev = si->prev;
1470 nsi->next = si->next;
1471 nsi->writable = si->writable;
1472 set_strinfo (si->idx, nsi);
1473 free_strinfo (si);
1474 return nsi;
1475 }
1476
1477 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1478 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1479 been created. */
1480
1481 static int
get_stridx_plus_constant(strinfo * basesi,unsigned HOST_WIDE_INT off,tree ptr)1482 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1483 tree ptr)
1484 {
1485 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1486 return 0;
1487
1488 if (compare_nonzero_chars (basesi, off) < 0
1489 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1490 return 0;
1491
1492 unsigned HOST_WIDE_INT nonzero_chars
1493 = tree_to_uhwi (basesi->nonzero_chars) - off;
1494 strinfo *si = basesi, *chainsi;
1495 if (si->first || si->prev || si->next)
1496 si = verify_related_strinfos (basesi);
1497 if (si == NULL
1498 || si->nonzero_chars == NULL_TREE
1499 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1500 return 0;
1501
1502 if (TREE_CODE (ptr) == SSA_NAME
1503 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1504 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1505
1506 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1507 for (chainsi = si; chainsi->next; chainsi = si)
1508 {
1509 si = get_next_strinfo (chainsi);
1510 if (si == NULL
1511 || si->nonzero_chars == NULL_TREE
1512 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1513 break;
1514 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1515 if (r != 1)
1516 {
1517 if (r == 0)
1518 {
1519 if (TREE_CODE (ptr) == SSA_NAME)
1520 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1521 else
1522 {
1523 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1524 if (pidx != NULL && *pidx == 0)
1525 *pidx = si->idx;
1526 }
1527 return si->idx;
1528 }
1529 break;
1530 }
1531 }
1532
1533 int idx = new_stridx (ptr);
1534 if (idx == 0)
1535 return 0;
1536 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1537 basesi->full_string_p);
1538 set_strinfo (idx, si);
1539 if (strinfo *nextsi = get_strinfo (chainsi->next))
1540 {
1541 nextsi = unshare_strinfo (nextsi);
1542 si->next = nextsi->idx;
1543 nextsi->prev = idx;
1544 }
1545 chainsi = unshare_strinfo (chainsi);
1546 if (chainsi->first == 0)
1547 chainsi->first = chainsi->idx;
1548 chainsi->next = idx;
1549 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1550 chainsi->endptr = ptr;
1551 si->endptr = chainsi->endptr;
1552 si->prev = chainsi->idx;
1553 si->first = chainsi->first;
1554 si->writable = chainsi->writable;
1555 return si->idx;
1556 }
1557
1558 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1559 to a zero-length string and if possible chain it to a related strinfo
1560 chain whose part is or might be CHAINSI. */
1561
1562 static strinfo *
zero_length_string(tree ptr,strinfo * chainsi)1563 zero_length_string (tree ptr, strinfo *chainsi)
1564 {
1565 strinfo *si;
1566 int idx;
1567 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1568 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1569 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1570 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1571
1572 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1573 return NULL;
1574 if (chainsi != NULL)
1575 {
1576 si = verify_related_strinfos (chainsi);
1577 if (si)
1578 {
1579 do
1580 {
1581 /* We shouldn't mix delayed and non-delayed lengths. */
1582 gcc_assert (si->full_string_p);
1583 if (si->endptr == NULL_TREE)
1584 {
1585 si = unshare_strinfo (si);
1586 si->endptr = ptr;
1587 }
1588 chainsi = si;
1589 si = get_next_strinfo (si);
1590 }
1591 while (si != NULL);
1592 if (zero_length_string_p (chainsi))
1593 {
1594 if (chainsi->next)
1595 {
1596 chainsi = unshare_strinfo (chainsi);
1597 chainsi->next = 0;
1598 }
1599 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1600 return chainsi;
1601 }
1602 }
1603 else
1604 {
1605 /* We shouldn't mix delayed and non-delayed lengths. */
1606 gcc_assert (chainsi->full_string_p);
1607 if (chainsi->first || chainsi->prev || chainsi->next)
1608 {
1609 chainsi = unshare_strinfo (chainsi);
1610 chainsi->first = 0;
1611 chainsi->prev = 0;
1612 chainsi->next = 0;
1613 }
1614 }
1615 }
1616 idx = new_stridx (ptr);
1617 if (idx == 0)
1618 return NULL;
1619 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1620 set_strinfo (idx, si);
1621 si->endptr = ptr;
1622 if (chainsi != NULL)
1623 {
1624 chainsi = unshare_strinfo (chainsi);
1625 if (chainsi->first == 0)
1626 chainsi->first = chainsi->idx;
1627 chainsi->next = idx;
1628 if (chainsi->endptr == NULL_TREE)
1629 chainsi->endptr = ptr;
1630 si->prev = chainsi->idx;
1631 si->first = chainsi->first;
1632 si->writable = chainsi->writable;
1633 }
1634 return si;
1635 }
1636
1637 /* For strinfo ORIGSI whose length has been just updated, adjust other
1638 related strinfos so that they match the new ORIGSI. This involves:
1639
1640 - adding ADJ to the nonzero_chars fields
1641 - copying full_string_p from the new ORIGSI. */
1642
1643 static void
adjust_related_strinfos(location_t loc,strinfo * origsi,tree adj)1644 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1645 {
1646 strinfo *si = verify_related_strinfos (origsi);
1647
1648 if (si == NULL)
1649 return;
1650
1651 while (1)
1652 {
1653 strinfo *nsi;
1654
1655 if (si != origsi)
1656 {
1657 tree tem;
1658
1659 si = unshare_strinfo (si);
1660 /* We shouldn't see delayed lengths here; the caller must
1661 have calculated the old length in order to calculate
1662 the adjustment. */
1663 gcc_assert (si->nonzero_chars);
1664 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1665 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1666 TREE_TYPE (si->nonzero_chars),
1667 si->nonzero_chars, tem);
1668 si->full_string_p = origsi->full_string_p;
1669
1670 si->endptr = NULL_TREE;
1671 si->dont_invalidate = true;
1672 }
1673 nsi = get_next_strinfo (si);
1674 if (nsi == NULL)
1675 return;
1676 si = nsi;
1677 }
1678 }
1679
1680 /* Find if there are other SSA_NAME pointers equal to PTR
1681 for which we don't track their string lengths yet. If so, use
1682 IDX for them. */
1683
1684 static void
find_equal_ptrs(tree ptr,int idx)1685 find_equal_ptrs (tree ptr, int idx)
1686 {
1687 if (TREE_CODE (ptr) != SSA_NAME)
1688 return;
1689 while (1)
1690 {
1691 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1692 if (!is_gimple_assign (stmt))
1693 return;
1694 ptr = gimple_assign_rhs1 (stmt);
1695 switch (gimple_assign_rhs_code (stmt))
1696 {
1697 case SSA_NAME:
1698 break;
1699 CASE_CONVERT:
1700 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1701 return;
1702 if (TREE_CODE (ptr) == SSA_NAME)
1703 break;
1704 if (TREE_CODE (ptr) != ADDR_EXPR)
1705 return;
1706 /* FALLTHRU */
1707 case ADDR_EXPR:
1708 {
1709 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1710 if (pidx != NULL && *pidx == 0)
1711 *pidx = idx;
1712 return;
1713 }
1714 default:
1715 return;
1716 }
1717
1718 /* We might find an endptr created in this pass. Grow the
1719 vector in that case. */
1720 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1721 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1722
1723 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1724 return;
1725 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1726 }
1727 }
1728
1729 /* Return true if STMT is a call to a builtin function with the right
1730 arguments and attributes that should be considered for optimization
1731 by this pass. */
1732
1733 static bool
valid_builtin_call(gimple * stmt)1734 valid_builtin_call (gimple *stmt)
1735 {
1736 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1737 return false;
1738
1739 tree callee = gimple_call_fndecl (stmt);
1740 switch (DECL_FUNCTION_CODE (callee))
1741 {
1742 case BUILT_IN_MEMCMP:
1743 case BUILT_IN_MEMCMP_EQ:
1744 case BUILT_IN_STRCMP:
1745 case BUILT_IN_STRNCMP:
1746 case BUILT_IN_STRCHR:
1747 case BUILT_IN_STRLEN:
1748 case BUILT_IN_STRNLEN:
1749 /* The above functions should be pure. Punt if they aren't. */
1750 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1751 return false;
1752 break;
1753
1754 case BUILT_IN_ALLOCA:
1755 case BUILT_IN_ALLOCA_WITH_ALIGN:
1756 case BUILT_IN_CALLOC:
1757 case BUILT_IN_MALLOC:
1758 case BUILT_IN_MEMCPY:
1759 case BUILT_IN_MEMCPY_CHK:
1760 case BUILT_IN_MEMPCPY:
1761 case BUILT_IN_MEMPCPY_CHK:
1762 case BUILT_IN_MEMSET:
1763 case BUILT_IN_STPCPY:
1764 case BUILT_IN_STPCPY_CHK:
1765 case BUILT_IN_STPNCPY:
1766 case BUILT_IN_STPNCPY_CHK:
1767 case BUILT_IN_STRCAT:
1768 case BUILT_IN_STRCAT_CHK:
1769 case BUILT_IN_STRCPY:
1770 case BUILT_IN_STRCPY_CHK:
1771 case BUILT_IN_STRNCAT:
1772 case BUILT_IN_STRNCAT_CHK:
1773 case BUILT_IN_STRNCPY:
1774 case BUILT_IN_STRNCPY_CHK:
1775 /* The above functions should be neither const nor pure. Punt if they
1776 aren't. */
1777 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1778 return false;
1779 break;
1780
1781 default:
1782 break;
1783 }
1784
1785 return true;
1786 }
1787
1788 /* If the last .MEM setter statement before STMT is
1789 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1790 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1791 just memcpy (x, y, strlen (y)). SI must be the zero length
1792 strinfo. */
1793
1794 void
adjust_last_stmt(strinfo * si,gimple * stmt,bool is_strcat)1795 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1796 {
1797 tree vuse, callee, len;
1798 struct laststmt_struct last = laststmt;
1799 strinfo *lastsi, *firstsi;
1800 unsigned len_arg_no = 2;
1801
1802 laststmt.stmt = NULL;
1803 laststmt.len = NULL_TREE;
1804 laststmt.stridx = 0;
1805
1806 if (last.stmt == NULL)
1807 return;
1808
1809 vuse = gimple_vuse (stmt);
1810 if (vuse == NULL_TREE
1811 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1812 || !has_single_use (vuse))
1813 return;
1814
1815 gcc_assert (last.stridx > 0);
1816 lastsi = get_strinfo (last.stridx);
1817 if (lastsi == NULL)
1818 return;
1819
1820 if (lastsi != si)
1821 {
1822 if (lastsi->first == 0 || lastsi->first != si->first)
1823 return;
1824
1825 firstsi = verify_related_strinfos (si);
1826 if (firstsi == NULL)
1827 return;
1828 while (firstsi != lastsi)
1829 {
1830 firstsi = get_next_strinfo (firstsi);
1831 if (firstsi == NULL)
1832 return;
1833 }
1834 }
1835
1836 if (!is_strcat && !zero_length_string_p (si))
1837 return;
1838
1839 if (is_gimple_assign (last.stmt))
1840 {
1841 gimple_stmt_iterator gsi;
1842
1843 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1844 return;
1845 if (stmt_could_throw_p (cfun, last.stmt))
1846 return;
1847 gsi = gsi_for_stmt (last.stmt);
1848 unlink_stmt_vdef (last.stmt);
1849 release_defs (last.stmt);
1850 gsi_remove (&gsi, true);
1851 return;
1852 }
1853
1854 if (!valid_builtin_call (last.stmt))
1855 return;
1856
1857 callee = gimple_call_fndecl (last.stmt);
1858 switch (DECL_FUNCTION_CODE (callee))
1859 {
1860 case BUILT_IN_MEMCPY:
1861 case BUILT_IN_MEMCPY_CHK:
1862 break;
1863 default:
1864 return;
1865 }
1866
1867 len = gimple_call_arg (last.stmt, len_arg_no);
1868 if (tree_fits_uhwi_p (len))
1869 {
1870 if (!tree_fits_uhwi_p (last.len)
1871 || integer_zerop (len)
1872 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1873 return;
1874 /* Don't adjust the length if it is divisible by 4, it is more efficient
1875 to store the extra '\0' in that case. */
1876 if ((tree_to_uhwi (len) & 3) == 0)
1877 return;
1878
1879 /* Don't fold away an out of bounds access, as this defeats proper
1880 warnings. */
1881 tree dst = gimple_call_arg (last.stmt, 0);
1882
1883 access_ref aref;
1884 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1885 if (size && tree_int_cst_lt (size, len))
1886 return;
1887 }
1888 else if (TREE_CODE (len) == SSA_NAME)
1889 {
1890 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1891 if (!is_gimple_assign (def_stmt)
1892 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1893 || gimple_assign_rhs1 (def_stmt) != last.len
1894 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1895 return;
1896 }
1897 else
1898 return;
1899
1900 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1901 update_stmt (last.stmt);
1902 }
1903
1904 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1905 call, or when BOUND is non-null, of a strnlen() call, set LHS
1906 range info to [0, min (MAX, BOUND)] when the range includes more
1907 than one value and return LHS. Otherwise, when the range
1908 [MIN, MAX] is such that MIN == MAX, return the tree representation
1909 of (MIN). The latter allows callers to fold suitable strnlen() calls
1910 to constants. */
1911
1912 tree
set_strlen_range(tree lhs,wide_int min,wide_int max,tree bound)1913 set_strlen_range (tree lhs, wide_int min, wide_int max,
1914 tree bound /* = NULL_TREE */)
1915 {
1916 if (TREE_CODE (lhs) != SSA_NAME
1917 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1918 return NULL_TREE;
1919
1920 if (bound)
1921 {
1922 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1923 is less than the size of the array set MAX to it. It it's
1924 greater than MAX and MAX is non-zero bump MAX down to account
1925 for the necessary terminating nul. Otherwise leave it alone. */
1926 if (TREE_CODE (bound) == INTEGER_CST)
1927 {
1928 wide_int wibnd = wi::to_wide (bound);
1929 int cmp = wi::cmpu (wibnd, max);
1930 if (cmp < 0)
1931 max = wibnd;
1932 else if (cmp && wi::ne_p (max, min))
1933 --max;
1934 }
1935 else if (TREE_CODE (bound) == SSA_NAME)
1936 {
1937 value_range r;
1938 get_range_query (cfun)->range_of_expr (r, bound);
1939 if (!r.undefined_p ())
1940 {
1941 /* For a bound in a known range, adjust the range determined
1942 above as necessary. For a bound in some anti-range or
1943 in an unknown range, use the range determined by callers. */
1944 if (wi::ltu_p (r.lower_bound (), min))
1945 min = r.lower_bound ();
1946 if (wi::ltu_p (r.upper_bound (), max))
1947 max = r.upper_bound ();
1948 }
1949 }
1950 }
1951
1952 if (min == max)
1953 return wide_int_to_tree (size_type_node, min);
1954
1955 set_range_info (lhs, VR_RANGE, min, max);
1956 return lhs;
1957 }
1958
1959 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1960 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1961 a character array A[N] with unknown length bounded by N, and for
1962 strnlen(), by min (N, BOUND). */
1963
1964 static tree
maybe_set_strlen_range(tree lhs,tree src,tree bound)1965 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1966 {
1967 if (TREE_CODE (lhs) != SSA_NAME
1968 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1969 return NULL_TREE;
1970
1971 if (TREE_CODE (src) == SSA_NAME)
1972 {
1973 gimple *def = SSA_NAME_DEF_STMT (src);
1974 if (is_gimple_assign (def)
1975 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1976 src = gimple_assign_rhs1 (def);
1977 }
1978
1979 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1980 NUL so that the difference between a pointer to just past it and
1981 one to its beginning is positive. */
1982 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1983
1984 if (TREE_CODE (src) == ADDR_EXPR)
1985 {
1986 /* The last array member of a struct can be bigger than its size
1987 suggests if it's treated as a poor-man's flexible array member. */
1988 src = TREE_OPERAND (src, 0);
1989 if (TREE_CODE (src) != MEM_REF
1990 && !array_at_struct_end_p (src))
1991 {
1992 tree type = TREE_TYPE (src);
1993 tree size = TYPE_SIZE_UNIT (type);
1994 if (size
1995 && TREE_CODE (size) == INTEGER_CST
1996 && !integer_zerop (size))
1997 {
1998 /* Even though such uses of strlen would be undefined,
1999 avoid relying on arrays of arrays in case some genius
2000 decides to call strlen on an unterminated array element
2001 that's followed by a terminated one. Likewise, avoid
2002 assuming that a struct array member is necessarily
2003 nul-terminated (the nul may be in the member that
2004 follows). In those cases, assume that the length
2005 of the string stored in such an array is bounded
2006 by the size of the enclosing object if one can be
2007 determined. */
2008 tree base = get_base_address (src);
2009 if (VAR_P (base))
2010 {
2011 if (tree size = DECL_SIZE_UNIT (base))
2012 if (size
2013 && TREE_CODE (size) == INTEGER_CST
2014 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
2015 max = wi::to_wide (size);
2016 }
2017 }
2018
2019 /* For strlen() the upper bound above is equal to
2020 the longest string that can be stored in the array
2021 (i.e., it accounts for the terminating nul. For
2022 strnlen() bump up the maximum by one since the array
2023 need not be nul-terminated. */
2024 if (!bound && max != 0)
2025 --max;
2026 }
2027 }
2028
2029 wide_int min = wi::zero (max.get_precision ());
2030 return set_strlen_range (lhs, min, max, bound);
2031 }
2032
2033 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
2034 either into a region allocated for the object SI when non-null,
2035 or into an object designated by the LHS of STMT otherwise.
2036 For a call STMT, when CALL_LHS is set use its left hand side
2037 as the destination, otherwise use argument zero.
2038 When nonnull uses RVALS to determine range information.
2039 RAWMEM may be set by memcpy and other raw memory functions
2040 to allow accesses across subobject boundaries. */
2041
2042 void
maybe_warn_overflow(gimple * stmt,bool call_lhs,tree len,strinfo * si,bool plus_one,bool rawmem)2043 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
2044 strinfo *si, bool plus_one, bool rawmem)
2045 {
2046 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2047 return;
2048
2049 /* The DECL of the function performing the write if it is done
2050 by one. */
2051 tree writefn = NULL_TREE;
2052 /* The destination expression involved in the store or call STMT. */
2053 tree dest = NULL_TREE;
2054
2055 if (is_gimple_assign (stmt))
2056 dest = gimple_assign_lhs (stmt);
2057 else if (is_gimple_call (stmt))
2058 {
2059 if (call_lhs)
2060 dest = gimple_call_lhs (stmt);
2061 else
2062 {
2063 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2064 dest = gimple_call_arg (stmt, 0);
2065 }
2066
2067 if (!dest)
2068 return;
2069 writefn = gimple_call_fndecl (stmt);
2070 }
2071 else
2072 return;
2073
2074 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2075 return;
2076
2077 const int ostype = rawmem ? 0 : 1;
2078
2079 /* Use maximum precision to avoid overflow in the addition below.
2080 Make sure all operands have the same precision to keep wide_int
2081 from ICE'ing. */
2082
2083 access_ref aref;
2084 /* The size of the destination region (which is smaller than
2085 the destination object for stores at a non-zero offset). */
2086 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2087
2088 if (!destsize)
2089 {
2090 aref.sizrng[0] = 0;
2091 aref.sizrng[1] = wi::to_offset (max_object_size ());
2092 }
2093
2094 /* Return early if the DESTSIZE size expression is the same as LEN
2095 and the offset into the destination is zero. This might happen
2096 in the case of a pair of malloc and memset calls to allocate
2097 an object and clear it as if by calloc. */
2098 if (destsize == len && !plus_one
2099 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2100 return;
2101
2102 wide_int rng[2];
2103 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2104 return;
2105
2106 widest_int lenrng[2] =
2107 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2108
2109 if (plus_one)
2110 {
2111 lenrng[0] += 1;
2112 lenrng[1] += 1;
2113 }
2114
2115 /* The size of the remaining space in the destination computed
2116 as the size of the latter minus the offset into it. */
2117 widest_int spcrng[2];
2118 {
2119 offset_int remrng[2];
2120 remrng[1] = aref.size_remaining (remrng);
2121 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2122 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2123 }
2124
2125 if (wi::leu_p (lenrng[0], spcrng[0])
2126 && wi::leu_p (lenrng[1], spcrng[1]))
2127 return;
2128
2129 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2130 bool warned = false;
2131 if (wi::leu_p (lenrng[0], spcrng[1]))
2132 {
2133 if (len != destsize
2134 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2135 return;
2136
2137 warned = (writefn
2138 ? warning_at (loc, OPT_Wstringop_overflow_,
2139 "%qD writing one too many bytes into a region "
2140 "of a size that depends on %<strlen%>",
2141 writefn)
2142 : warning_at (loc, OPT_Wstringop_overflow_,
2143 "writing one too many bytes into a region "
2144 "of a size that depends on %<strlen%>"));
2145 }
2146 else if (lenrng[0] == lenrng[1])
2147 {
2148 if (spcrng[0] == spcrng[1])
2149 warned = (writefn
2150 ? warning_n (loc, OPT_Wstringop_overflow_,
2151 lenrng[0].to_uhwi (),
2152 "%qD writing %wu byte into a region "
2153 "of size %wu",
2154 "%qD writing %wu bytes into a region "
2155 "of size %wu",
2156 writefn, lenrng[0].to_uhwi (),
2157 spcrng[0].to_uhwi ())
2158 : warning_n (loc, OPT_Wstringop_overflow_,
2159 lenrng[0].to_uhwi (),
2160 "writing %wu byte into a region "
2161 "of size %wu",
2162 "writing %wu bytes into a region "
2163 "of size %wu",
2164 lenrng[0].to_uhwi (),
2165 spcrng[0].to_uhwi ()));
2166 else
2167 warned = (writefn
2168 ? warning_n (loc, OPT_Wstringop_overflow_,
2169 lenrng[0].to_uhwi (),
2170 "%qD writing %wu byte into a region "
2171 "of size between %wu and %wu",
2172 "%qD writing %wu bytes into a region "
2173 "of size between %wu and %wu",
2174 writefn, lenrng[0].to_uhwi (),
2175 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2176 : warning_n (loc, OPT_Wstringop_overflow_,
2177 lenrng[0].to_uhwi (),
2178 "writing %wu byte into a region "
2179 "of size between %wu and %wu",
2180 "writing %wu bytes into a region "
2181 "of size between %wu and %wu",
2182 lenrng[0].to_uhwi (),
2183 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2184 }
2185 else if (spcrng[0] == spcrng[1])
2186 warned = (writefn
2187 ? warning_at (loc, OPT_Wstringop_overflow_,
2188 "%qD writing between %wu and %wu bytes "
2189 "into a region of size %wu",
2190 writefn, lenrng[0].to_uhwi (),
2191 lenrng[1].to_uhwi (),
2192 spcrng[0].to_uhwi ())
2193 : warning_at (loc, OPT_Wstringop_overflow_,
2194 "writing between %wu and %wu bytes "
2195 "into a region of size %wu",
2196 lenrng[0].to_uhwi (),
2197 lenrng[1].to_uhwi (),
2198 spcrng[0].to_uhwi ()));
2199 else
2200 warned = (writefn
2201 ? warning_at (loc, OPT_Wstringop_overflow_,
2202 "%qD writing between %wu and %wu bytes "
2203 "into a region of size between %wu and %wu",
2204 writefn, lenrng[0].to_uhwi (),
2205 lenrng[1].to_uhwi (),
2206 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2207 : warning_at (loc, OPT_Wstringop_overflow_,
2208 "writing between %wu and %wu bytes "
2209 "into a region of size between %wu and %wu",
2210 lenrng[0].to_uhwi (),
2211 lenrng[1].to_uhwi (),
2212 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2213
2214 if (!warned)
2215 return;
2216
2217 suppress_warning (stmt, OPT_Wstringop_overflow_);
2218
2219 aref.inform_access (access_write_only);
2220 }
2221
2222 /* Convenience wrapper for the above. */
2223
2224 void
maybe_warn_overflow(gimple * stmt,bool call_lhs,unsigned HOST_WIDE_INT len,strinfo * si,bool plus_one,bool rawmem)2225 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2226 unsigned HOST_WIDE_INT len,
2227 strinfo *si, bool plus_one, bool rawmem)
2228 {
2229 tree tlen = build_int_cst (size_type_node, len);
2230 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2231 }
2232
2233 /* Handle a strlen call. If strlen of the argument is known, replace
2234 the strlen call with the known value, otherwise remember that strlen
2235 of the argument is stored in the lhs SSA_NAME. */
2236
2237 void
handle_builtin_strlen()2238 strlen_pass::handle_builtin_strlen ()
2239 {
2240 gimple *stmt = gsi_stmt (m_gsi);
2241 tree lhs = gimple_call_lhs (stmt);
2242
2243 if (lhs == NULL_TREE)
2244 return;
2245
2246 location_t loc = gimple_location (stmt);
2247 tree callee = gimple_call_fndecl (stmt);
2248 tree src = gimple_call_arg (stmt, 0);
2249 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2250 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2251 int idx = get_stridx (src, stmt);
2252 if (idx || (bound && integer_zerop (bound)))
2253 {
2254 strinfo *si = NULL;
2255 tree rhs;
2256
2257 if (idx < 0)
2258 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2259 else if (idx == 0)
2260 rhs = bound;
2261 else
2262 {
2263 rhs = NULL_TREE;
2264 si = get_strinfo (idx);
2265 if (si != NULL)
2266 {
2267 rhs = get_string_length (si);
2268 /* For strnlen, if bound is constant, even if si is not known
2269 to be zero terminated, if we know at least bound bytes are
2270 not zero, the return value will be bound. */
2271 if (rhs == NULL_TREE
2272 && bound != NULL_TREE
2273 && TREE_CODE (bound) == INTEGER_CST
2274 && si->nonzero_chars != NULL_TREE
2275 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2276 && tree_int_cst_le (bound, si->nonzero_chars))
2277 rhs = bound;
2278 }
2279 }
2280 if (rhs != NULL_TREE)
2281 {
2282 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2283 {
2284 fprintf (dump_file, "Optimizing: ");
2285 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2286 }
2287 rhs = unshare_expr (rhs);
2288 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2289 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2290
2291 if (bound)
2292 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2293
2294 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2295 stmt = gsi_stmt (m_gsi);
2296 update_stmt (stmt);
2297 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2298 {
2299 fprintf (dump_file, "into: ");
2300 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2301 }
2302
2303 if (si != NULL
2304 /* Don't update anything for strnlen. */
2305 && bound == NULL_TREE
2306 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2307 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2308 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2309 {
2310 si = unshare_strinfo (si);
2311 si->nonzero_chars = lhs;
2312 gcc_assert (si->full_string_p);
2313 }
2314
2315 if (strlen_to_stridx
2316 && (bound == NULL_TREE
2317 /* For strnlen record this only if the call is proven
2318 to return the same value as strlen would. */
2319 || (TREE_CODE (bound) == INTEGER_CST
2320 && TREE_CODE (rhs) == INTEGER_CST
2321 && tree_int_cst_lt (rhs, bound))))
2322 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2323
2324 return;
2325 }
2326 }
2327 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2328 return;
2329
2330 if (idx == 0)
2331 idx = new_stridx (src);
2332 else
2333 {
2334 strinfo *si = get_strinfo (idx);
2335 if (si != NULL)
2336 {
2337 if (!si->full_string_p && !si->stmt)
2338 {
2339 /* Until now we only had a lower bound on the string length.
2340 Install LHS as the actual length. */
2341 si = unshare_strinfo (si);
2342 tree old = si->nonzero_chars;
2343 si->nonzero_chars = lhs;
2344 si->full_string_p = true;
2345 if (old && TREE_CODE (old) == INTEGER_CST)
2346 {
2347 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2348 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2349 TREE_TYPE (lhs), lhs, old);
2350 adjust_related_strinfos (loc, si, adj);
2351 /* Use the constant minimum length as the lower bound
2352 of the non-constant length. */
2353 wide_int min = wi::to_wide (old);
2354 wide_int max
2355 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2356 set_strlen_range (lhs, min, max);
2357 }
2358 else
2359 {
2360 si->first = 0;
2361 si->prev = 0;
2362 si->next = 0;
2363 }
2364 }
2365 return;
2366 }
2367 }
2368 if (idx)
2369 {
2370 if (!bound)
2371 {
2372 /* Only store the new length information for calls to strlen(),
2373 not for those to strnlen(). */
2374 strinfo *si = new_strinfo (src, idx, lhs, true);
2375 set_strinfo (idx, si);
2376 find_equal_ptrs (src, idx);
2377 }
2378
2379 /* For SRC that is an array of N elements, set LHS's range
2380 to [0, min (N, BOUND)]. A constant return value means
2381 the range would have consisted of a single value. In
2382 that case, fold the result into the returned constant. */
2383 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2384 if (TREE_CODE (ret) == INTEGER_CST)
2385 {
2386 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2387 {
2388 fprintf (dump_file, "Optimizing: ");
2389 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2390 }
2391 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2392 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2393 gimplify_and_update_call_from_tree (&m_gsi, ret);
2394 stmt = gsi_stmt (m_gsi);
2395 update_stmt (stmt);
2396 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2397 {
2398 fprintf (dump_file, "into: ");
2399 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2400 }
2401 }
2402
2403 if (strlen_to_stridx && !bound)
2404 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2405 }
2406 }
2407
2408 /* Handle a strchr call. If strlen of the first argument is known, replace
2409 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2410 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2411
2412 void
handle_builtin_strchr()2413 strlen_pass::handle_builtin_strchr ()
2414 {
2415 gimple *stmt = gsi_stmt (m_gsi);
2416 tree lhs = gimple_call_lhs (stmt);
2417
2418 if (lhs == NULL_TREE)
2419 return;
2420
2421 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2422 return;
2423
2424 tree src = gimple_call_arg (stmt, 0);
2425
2426 /* Avoid folding if the first argument is not a nul-terminated array.
2427 Defer warning until later. */
2428 if (!check_nul_terminated_array (NULL_TREE, src))
2429 return;
2430
2431 int idx = get_stridx (src, stmt);
2432 if (idx)
2433 {
2434 strinfo *si = NULL;
2435 tree rhs;
2436
2437 if (idx < 0)
2438 rhs = build_int_cst (size_type_node, ~idx);
2439 else
2440 {
2441 rhs = NULL_TREE;
2442 si = get_strinfo (idx);
2443 if (si != NULL)
2444 rhs = get_string_length (si);
2445 }
2446 if (rhs != NULL_TREE)
2447 {
2448 location_t loc = gimple_location (stmt);
2449
2450 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2451 {
2452 fprintf (dump_file, "Optimizing: ");
2453 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2454 }
2455 if (si != NULL && si->endptr != NULL_TREE)
2456 {
2457 rhs = unshare_expr (si->endptr);
2458 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2459 TREE_TYPE (rhs)))
2460 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2461 }
2462 else
2463 {
2464 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2465 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2466 TREE_TYPE (src), src, rhs);
2467 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2468 TREE_TYPE (rhs)))
2469 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2470 }
2471 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2472 stmt = gsi_stmt (m_gsi);
2473 update_stmt (stmt);
2474 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2475 {
2476 fprintf (dump_file, "into: ");
2477 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2478 }
2479 if (si != NULL
2480 && si->endptr == NULL_TREE
2481 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2482 {
2483 si = unshare_strinfo (si);
2484 si->endptr = lhs;
2485 }
2486 zero_length_string (lhs, si);
2487 return;
2488 }
2489 }
2490 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2491 return;
2492 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2493 {
2494 if (idx == 0)
2495 idx = new_stridx (src);
2496 else if (get_strinfo (idx) != NULL)
2497 {
2498 zero_length_string (lhs, NULL);
2499 return;
2500 }
2501 if (idx)
2502 {
2503 location_t loc = gimple_location (stmt);
2504 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2505 tree srcu = fold_convert_loc (loc, size_type_node, src);
2506 tree length = fold_build2_loc (loc, MINUS_EXPR,
2507 size_type_node, lhsu, srcu);
2508 strinfo *si = new_strinfo (src, idx, length, true);
2509 si->endptr = lhs;
2510 set_strinfo (idx, si);
2511 find_equal_ptrs (src, idx);
2512 zero_length_string (lhs, si);
2513 }
2514 }
2515 else
2516 zero_length_string (lhs, NULL);
2517 }
2518
2519 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2520 If strlen of the second argument is known, strlen of the first argument
2521 is the same after this call. Furthermore, attempt to convert it to
2522 memcpy. Uses RVALS to determine range information. */
2523
2524 void
handle_builtin_strcpy(built_in_function bcode)2525 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2526 {
2527 int idx, didx;
2528 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2529 bool success;
2530 gimple *stmt = gsi_stmt (m_gsi);
2531 strinfo *si, *dsi, *olddsi, *zsi;
2532 location_t loc;
2533
2534 src = gimple_call_arg (stmt, 1);
2535 dst = gimple_call_arg (stmt, 0);
2536 lhs = gimple_call_lhs (stmt);
2537 idx = get_stridx (src, stmt);
2538 si = NULL;
2539 if (idx > 0)
2540 si = get_strinfo (idx);
2541
2542 didx = get_stridx (dst, stmt);
2543 olddsi = NULL;
2544 oldlen = NULL_TREE;
2545 if (didx > 0)
2546 olddsi = get_strinfo (didx);
2547 else if (didx < 0)
2548 return;
2549
2550 if (olddsi != NULL)
2551 adjust_last_stmt (olddsi, stmt, false);
2552
2553 srclen = NULL_TREE;
2554 if (si != NULL)
2555 srclen = get_string_length (si);
2556 else if (idx < 0)
2557 srclen = build_int_cst (size_type_node, ~idx);
2558
2559 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2560
2561 if (olddsi != NULL)
2562 adjust_last_stmt (olddsi, stmt, false);
2563
2564 loc = gimple_location (stmt);
2565 if (srclen == NULL_TREE)
2566 switch (bcode)
2567 {
2568 case BUILT_IN_STRCPY:
2569 case BUILT_IN_STRCPY_CHK:
2570 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2571 return;
2572 break;
2573 case BUILT_IN_STPCPY:
2574 case BUILT_IN_STPCPY_CHK:
2575 if (lhs == NULL_TREE)
2576 return;
2577 else
2578 {
2579 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2580 srclen = fold_convert_loc (loc, size_type_node, dst);
2581 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2582 lhsuint, srclen);
2583 }
2584 break;
2585 default:
2586 gcc_unreachable ();
2587 }
2588
2589 if (didx == 0)
2590 {
2591 didx = new_stridx (dst);
2592 if (didx == 0)
2593 return;
2594 }
2595 if (olddsi != NULL)
2596 {
2597 oldlen = olddsi->nonzero_chars;
2598 dsi = unshare_strinfo (olddsi);
2599 dsi->nonzero_chars = srclen;
2600 dsi->full_string_p = (srclen != NULL_TREE);
2601 /* Break the chain, so adjust_related_strinfo on later pointers in
2602 the chain won't adjust this one anymore. */
2603 dsi->next = 0;
2604 dsi->stmt = NULL;
2605 dsi->endptr = NULL_TREE;
2606 }
2607 else
2608 {
2609 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2610 set_strinfo (didx, dsi);
2611 find_equal_ptrs (dst, didx);
2612 }
2613 dsi->writable = true;
2614 dsi->dont_invalidate = true;
2615
2616 if (dsi->nonzero_chars == NULL_TREE)
2617 {
2618 strinfo *chainsi;
2619
2620 /* If string length of src is unknown, use delayed length
2621 computation. If string length of dst will be needed, it
2622 can be computed by transforming this strcpy call into
2623 stpcpy and subtracting dst from the return value. */
2624
2625 /* Look for earlier strings whose length could be determined if
2626 this strcpy is turned into an stpcpy. */
2627
2628 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2629 {
2630 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2631 {
2632 /* When setting a stmt for delayed length computation
2633 prevent all strinfos through dsi from being
2634 invalidated. */
2635 chainsi = unshare_strinfo (chainsi);
2636 chainsi->stmt = stmt;
2637 chainsi->nonzero_chars = NULL_TREE;
2638 chainsi->full_string_p = false;
2639 chainsi->endptr = NULL_TREE;
2640 chainsi->dont_invalidate = true;
2641 }
2642 }
2643 dsi->stmt = stmt;
2644
2645 /* Try to detect overlap before returning. This catches cases
2646 like strcpy (d, d + n) where n is non-constant whose range
2647 is such that (n <= strlen (d) holds).
2648
2649 OLDDSI->NONZERO_chars may have been reset by this point with
2650 oldlen holding it original value. */
2651 if (olddsi && oldlen)
2652 {
2653 /* Add 1 for the terminating NUL. */
2654 tree type = TREE_TYPE (oldlen);
2655 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2656 build_int_cst (type, 1));
2657 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2658 }
2659
2660 return;
2661 }
2662
2663 if (olddsi != NULL)
2664 {
2665 tree adj = NULL_TREE;
2666 if (oldlen == NULL_TREE)
2667 ;
2668 else if (integer_zerop (oldlen))
2669 adj = srclen;
2670 else if (TREE_CODE (oldlen) == INTEGER_CST
2671 || TREE_CODE (srclen) == INTEGER_CST)
2672 adj = fold_build2_loc (loc, MINUS_EXPR,
2673 TREE_TYPE (srclen), srclen,
2674 fold_convert_loc (loc, TREE_TYPE (srclen),
2675 oldlen));
2676 if (adj != NULL_TREE)
2677 adjust_related_strinfos (loc, dsi, adj);
2678 else
2679 dsi->prev = 0;
2680 }
2681 /* strcpy src may not overlap dst, so src doesn't need to be
2682 invalidated either. */
2683 if (si != NULL)
2684 si->dont_invalidate = true;
2685
2686 fn = NULL_TREE;
2687 zsi = NULL;
2688 switch (bcode)
2689 {
2690 case BUILT_IN_STRCPY:
2691 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2692 if (lhs)
2693 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2694 break;
2695 case BUILT_IN_STRCPY_CHK:
2696 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2697 if (lhs)
2698 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2699 break;
2700 case BUILT_IN_STPCPY:
2701 /* This would need adjustment of the lhs (subtract one),
2702 or detection that the trailing '\0' doesn't need to be
2703 written, if it will be immediately overwritten.
2704 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2705 if (lhs)
2706 {
2707 dsi->endptr = lhs;
2708 zsi = zero_length_string (lhs, dsi);
2709 }
2710 break;
2711 case BUILT_IN_STPCPY_CHK:
2712 /* This would need adjustment of the lhs (subtract one),
2713 or detection that the trailing '\0' doesn't need to be
2714 written, if it will be immediately overwritten.
2715 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2716 if (lhs)
2717 {
2718 dsi->endptr = lhs;
2719 zsi = zero_length_string (lhs, dsi);
2720 }
2721 break;
2722 default:
2723 gcc_unreachable ();
2724 }
2725 if (zsi != NULL)
2726 zsi->dont_invalidate = true;
2727
2728 if (fn)
2729 {
2730 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2731 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2732 }
2733 else
2734 type = size_type_node;
2735
2736 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2737 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2738
2739 /* Disable warning for the transformed statement? */
2740 opt_code no_warning_opt = no_warning;
2741
2742 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2743 {
2744 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2745 NULL_TREE, len);
2746 if (no_warning_opt)
2747 suppress_warning (stmt, no_warning_opt);
2748 }
2749
2750 if (fn == NULL_TREE)
2751 return;
2752
2753 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2754 GSI_SAME_STMT);
2755 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2756 {
2757 fprintf (dump_file, "Optimizing: ");
2758 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2759 }
2760 if (gimple_call_num_args (stmt) == 2)
2761 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2762 else
2763 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2764 gimple_call_arg (stmt, 2));
2765 if (success)
2766 {
2767 stmt = gsi_stmt (m_gsi);
2768 update_stmt (stmt);
2769 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2770 {
2771 fprintf (dump_file, "into: ");
2772 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2773 }
2774 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2775 laststmt.stmt = stmt;
2776 laststmt.len = srclen;
2777 laststmt.stridx = dsi->idx;
2778 }
2779 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2780 fprintf (dump_file, "not possible.\n");
2781
2782 if (no_warning_opt)
2783 suppress_warning (stmt, no_warning_opt);
2784 }
2785
2786 /* Check the size argument to the built-in forms of stpncpy and strncpy
2787 for out-of-bounds offsets or overlapping access, and to see if the
2788 size argument is derived from a call to strlen() on the source argument,
2789 and if so, issue an appropriate warning. */
2790
2791 void
handle_builtin_strncat(built_in_function)2792 strlen_pass::handle_builtin_strncat (built_in_function)
2793 {
2794 /* Same as stxncpy(). */
2795 handle_builtin_stxncpy_strncat (true);
2796 }
2797
2798 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2799 way. LEN can either be an integer expression, or a pointer (to char).
2800 When it is the latter (such as in recursive calls to self) it is
2801 assumed to be the argument in some call to strlen() whose relationship
2802 to SRC is being ascertained. */
2803
2804 bool
is_strlen_related_p(tree src,tree len)2805 is_strlen_related_p (tree src, tree len)
2806 {
2807 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2808 && operand_equal_p (src, len, 0))
2809 return true;
2810
2811 if (TREE_CODE (len) != SSA_NAME)
2812 return false;
2813
2814 if (TREE_CODE (src) == SSA_NAME)
2815 {
2816 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2817 if (is_gimple_assign (srcdef))
2818 {
2819 /* Handle bitwise AND used in conversions from wider size_t
2820 to narrower unsigned types. */
2821 tree_code code = gimple_assign_rhs_code (srcdef);
2822 if (code == BIT_AND_EXPR
2823 || code == NOP_EXPR)
2824 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2825
2826 return false;
2827 }
2828
2829 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2830 {
2831 /* If SRC is the result of a call to an allocation function
2832 or strlen, use the function's argument instead. */
2833 tree func = gimple_call_fndecl (srcdef);
2834 built_in_function code = DECL_FUNCTION_CODE (func);
2835 if (code == BUILT_IN_ALLOCA
2836 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2837 || code == BUILT_IN_MALLOC
2838 || code == BUILT_IN_STRLEN)
2839 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2840
2841 /* FIXME: Handle other functions with attribute alloc_size. */
2842 return false;
2843 }
2844 }
2845
2846 gimple *lendef = SSA_NAME_DEF_STMT (len);
2847 if (!lendef)
2848 return false;
2849
2850 if (is_gimple_call (lendef))
2851 {
2852 tree func = gimple_call_fndecl (lendef);
2853 if (!valid_builtin_call (lendef)
2854 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2855 return false;
2856
2857 tree arg = gimple_call_arg (lendef, 0);
2858 return is_strlen_related_p (src, arg);
2859 }
2860
2861 if (!is_gimple_assign (lendef))
2862 return false;
2863
2864 tree_code code = gimple_assign_rhs_code (lendef);
2865 tree rhs1 = gimple_assign_rhs1 (lendef);
2866 tree rhstype = TREE_TYPE (rhs1);
2867
2868 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2869 || (INTEGRAL_TYPE_P (rhstype)
2870 && (code == BIT_AND_EXPR
2871 || code == NOP_EXPR)))
2872 {
2873 /* Pointer plus (an integer), and truncation are considered among
2874 the (potentially) related expressions to strlen. */
2875 return is_strlen_related_p (src, rhs1);
2876 }
2877
2878 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2879 {
2880 /* Integer subtraction is considered strlen-related when both
2881 arguments are integers and second one is strlen-related. */
2882 rhstype = TREE_TYPE (rhs2);
2883 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2884 return is_strlen_related_p (src, rhs2);
2885 }
2886
2887 return false;
2888 }
2889
2890 /* Called by handle_builtin_stxncpy_strncat and by
2891 gimple_fold_builtin_strncpy in gimple-fold.cc.
2892 Check to see if the specified bound is a) equal to the size of
2893 the destination DST and if so, b) if it's immediately followed by
2894 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2895 do nothing. Return true if diagnostic has been issued.
2896
2897 The purpose is to diagnose calls to strncpy and stpncpy that do
2898 not nul-terminate the copy while allowing for the idiom where
2899 such a call is immediately followed by setting the last element
2900 to nul, as in:
2901 char a[32];
2902 strncpy (a, s, sizeof a);
2903 a[sizeof a - 1] = '\0';
2904 */
2905
2906 bool
maybe_diag_stxncpy_trunc(gimple_stmt_iterator gsi,tree src,tree cnt,pointer_query * ptr_qry)2907 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2908 pointer_query *ptr_qry /* = NULL */)
2909 {
2910 gimple *stmt = gsi_stmt (gsi);
2911 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2912 return false;
2913
2914 wide_int cntrange[2];
2915 value_range r;
2916 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2917 || r.varying_p ()
2918 || r.undefined_p ())
2919 return false;
2920
2921 cntrange[0] = wi::to_wide (r.min ());
2922 cntrange[1] = wi::to_wide (r.max ());
2923 if (r.kind () == VR_ANTI_RANGE)
2924 {
2925 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2926
2927 if (wi::ltu_p (cntrange[1], maxobjsize))
2928 {
2929 cntrange[0] = cntrange[1] + 1;
2930 cntrange[1] = maxobjsize;
2931 }
2932 else
2933 {
2934 cntrange[1] = cntrange[0] - 1;
2935 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2936 }
2937 }
2938
2939 /* Negative value is the constant string length. If it's less than
2940 the lower bound there is no truncation. Avoid calling get_stridx()
2941 when ssa_ver_to_stridx is empty. That implies the caller isn't
2942 running under the control of this pass and ssa_ver_to_stridx hasn't
2943 been created yet. */
2944 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src, stmt) : 0;
2945 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2946 return false;
2947
2948 tree dst = gimple_call_arg (stmt, 0);
2949 tree dstdecl = dst;
2950 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2951 dstdecl = TREE_OPERAND (dstdecl, 0);
2952
2953 tree ref = NULL_TREE;
2954
2955 if (!sidx)
2956 {
2957 /* If the source is a non-string return early to avoid warning
2958 for possible truncation (if the truncation is certain SIDX
2959 is non-zero). */
2960 tree srcdecl = gimple_call_arg (stmt, 1);
2961 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2962 srcdecl = TREE_OPERAND (srcdecl, 0);
2963 if (get_attr_nonstring_decl (srcdecl, &ref))
2964 return false;
2965 }
2966
2967 /* Likewise, if the destination refers to an array/pointer declared
2968 nonstring return early. */
2969 if (get_attr_nonstring_decl (dstdecl, &ref))
2970 return false;
2971
2972 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2973 avoid the truncation warning. */
2974 gsi_next_nondebug (&gsi);
2975 gimple *next_stmt = gsi_stmt (gsi);
2976 if (!next_stmt)
2977 {
2978 /* When there is no statement in the same basic block check
2979 the immediate successor block. */
2980 if (basic_block bb = gimple_bb (stmt))
2981 {
2982 if (single_succ_p (bb))
2983 {
2984 /* For simplicity, ignore blocks with multiple outgoing
2985 edges for now and only consider successor blocks along
2986 normal edges. */
2987 edge e = EDGE_SUCC (bb, 0);
2988 if (!(e->flags & EDGE_ABNORMAL))
2989 {
2990 gsi = gsi_start_bb (e->dest);
2991 next_stmt = gsi_stmt (gsi);
2992 if (next_stmt && is_gimple_debug (next_stmt))
2993 {
2994 gsi_next_nondebug (&gsi);
2995 next_stmt = gsi_stmt (gsi);
2996 }
2997 }
2998 }
2999 }
3000 }
3001
3002 if (next_stmt && is_gimple_assign (next_stmt))
3003 {
3004 tree lhs = gimple_assign_lhs (next_stmt);
3005 tree_code code = TREE_CODE (lhs);
3006 if (code == ARRAY_REF || code == MEM_REF)
3007 lhs = TREE_OPERAND (lhs, 0);
3008
3009 tree func = gimple_call_fndecl (stmt);
3010 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3011 {
3012 tree ret = gimple_call_lhs (stmt);
3013 if (ret && operand_equal_p (ret, lhs, 0))
3014 return false;
3015 }
3016
3017 /* Determine the base address and offset of the reference,
3018 ignoring the innermost array index. */
3019 if (TREE_CODE (ref) == ARRAY_REF)
3020 ref = TREE_OPERAND (ref, 0);
3021
3022 poly_int64 dstoff;
3023 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3024
3025 poly_int64 lhsoff;
3026 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3027 if (lhsbase
3028 && dstbase
3029 && known_eq (dstoff, lhsoff)
3030 && operand_equal_p (dstbase, lhsbase, 0))
3031 return false;
3032 }
3033
3034 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3035 wide_int lenrange[2];
3036 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3037 {
3038 lenrange[0] = (sisrc->nonzero_chars
3039 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3040 ? wi::to_wide (sisrc->nonzero_chars)
3041 : wi::zero (prec));
3042 lenrange[1] = lenrange[0];
3043 }
3044 else if (sidx < 0)
3045 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3046 else
3047 {
3048 c_strlen_data lendata = { };
3049 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3050 to have it set to the length of the longest string in a PHI. */
3051 lendata.maxbound = src;
3052 get_range_strlen (src, &lendata, /* eltsize = */1);
3053 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3054 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3055 {
3056 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3057 which stores the length of the shortest known string. */
3058 if (integer_all_onesp (lendata.maxlen))
3059 lenrange[0] = wi::shwi (0, prec);
3060 else
3061 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3062 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3063 }
3064 else
3065 {
3066 lenrange[0] = wi::shwi (0, prec);
3067 lenrange[1] = wi::shwi (-1, prec);
3068 }
3069 }
3070
3071 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3072 tree func = gimple_call_fndecl (stmt);
3073
3074 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3075 {
3076 /* If the longest source string is shorter than the lower bound
3077 of the specified count the copy is definitely nul-terminated. */
3078 if (wi::ltu_p (lenrange[1], cntrange[0]))
3079 return false;
3080
3081 if (wi::neg_p (lenrange[1]))
3082 {
3083 /* The length of one of the strings is unknown but at least
3084 one has non-zero length and that length is stored in
3085 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3086 warning below. */
3087 lenrange[1] = lenrange[0];
3088 lenrange[0] = wi::shwi (0, prec);
3089 }
3090
3091 /* Set to true for strncat whose bound is derived from the length
3092 of the destination (the expected usage pattern). */
3093 bool cat_dstlen_bounded = false;
3094 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3095 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3096
3097 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3098 return warning_n (callloc, OPT_Wstringop_truncation,
3099 cntrange[0].to_uhwi (),
3100 "%qD output truncated before terminating "
3101 "nul copying %E byte from a string of the "
3102 "same length",
3103 "%qD output truncated before terminating nul "
3104 "copying %E bytes from a string of the same "
3105 "length",
3106 func, cnt);
3107 else if (!cat_dstlen_bounded)
3108 {
3109 if (wi::geu_p (lenrange[0], cntrange[1]))
3110 {
3111 /* The shortest string is longer than the upper bound of
3112 the count so the truncation is certain. */
3113 if (cntrange[0] == cntrange[1])
3114 return warning_n (callloc, OPT_Wstringop_truncation,
3115 cntrange[0].to_uhwi (),
3116 "%qD output truncated copying %E byte "
3117 "from a string of length %wu",
3118 "%qD output truncated copying %E bytes "
3119 "from a string of length %wu",
3120 func, cnt, lenrange[0].to_uhwi ());
3121
3122 return warning_at (callloc, OPT_Wstringop_truncation,
3123 "%qD output truncated copying between %wu "
3124 "and %wu bytes from a string of length %wu",
3125 func, cntrange[0].to_uhwi (),
3126 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3127 }
3128 else if (wi::geu_p (lenrange[1], cntrange[1]))
3129 {
3130 /* The longest string is longer than the upper bound of
3131 the count so the truncation is possible. */
3132 if (cntrange[0] == cntrange[1])
3133 return warning_n (callloc, OPT_Wstringop_truncation,
3134 cntrange[0].to_uhwi (),
3135 "%qD output may be truncated copying %E "
3136 "byte from a string of length %wu",
3137 "%qD output may be truncated copying %E "
3138 "bytes from a string of length %wu",
3139 func, cnt, lenrange[1].to_uhwi ());
3140
3141 return warning_at (callloc, OPT_Wstringop_truncation,
3142 "%qD output may be truncated copying between "
3143 "%wu and %wu bytes from a string of length %wu",
3144 func, cntrange[0].to_uhwi (),
3145 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3146 }
3147 }
3148
3149 if (!cat_dstlen_bounded
3150 && cntrange[0] != cntrange[1]
3151 && wi::leu_p (cntrange[0], lenrange[0])
3152 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3153 {
3154 /* If the source (including the terminating nul) is longer than
3155 the lower bound of the specified count but shorter than the
3156 upper bound the copy may (but need not) be truncated. */
3157 return warning_at (callloc, OPT_Wstringop_truncation,
3158 "%qD output may be truncated copying between "
3159 "%wu and %wu bytes from a string of length %wu",
3160 func, cntrange[0].to_uhwi (),
3161 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3162 }
3163 }
3164
3165 access_ref aref;
3166 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3167 {
3168 /* The source length is unknown. Try to determine the destination
3169 size and see if it matches the specified bound. If not, bail.
3170 Otherwise go on to see if it should be diagnosed for possible
3171 truncation. */
3172 if (!dstsize)
3173 return false;
3174
3175 if (wi::to_wide (dstsize) != cntrange[1])
3176 return false;
3177
3178 /* Avoid warning for strncpy(a, b, N) calls where the following
3179 equalities hold:
3180 N == sizeof a && N == sizeof b */
3181 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3182 if (wi::to_wide (srcsize) == cntrange[1])
3183 return false;
3184
3185 if (cntrange[0] == cntrange[1])
3186 return warning_at (callloc, OPT_Wstringop_truncation,
3187 "%qD specified bound %E equals destination size",
3188 func, cnt);
3189 }
3190
3191 return false;
3192 }
3193
3194 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3195 strncat, for out-of-bounds offsets or overlapping access, and to see
3196 if the size is derived from calling strlen() on the source argument,
3197 and if so, issue the appropriate warning.
3198 APPEND_P is true for strncat. */
3199
3200 void
handle_builtin_stxncpy_strncat(bool append_p)3201 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3202 {
3203 if (!strlen_to_stridx)
3204 return;
3205
3206 gimple *stmt = gsi_stmt (m_gsi);
3207
3208 tree dst = gimple_call_arg (stmt, 0);
3209 tree src = gimple_call_arg (stmt, 1);
3210 tree len = gimple_call_arg (stmt, 2);
3211 /* An upper bound of the size of the destination. */
3212 tree dstsize = NULL_TREE;
3213 /* The length of the destination and source strings (plus 1 for those
3214 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3215 a lower bound). */
3216 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3217
3218 int didx = get_stridx (dst, stmt);
3219 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3220 {
3221 /* Compute the size of the destination string including the nul
3222 if it is known to be nul-terminated. */
3223 if (sidst->nonzero_chars)
3224 {
3225 if (sidst->full_string_p)
3226 {
3227 /* String is known to be nul-terminated. */
3228 tree type = TREE_TYPE (sidst->nonzero_chars);
3229 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3230 build_int_cst (type, 1));
3231 }
3232 else
3233 dstlenp1 = sidst->nonzero_chars;
3234 }
3235 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3236 {
3237 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3238 dstsize = gimple_call_alloc_size (def_stmt);
3239 }
3240
3241 dst = sidst->ptr;
3242 }
3243
3244 int sidx = get_stridx (src, stmt);
3245 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3246 if (sisrc)
3247 {
3248 /* strncat() and strncpy() can modify the source string by writing
3249 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3250 clear. */
3251
3252 /* Compute the size of the source string including the terminating
3253 nul if its known to be nul-terminated. */
3254 if (sisrc->nonzero_chars)
3255 {
3256 if (sisrc->full_string_p)
3257 {
3258 tree type = TREE_TYPE (sisrc->nonzero_chars);
3259 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3260 build_int_cst (type, 1));
3261 }
3262 else
3263 srclenp1 = sisrc->nonzero_chars;
3264 }
3265
3266 src = sisrc->ptr;
3267 }
3268 else
3269 srclenp1 = NULL_TREE;
3270
3271 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3272 if (opt != no_warning)
3273 {
3274 suppress_warning (stmt, opt);
3275 return;
3276 }
3277
3278 /* If the length argument was computed from strlen(S) for some string
3279 S retrieve the strinfo index for the string (PSS->FIRST) along with
3280 the location of the strlen() call (PSS->SECOND). */
3281 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3282 if (!pss || pss->first <= 0)
3283 {
3284 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3285 suppress_warning (stmt, OPT_Wstringop_truncation);
3286
3287 return;
3288 }
3289
3290 /* Retrieve the strinfo data for the string S that LEN was computed
3291 from as some function F of strlen (S) (i.e., LEN need not be equal
3292 to strlen(S)). */
3293 strinfo *silen = get_strinfo (pss->first);
3294
3295 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3296
3297 tree func = gimple_call_fndecl (stmt);
3298
3299 bool warned = false;
3300
3301 /* When -Wstringop-truncation is set, try to determine truncation
3302 before diagnosing possible overflow. Truncation is implied by
3303 the LEN argument being equal to strlen(SRC), regardless of
3304 whether its value is known. Otherwise, when appending, or
3305 when copying into a destination of known size, issue the more
3306 generic -Wstringop-overflow which triggers for LEN arguments
3307 that in any meaningful way depend on strlen(SRC). */
3308 if (!append_p
3309 && sisrc == silen
3310 && is_strlen_related_p (src, len)
3311 && warning_at (callloc, OPT_Wstringop_truncation,
3312 "%qD output truncated before terminating nul "
3313 "copying as many bytes from a string as its length",
3314 func))
3315 warned = true;
3316 else if ((append_p || !dstsize || len == dstlenp1)
3317 && silen && is_strlen_related_p (src, silen->ptr))
3318 {
3319 /* Issue -Wstringop-overflow when appending or when writing into
3320 a destination of a known size. Otherwise, when copying into
3321 a destination of an unknown size, it's truncation. */
3322 opt_code opt = (append_p || dstsize
3323 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3324 warned = warning_at (callloc, opt,
3325 "%qD specified bound depends on the length "
3326 "of the source argument",
3327 func);
3328 }
3329 if (warned)
3330 {
3331 location_t strlenloc = pss->second;
3332 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3333 inform (strlenloc, "length computed here");
3334 }
3335 }
3336
3337 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3338 If strlen of the second argument is known and length of the third argument
3339 is that plus one, strlen of the first argument is the same after this
3340 call. Uses RVALS to determine range information. */
3341
3342 void
handle_builtin_memcpy(built_in_function bcode)3343 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3344 {
3345 tree lhs, oldlen, newlen;
3346 gimple *stmt = gsi_stmt (m_gsi);
3347 strinfo *si, *dsi;
3348
3349 tree len = gimple_call_arg (stmt, 2);
3350 tree src = gimple_call_arg (stmt, 1);
3351 tree dst = gimple_call_arg (stmt, 0);
3352
3353 int didx = get_stridx (dst, stmt);
3354 strinfo *olddsi = NULL;
3355 if (didx > 0)
3356 olddsi = get_strinfo (didx);
3357 else if (didx < 0)
3358 return;
3359
3360 if (olddsi != NULL
3361 && !integer_zerop (len))
3362 {
3363 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3364 if (tree_fits_uhwi_p (len))
3365 adjust_last_stmt (olddsi, stmt, false);
3366 }
3367
3368 int idx = get_stridx (src, stmt);
3369 if (idx == 0)
3370 return;
3371
3372 bool full_string_p;
3373 if (idx > 0)
3374 {
3375 gimple *def_stmt;
3376
3377 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3378 is known. */
3379 si = get_strinfo (idx);
3380 if (si == NULL || si->nonzero_chars == NULL_TREE)
3381 return;
3382 if (TREE_CODE (len) == INTEGER_CST
3383 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3384 {
3385 if (tree_int_cst_le (len, si->nonzero_chars))
3386 {
3387 /* Copying LEN nonzero characters, where LEN is constant. */
3388 newlen = len;
3389 full_string_p = false;
3390 }
3391 else
3392 {
3393 /* Copying the whole of the analyzed part of SI. */
3394 newlen = si->nonzero_chars;
3395 full_string_p = si->full_string_p;
3396 }
3397 }
3398 else
3399 {
3400 if (!si->full_string_p)
3401 return;
3402 if (TREE_CODE (len) != SSA_NAME)
3403 return;
3404 def_stmt = SSA_NAME_DEF_STMT (len);
3405 if (!is_gimple_assign (def_stmt)
3406 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3407 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3408 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3409 return;
3410 /* Copying variable-length string SI (and no more). */
3411 newlen = si->nonzero_chars;
3412 full_string_p = true;
3413 }
3414 }
3415 else
3416 {
3417 si = NULL;
3418 /* Handle memcpy (x, "abcd", 5) or
3419 memcpy (x, "abc\0uvw", 7). */
3420 if (!tree_fits_uhwi_p (len))
3421 return;
3422
3423 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3424 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3425 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3426 full_string_p = clen > nonzero_chars;
3427 }
3428
3429 if (!full_string_p
3430 && olddsi
3431 && olddsi->nonzero_chars
3432 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3433 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3434 {
3435 /* The SRC substring being written strictly overlaps
3436 a subsequence of the existing string OLDDSI. */
3437 newlen = olddsi->nonzero_chars;
3438 full_string_p = olddsi->full_string_p;
3439 }
3440
3441 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3442 adjust_last_stmt (olddsi, stmt, false);
3443
3444 if (didx == 0)
3445 {
3446 didx = new_stridx (dst);
3447 if (didx == 0)
3448 return;
3449 }
3450 oldlen = NULL_TREE;
3451 if (olddsi != NULL)
3452 {
3453 dsi = unshare_strinfo (olddsi);
3454 oldlen = olddsi->nonzero_chars;
3455 dsi->nonzero_chars = newlen;
3456 dsi->full_string_p = full_string_p;
3457 /* Break the chain, so adjust_related_strinfo on later pointers in
3458 the chain won't adjust this one anymore. */
3459 dsi->next = 0;
3460 dsi->stmt = NULL;
3461 dsi->endptr = NULL_TREE;
3462 }
3463 else
3464 {
3465 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3466 set_strinfo (didx, dsi);
3467 find_equal_ptrs (dst, didx);
3468 }
3469 dsi->writable = true;
3470 dsi->dont_invalidate = true;
3471 if (olddsi != NULL)
3472 {
3473 tree adj = NULL_TREE;
3474 location_t loc = gimple_location (stmt);
3475 if (oldlen == NULL_TREE)
3476 ;
3477 else if (integer_zerop (oldlen))
3478 adj = newlen;
3479 else if (TREE_CODE (oldlen) == INTEGER_CST
3480 || TREE_CODE (newlen) == INTEGER_CST)
3481 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3482 fold_convert_loc (loc, TREE_TYPE (newlen),
3483 oldlen));
3484 if (adj != NULL_TREE)
3485 adjust_related_strinfos (loc, dsi, adj);
3486 else
3487 dsi->prev = 0;
3488 }
3489 /* memcpy src may not overlap dst, so src doesn't need to be
3490 invalidated either. */
3491 if (si != NULL)
3492 si->dont_invalidate = true;
3493
3494 if (full_string_p)
3495 {
3496 lhs = gimple_call_lhs (stmt);
3497 switch (bcode)
3498 {
3499 case BUILT_IN_MEMCPY:
3500 case BUILT_IN_MEMCPY_CHK:
3501 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3502 laststmt.stmt = stmt;
3503 laststmt.len = dsi->nonzero_chars;
3504 laststmt.stridx = dsi->idx;
3505 if (lhs)
3506 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3507 break;
3508 case BUILT_IN_MEMPCPY:
3509 case BUILT_IN_MEMPCPY_CHK:
3510 break;
3511 default:
3512 gcc_unreachable ();
3513 }
3514 }
3515 }
3516
3517 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3518 If strlen of the second argument is known, strlen of the first argument
3519 is increased by the length of the second argument. Furthermore, attempt
3520 to convert it to memcpy/strcpy if the length of the first argument
3521 is known. */
3522
3523 void
handle_builtin_strcat(built_in_function bcode)3524 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3525 {
3526 int idx, didx;
3527 tree srclen, args, type, fn, objsz, endptr;
3528 bool success;
3529 gimple *stmt = gsi_stmt (m_gsi);
3530 strinfo *si, *dsi;
3531 location_t loc = gimple_location (stmt);
3532
3533 tree src = gimple_call_arg (stmt, 1);
3534 tree dst = gimple_call_arg (stmt, 0);
3535
3536 /* Bail if the source is the same as destination. It will be diagnosed
3537 elsewhere. */
3538 if (operand_equal_p (src, dst, 0))
3539 return;
3540
3541 tree lhs = gimple_call_lhs (stmt);
3542
3543 didx = get_stridx (dst, stmt);
3544 if (didx < 0)
3545 return;
3546
3547 dsi = NULL;
3548 if (didx > 0)
3549 dsi = get_strinfo (didx);
3550
3551 srclen = NULL_TREE;
3552 si = NULL;
3553 idx = get_stridx (src, stmt);
3554 if (idx < 0)
3555 srclen = build_int_cst (size_type_node, ~idx);
3556 else if (idx > 0)
3557 {
3558 si = get_strinfo (idx);
3559 if (si != NULL)
3560 srclen = get_string_length (si);
3561 }
3562
3563 /* Disable warning for the transformed statement? */
3564 opt_code no_warning_opt = no_warning;
3565
3566 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3567 {
3568 {
3569 /* The concatenation always involves copying at least one byte
3570 (the terminating nul), even if the source string is empty.
3571 If the source is unknown assume it's one character long and
3572 used that as both sizes. */
3573 tree slen = srclen;
3574 if (slen)
3575 {
3576 tree type = TREE_TYPE (slen);
3577 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3578 }
3579
3580 tree sptr = si && si->ptr ? si->ptr : src;
3581 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3582 slen);
3583 if (no_warning_opt)
3584 suppress_warning (stmt, no_warning_opt);
3585 }
3586
3587 /* strcat (p, q) can be transformed into
3588 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3589 with length endptr - p if we need to compute the length
3590 later on. Don't do this transformation if we don't need
3591 it. */
3592 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3593 {
3594 if (didx == 0)
3595 {
3596 didx = new_stridx (dst);
3597 if (didx == 0)
3598 return;
3599 }
3600 if (dsi == NULL)
3601 {
3602 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3603 set_strinfo (didx, dsi);
3604 find_equal_ptrs (dst, didx);
3605 }
3606 else
3607 {
3608 dsi = unshare_strinfo (dsi);
3609 dsi->nonzero_chars = NULL_TREE;
3610 dsi->full_string_p = false;
3611 dsi->next = 0;
3612 dsi->endptr = NULL_TREE;
3613 }
3614 dsi->writable = true;
3615 dsi->stmt = stmt;
3616 dsi->dont_invalidate = true;
3617 }
3618 return;
3619 }
3620
3621 tree dstlen = dsi->nonzero_chars;
3622 endptr = dsi->endptr;
3623
3624 dsi = unshare_strinfo (dsi);
3625 dsi->endptr = NULL_TREE;
3626 dsi->stmt = NULL;
3627 dsi->writable = true;
3628
3629 if (srclen != NULL_TREE)
3630 {
3631 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3632 TREE_TYPE (dsi->nonzero_chars),
3633 dsi->nonzero_chars, srclen);
3634 gcc_assert (dsi->full_string_p);
3635 adjust_related_strinfos (loc, dsi, srclen);
3636 dsi->dont_invalidate = true;
3637 }
3638 else
3639 {
3640 dsi->nonzero_chars = NULL;
3641 dsi->full_string_p = false;
3642 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3643 dsi->dont_invalidate = true;
3644 }
3645
3646 if (si != NULL)
3647 /* strcat src may not overlap dst, so src doesn't need to be
3648 invalidated either. */
3649 si->dont_invalidate = true;
3650
3651 /* For now. Could remove the lhs from the call and add
3652 lhs = dst; afterwards. */
3653 if (lhs)
3654 return;
3655
3656 fn = NULL_TREE;
3657 objsz = NULL_TREE;
3658 switch (bcode)
3659 {
3660 case BUILT_IN_STRCAT:
3661 if (srclen != NULL_TREE)
3662 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3663 else
3664 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3665 break;
3666 case BUILT_IN_STRCAT_CHK:
3667 if (srclen != NULL_TREE)
3668 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3669 else
3670 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3671 objsz = gimple_call_arg (stmt, 2);
3672 break;
3673 default:
3674 gcc_unreachable ();
3675 }
3676
3677 if (fn == NULL_TREE)
3678 return;
3679
3680 if (dsi && dstlen)
3681 {
3682 tree type = TREE_TYPE (dstlen);
3683
3684 /* Compute the size of the source sequence, including the nul. */
3685 tree srcsize = srclen ? srclen : size_zero_node;
3686 tree one = build_int_cst (type, 1);
3687 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3688 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3689 tree sptr = si && si->ptr ? si->ptr : src;
3690
3691 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3692 srcsize);
3693 if (no_warning_opt)
3694 suppress_warning (stmt, no_warning_opt);
3695 }
3696
3697 tree len = NULL_TREE;
3698 if (srclen != NULL_TREE)
3699 {
3700 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3701 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3702
3703 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3704 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3705 build_int_cst (type, 1));
3706 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3707 GSI_SAME_STMT);
3708 }
3709 if (endptr)
3710 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3711 else
3712 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3713 fold_convert_loc (loc, sizetype,
3714 unshare_expr (dstlen)));
3715 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3716 GSI_SAME_STMT);
3717 if (objsz)
3718 {
3719 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3720 fold_convert_loc (loc, TREE_TYPE (objsz),
3721 unshare_expr (dstlen)));
3722 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3723 GSI_SAME_STMT);
3724 }
3725 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3726 {
3727 fprintf (dump_file, "Optimizing: ");
3728 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3729 }
3730 if (srclen != NULL_TREE)
3731 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3732 dst, src, len, objsz);
3733 else
3734 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3735 dst, src, objsz);
3736 if (success)
3737 {
3738 stmt = gsi_stmt (m_gsi);
3739 update_stmt (stmt);
3740 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3741 {
3742 fprintf (dump_file, "into: ");
3743 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3744 }
3745 /* If srclen == NULL, note that current string length can be
3746 computed by transforming this strcpy into stpcpy. */
3747 if (srclen == NULL_TREE && dsi->dont_invalidate)
3748 dsi->stmt = stmt;
3749 adjust_last_stmt (dsi, stmt, true);
3750 if (srclen != NULL_TREE)
3751 {
3752 laststmt.stmt = stmt;
3753 laststmt.len = srclen;
3754 laststmt.stridx = dsi->idx;
3755 }
3756 }
3757 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3758 fprintf (dump_file, "not possible.\n");
3759
3760 if (no_warning_opt)
3761 suppress_warning (stmt, no_warning_opt);
3762 }
3763
3764 /* Handle a call to an allocation function like alloca, malloc or calloc,
3765 or an ordinary allocation function declared with attribute alloc_size. */
3766
3767 void
handle_alloc_call(built_in_function bcode)3768 strlen_pass::handle_alloc_call (built_in_function bcode)
3769 {
3770 gimple *stmt = gsi_stmt (m_gsi);
3771 tree lhs = gimple_call_lhs (stmt);
3772 if (lhs == NULL_TREE)
3773 return;
3774
3775 gcc_assert (get_stridx (lhs, stmt) == 0);
3776 int idx = new_stridx (lhs);
3777 tree length = NULL_TREE;
3778 if (bcode == BUILT_IN_CALLOC)
3779 length = build_int_cst (size_type_node, 0);
3780 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3781 if (bcode == BUILT_IN_CALLOC)
3782 {
3783 /* Only set STMT for calloc and malloc. */
3784 si->stmt = stmt;
3785 /* Only set ENDPTR for calloc. */
3786 si->endptr = lhs;
3787 }
3788 else if (bcode == BUILT_IN_MALLOC)
3789 si->stmt = stmt;
3790
3791 /* Set ALLOC is set for all allocation functions. */
3792 si->alloc = stmt;
3793 set_strinfo (idx, si);
3794 si->writable = true;
3795 si->dont_invalidate = true;
3796 }
3797
3798 /* Handle a call to memset.
3799 After a call to calloc, memset(,0,) is unnecessary.
3800 memset(malloc(n),0,n) is calloc(n,1).
3801 return true when the call is transformed, false otherwise.
3802 When nonnull uses RVALS to determine range information. */
3803
3804 bool
handle_builtin_memset(bool * zero_write)3805 strlen_pass::handle_builtin_memset (bool *zero_write)
3806 {
3807 gimple *memset_stmt = gsi_stmt (m_gsi);
3808 tree ptr = gimple_call_arg (memset_stmt, 0);
3809 /* Set to the non-constant offset added to PTR. */
3810 wide_int offrng[2];
3811 int idx1 = get_stridx (ptr, memset_stmt, offrng, ptr_qry.rvals);
3812 if (idx1 <= 0)
3813 return false;
3814 strinfo *si1 = get_strinfo (idx1);
3815 if (!si1)
3816 return false;
3817 gimple *alloc_stmt = si1->alloc;
3818 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3819 return false;
3820 tree callee1 = gimple_call_fndecl (alloc_stmt);
3821 if (!valid_builtin_call (alloc_stmt))
3822 return false;
3823 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3824 tree memset_size = gimple_call_arg (memset_stmt, 2);
3825
3826 /* Check for overflow. */
3827 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3828
3829 /* Bail when there is no statement associated with the destination
3830 (the statement may be null even when SI1->ALLOC is not). */
3831 if (!si1->stmt)
3832 return false;
3833
3834 /* Avoid optimizing if store is at a variable offset from the beginning
3835 of the allocated object. */
3836 if (offrng[0] != 0 || offrng[0] != offrng[1])
3837 return false;
3838
3839 /* Bail when the call writes a non-zero value. */
3840 if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3841 return false;
3842
3843 /* Let the caller know the memset call cleared the destination. */
3844 *zero_write = true;
3845
3846 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3847 if (code1 == BUILT_IN_CALLOC)
3848 /* Not touching alloc_stmt */ ;
3849 else if (code1 == BUILT_IN_MALLOC
3850 && operand_equal_p (memset_size, alloc_size, 0))
3851 {
3852 /* Replace the malloc + memset calls with calloc. */
3853 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3854 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3855 alloc_size, build_one_cst (size_type_node));
3856 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3857 si1->full_string_p = true;
3858 si1->stmt = gsi_stmt (gsi1);
3859 }
3860 else
3861 return false;
3862 tree lhs = gimple_call_lhs (memset_stmt);
3863 unlink_stmt_vdef (memset_stmt);
3864 if (lhs)
3865 {
3866 gimple *assign = gimple_build_assign (lhs, ptr);
3867 gsi_replace (&m_gsi, assign, false);
3868 }
3869 else
3870 {
3871 gsi_remove (&m_gsi, true);
3872 release_defs (memset_stmt);
3873 }
3874
3875 return true;
3876 }
3877
3878 /* Return first such statement if RES is used in statements testing its
3879 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3880 nonnull if and only RES is used in such expressions exclusively and
3881 in none other. */
3882
3883 static gimple *
use_in_zero_equality(tree res,bool exclusive=true)3884 use_in_zero_equality (tree res, bool exclusive = true)
3885 {
3886 gimple *first_use = NULL;
3887
3888 use_operand_p use_p;
3889 imm_use_iterator iter;
3890
3891 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3892 {
3893 gimple *use_stmt = USE_STMT (use_p);
3894
3895 if (is_gimple_debug (use_stmt))
3896 continue;
3897
3898 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3899 {
3900 tree_code code = gimple_assign_rhs_code (use_stmt);
3901 if (code == COND_EXPR)
3902 {
3903 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3904 if ((TREE_CODE (cond_expr) != EQ_EXPR
3905 && (TREE_CODE (cond_expr) != NE_EXPR))
3906 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3907 {
3908 if (exclusive)
3909 return NULL;
3910 continue;
3911 }
3912 }
3913 else if (code == EQ_EXPR || code == NE_EXPR)
3914 {
3915 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3916 {
3917 if (exclusive)
3918 return NULL;
3919 continue;
3920 }
3921 }
3922 else if (exclusive)
3923 return NULL;
3924 else
3925 continue;
3926 }
3927 else if (gimple_code (use_stmt) == GIMPLE_COND)
3928 {
3929 tree_code code = gimple_cond_code (use_stmt);
3930 if ((code != EQ_EXPR && code != NE_EXPR)
3931 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3932 {
3933 if (exclusive)
3934 return NULL;
3935 continue;
3936 }
3937 }
3938 else if (exclusive)
3939 return NULL;
3940 else
3941 continue;
3942
3943 if (!first_use)
3944 first_use = use_stmt;
3945 }
3946
3947 return first_use;
3948 }
3949
3950 /* Handle a call to memcmp. We try to handle small comparisons by
3951 converting them to load and compare, and replacing the call to memcmp
3952 with a __builtin_memcmp_eq call where possible.
3953 return true when call is transformed, return false otherwise. */
3954
3955 bool
handle_builtin_memcmp()3956 strlen_pass::handle_builtin_memcmp ()
3957 {
3958 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3959 tree res = gimple_call_lhs (stmt);
3960
3961 if (!res || !use_in_zero_equality (res))
3962 return false;
3963
3964 tree arg1 = gimple_call_arg (stmt, 0);
3965 tree arg2 = gimple_call_arg (stmt, 1);
3966 tree len = gimple_call_arg (stmt, 2);
3967 unsigned HOST_WIDE_INT leni;
3968
3969 if (tree_fits_uhwi_p (len)
3970 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3971 && pow2p_hwi (leni))
3972 {
3973 leni *= CHAR_TYPE_SIZE;
3974 unsigned align1 = get_pointer_alignment (arg1);
3975 unsigned align2 = get_pointer_alignment (arg2);
3976 unsigned align = MIN (align1, align2);
3977 scalar_int_mode mode;
3978 if (int_mode_for_size (leni, 1).exists (&mode)
3979 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
3980 {
3981 location_t loc = gimple_location (stmt);
3982 tree type, off;
3983 type = build_nonstandard_integer_type (leni, 1);
3984 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
3985 tree ptrtype = build_pointer_type_for_mode (char_type_node,
3986 ptr_mode, true);
3987 off = build_int_cst (ptrtype, 0);
3988 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
3989 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
3990 tree tem1 = fold_const_aggregate_ref (arg1);
3991 if (tem1)
3992 arg1 = tem1;
3993 tree tem2 = fold_const_aggregate_ref (arg2);
3994 if (tem2)
3995 arg2 = tem2;
3996 res = fold_convert_loc (loc, TREE_TYPE (res),
3997 fold_build2_loc (loc, NE_EXPR,
3998 boolean_type_node,
3999 arg1, arg2));
4000 gimplify_and_update_call_from_tree (&m_gsi, res);
4001 return true;
4002 }
4003 }
4004
4005 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4006 return true;
4007 }
4008
4009 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4010 of the string(s) referenced by ARG if it can be determined.
4011 If the length cannot be determined, sets *SIZE to the size of
4012 the array the string is stored in, if any. If no such array is
4013 known, sets *SIZE to -1. When the strings are nul-terminated sets
4014 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
4015 determine range information. Returns true on success. */
4016
4017 bool
get_len_or_size(gimple * stmt,tree arg,int idx,unsigned HOST_WIDE_INT lenrng[2],unsigned HOST_WIDE_INT * size,bool * nulterm)4018 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
4019 unsigned HOST_WIDE_INT lenrng[2],
4020 unsigned HOST_WIDE_INT *size, bool *nulterm)
4021 {
4022 /* Invalidate. */
4023 *size = HOST_WIDE_INT_M1U;
4024
4025 if (idx < 0)
4026 {
4027 /* IDX is the inverted constant string length. */
4028 lenrng[0] = ~idx;
4029 lenrng[1] = lenrng[0];
4030 *nulterm = true;
4031 return true;
4032 }
4033
4034 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4035 possible length + 1. */
4036 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4037
4038 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4039 {
4040 /* FIXME: Handle all this in_range_strlen_dynamic. */
4041 if (!si->nonzero_chars)
4042 ;
4043 else if (tree_fits_uhwi_p (si->nonzero_chars))
4044 {
4045 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4046 *nulterm = si->full_string_p;
4047 /* Set the upper bound only if the string is known to be
4048 nul-terminated, otherwise leave it at maximum + 1. */
4049 if (*nulterm)
4050 lenrng[1] = lenrng[0];
4051 }
4052 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4053 {
4054 value_range r;
4055 get_range_query (cfun)->range_of_expr (r, si->nonzero_chars);
4056 if (r.kind () == VR_RANGE)
4057 {
4058 lenrng[0] = r.lower_bound ().to_uhwi ();
4059 lenrng[1] = r.upper_bound ().to_uhwi ();
4060 *nulterm = si->full_string_p;
4061 }
4062 }
4063 }
4064
4065 if (lenrng[0] != HOST_WIDE_INT_MAX)
4066 return true;
4067
4068 /* Compute the minimum and maximum real or possible lengths. */
4069 c_strlen_data lendata = { };
4070 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4071 to have it set to the length of the longest string in a PHI. */
4072 lendata.maxbound = arg;
4073 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry);
4074
4075 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4076 if (tree_fits_uhwi_p (lendata.maxbound)
4077 && !integer_all_onesp (lendata.maxbound))
4078 maxbound = tree_to_uhwi (lendata.maxbound);
4079
4080 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4081 {
4082 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4083 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4084
4085 /* The longest string in this data model. */
4086 const unsigned HOST_WIDE_INT lenmax
4087 = tree_to_uhwi (max_object_size ()) - 2;
4088
4089 if (maxbound == HOST_WIDE_INT_M1U)
4090 {
4091 lenrng[0] = minlen;
4092 lenrng[1] = maxlen;
4093 *nulterm = minlen == maxlen;
4094 }
4095 else if (maxlen < lenmax)
4096 {
4097 *size = maxbound + 1;
4098 *nulterm = false;
4099 }
4100 else
4101 return false;
4102
4103 return true;
4104 }
4105
4106 if (maxbound != HOST_WIDE_INT_M1U
4107 && lendata.maxlen
4108 && !integer_all_onesp (lendata.maxlen))
4109 {
4110 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4111 of the longest string based on the sizes of the arrays referenced
4112 by ARG. */
4113 *size = maxbound + 1;
4114 *nulterm = false;
4115 return true;
4116 }
4117
4118 return false;
4119 }
4120
4121 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4122 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4123 for a sufficiently large BOUND). If the result is based on the length
4124 of one string being greater than the longest string that would fit in
4125 the array pointer to by the argument, set *PLEN and *PSIZE to
4126 the corresponding length (or its complement when the string is known
4127 to be at least as long and need not be nul-terminated) and size.
4128 Otherwise return null. */
4129
4130 tree
strxcmp_eqz_result(gimple * stmt,tree arg1,int idx1,tree arg2,int idx2,unsigned HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT * psize)4131 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4132 tree arg2, int idx2,
4133 unsigned HOST_WIDE_INT bound,
4134 unsigned HOST_WIDE_INT len[2],
4135 unsigned HOST_WIDE_INT *psize)
4136 {
4137 /* Determine the range the length of each string is in and whether it's
4138 known to be nul-terminated, or the size of the array it's stored in. */
4139 bool nul1, nul2;
4140 unsigned HOST_WIDE_INT siz1, siz2;
4141 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4142 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4143 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4144 return NULL_TREE;
4145
4146 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4147 to HWI_MAX when invalid. Adjust the length of each string to consider
4148 to be no more than BOUND. */
4149 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4150 len1rng[0] = bound;
4151 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4152 len1rng[1] = bound;
4153 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4154 len2rng[0] = bound;
4155 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4156 len2rng[1] = bound;
4157
4158 /* Two empty strings are equal. */
4159 if (len1rng[1] == 0 && len2rng[1] == 0)
4160 return integer_one_node;
4161
4162 /* The strings are definitely unequal when the lower bound of the length
4163 of one of them is greater than the length of the longest string that
4164 would fit into the other array. */
4165 if (len1rng[0] == HOST_WIDE_INT_MAX
4166 && len2rng[0] != HOST_WIDE_INT_MAX
4167 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4168 || len2rng[0] > siz1))
4169 {
4170 *psize = siz1;
4171 len[0] = len1rng[0];
4172 /* Set LEN[0] to the lower bound of ARG1's length when it's
4173 nul-terminated or to the complement of its minimum length
4174 otherwise, */
4175 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4176 return integer_zero_node;
4177 }
4178
4179 if (len2rng[0] == HOST_WIDE_INT_MAX
4180 && len1rng[0] != HOST_WIDE_INT_MAX
4181 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4182 || len1rng[0] > siz2))
4183 {
4184 *psize = siz2;
4185 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4186 len[1] = len2rng[0];
4187 return integer_zero_node;
4188 }
4189
4190 /* The strings are also definitely unequal when their lengths are unequal
4191 and at least one is nul-terminated. */
4192 if (len1rng[0] != HOST_WIDE_INT_MAX
4193 && len2rng[0] != HOST_WIDE_INT_MAX
4194 && ((len1rng[1] < len2rng[0] && nul1)
4195 || (len2rng[1] < len1rng[0] && nul2)))
4196 {
4197 if (bound <= len1rng[0] || bound <= len2rng[0])
4198 *psize = bound;
4199 else
4200 *psize = HOST_WIDE_INT_M1U;
4201
4202 len[0] = len1rng[0];
4203 len[1] = len2rng[0];
4204 return integer_zero_node;
4205 }
4206
4207 /* The string lengths may be equal or unequal. Even when equal and
4208 both strings nul-terminated, without the string contents there's
4209 no way to determine whether they are equal. */
4210 return NULL_TREE;
4211 }
4212
4213 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4214 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4215 whose result is used in equality expressions that evaluate to
4216 a constant due to one argument being longer than the size of
4217 the other. */
4218
4219 static void
maybe_warn_pointless_strcmp(gimple * stmt,HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT siz)4220 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4221 unsigned HOST_WIDE_INT len[2],
4222 unsigned HOST_WIDE_INT siz)
4223 {
4224 tree lhs = gimple_call_lhs (stmt);
4225 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4226 if (!use)
4227 return;
4228
4229 bool at_least = false;
4230
4231 /* Excessive LEN[i] indicates a lower bound. */
4232 if (len[0] > HOST_WIDE_INT_MAX)
4233 {
4234 at_least = true;
4235 len[0] = ~len[0];
4236 }
4237
4238 if (len[1] > HOST_WIDE_INT_MAX)
4239 {
4240 at_least = true;
4241 len[1] = ~len[1];
4242 }
4243
4244 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4245
4246 /* FIXME: Include a note pointing to the declaration of the smaller
4247 array. */
4248 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4249
4250 tree callee = gimple_call_fndecl (stmt);
4251 bool warned = false;
4252 if (siz <= minlen && bound == -1)
4253 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4254 (at_least
4255 ? G_("%qD of a string of length %wu or more and "
4256 "an array of size %wu evaluates to nonzero")
4257 : G_("%qD of a string of length %wu and an array "
4258 "of size %wu evaluates to nonzero")),
4259 callee, minlen, siz);
4260 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4261 {
4262 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4263 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4264 "%qD of strings of length %wu and %wu "
4265 "and bound of %wu evaluates to nonzero",
4266 callee, len[0], len[1], bound);
4267 else
4268 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4269 "%qD of a string of length %wu, an array "
4270 "of size %wu and bound of %wu evaluates to "
4271 "nonzero",
4272 callee, minlen, siz, bound);
4273 }
4274
4275 if (!warned)
4276 return;
4277
4278 location_t use_loc = gimple_location (use);
4279 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4280 inform (use_loc, "in this expression");
4281 }
4282
4283
4284 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4285 when possible or by transforming the latter to the former. Warn about
4286 calls where the length of one argument is greater than the size of
4287 the array to which the other argument points if the latter's length
4288 is not known. Return true when the call has been transformed into
4289 another and false otherwise. */
4290
4291 bool
handle_builtin_string_cmp()4292 strlen_pass::handle_builtin_string_cmp ()
4293 {
4294 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4295 tree lhs = gimple_call_lhs (stmt);
4296
4297 if (!lhs)
4298 return false;
4299
4300 tree arg1 = gimple_call_arg (stmt, 0);
4301 tree arg2 = gimple_call_arg (stmt, 1);
4302 int idx1 = get_stridx (arg1, stmt);
4303 int idx2 = get_stridx (arg2, stmt);
4304
4305 /* For strncmp set to the value of the third argument if known. */
4306 HOST_WIDE_INT bound = -1;
4307 tree len = NULL_TREE;
4308 /* Extract the strncmp bound. */
4309 if (gimple_call_num_args (stmt) == 3)
4310 {
4311 len = gimple_call_arg (stmt, 2);
4312 if (tree_fits_shwi_p (len))
4313 bound = tree_to_shwi (len);
4314
4315 /* If the bound argument is NOT known, do nothing. */
4316 if (bound < 0)
4317 return false;
4318 }
4319
4320 /* Avoid folding if either argument is not a nul-terminated array.
4321 Defer warning until later. */
4322 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4323 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4324 return false;
4325
4326 {
4327 /* Set to the length of one argument (or its complement if it's
4328 the lower bound of a range) and the size of the array storing
4329 the other if the result is based on the former being equal to
4330 or greater than the latter. */
4331 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4332 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4333
4334 /* Try to determine if the two strings are either definitely equal
4335 or definitely unequal and if so, either fold the result to zero
4336 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4337 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4338 len, &siz))
4339 {
4340 if (integer_zerop (eqz))
4341 {
4342 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4343
4344 /* When the lengths of the first two string arguments are
4345 known to be unequal set the range of the result to non-zero.
4346 This allows the call to be eliminated if its result is only
4347 used in tests for equality to zero. */
4348 wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4349 set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4350 return false;
4351 }
4352 /* When the two strings are definitely equal (such as when they
4353 are both empty) fold the call to the constant result. */
4354 replace_call_with_value (&m_gsi, integer_zero_node);
4355 return true;
4356 }
4357 }
4358
4359 /* Return if nothing is known about the strings pointed to by ARG1
4360 and ARG2. */
4361 if (idx1 == 0 && idx2 == 0)
4362 return false;
4363
4364 /* Determine either the length or the size of each of the strings,
4365 whichever is available. */
4366 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4367 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4368
4369 {
4370 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4371 unsigned HOST_WIDE_INT arsz1, arsz2;
4372 bool nulterm[2];
4373
4374 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4375 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4376 return false;
4377
4378 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4379 cstlen1 = len1rng[0];
4380 else if (arsz1 < HOST_WIDE_INT_M1U)
4381 arysiz1 = arsz1;
4382
4383 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4384 cstlen2 = len2rng[0];
4385 else if (arsz2 < HOST_WIDE_INT_M1U)
4386 arysiz2 = arsz2;
4387 }
4388
4389 /* Bail if neither the string length nor the size of the array
4390 it is stored in can be determined. */
4391 if ((cstlen1 < 0 && arysiz1 < 0)
4392 || (cstlen2 < 0 && arysiz2 < 0)
4393 || (cstlen1 < 0 && cstlen2 < 0))
4394 return false;
4395
4396 if (cstlen1 >= 0)
4397 ++cstlen1;
4398 if (cstlen2 >= 0)
4399 ++cstlen2;
4400
4401 /* The exact number of characters to compare. */
4402 HOST_WIDE_INT cmpsiz;
4403 if (cstlen1 >= 0 && cstlen2 >= 0)
4404 cmpsiz = MIN (cstlen1, cstlen2);
4405 else if (cstlen1 >= 0)
4406 cmpsiz = cstlen1;
4407 else
4408 cmpsiz = cstlen2;
4409 if (bound >= 0)
4410 cmpsiz = MIN (cmpsiz, bound);
4411 /* The size of the array in which the unknown string is stored. */
4412 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4413
4414 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4415 {
4416 /* If the known length is less than the size of the other array
4417 and the strcmp result is only used to test equality to zero,
4418 transform the call to the equivalent _eq call. */
4419 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4420 : BUILT_IN_STRNCMP_EQ))
4421 {
4422 tree n = build_int_cst (size_type_node, cmpsiz);
4423 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4424 return true;
4425 }
4426 }
4427
4428 return false;
4429 }
4430
4431 /* Handle a POINTER_PLUS_EXPR statement.
4432 For p = "abcd" + 2; compute associated length, or if
4433 p = q + off is pointing to a '\0' character of a string, call
4434 zero_length_string on it. */
4435
4436 void
handle_pointer_plus()4437 strlen_pass::handle_pointer_plus ()
4438 {
4439 gimple *stmt = gsi_stmt (m_gsi);
4440 tree lhs = gimple_assign_lhs (stmt), off;
4441 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
4442 strinfo *si, *zsi;
4443
4444 if (idx == 0)
4445 return;
4446
4447 if (idx < 0)
4448 {
4449 tree off = gimple_assign_rhs2 (stmt);
4450 if (tree_fits_uhwi_p (off)
4451 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4452 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4453 = ~(~idx - (int) tree_to_uhwi (off));
4454 return;
4455 }
4456
4457 si = get_strinfo (idx);
4458 if (si == NULL || si->nonzero_chars == NULL_TREE)
4459 return;
4460
4461 off = gimple_assign_rhs2 (stmt);
4462 zsi = NULL;
4463 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4464 zsi = zero_length_string (lhs, si);
4465 else if (TREE_CODE (off) == SSA_NAME)
4466 {
4467 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4468 if (gimple_assign_single_p (def_stmt)
4469 && si->full_string_p
4470 && operand_equal_p (si->nonzero_chars,
4471 gimple_assign_rhs1 (def_stmt), 0))
4472 zsi = zero_length_string (lhs, si);
4473 }
4474 if (zsi != NULL
4475 && si->endptr != NULL_TREE
4476 && si->endptr != lhs
4477 && TREE_CODE (si->endptr) == SSA_NAME)
4478 {
4479 enum tree_code rhs_code
4480 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4481 ? SSA_NAME : NOP_EXPR;
4482 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4483 gcc_assert (gsi_stmt (m_gsi) == stmt);
4484 update_stmt (stmt);
4485 }
4486 }
4487
4488 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4489 clear all flags. Return true on success and false on failure. */
4490
4491 static bool
nonzero_bytes_for_type(tree type,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul)4492 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4493 bool *nulterm, bool *allnul, bool *allnonnul)
4494 {
4495 /* Use the size of the type of the expression as the size of the store,
4496 and set the upper bound of the length range to that of the size.
4497 Nothing is known about the contents so clear all flags. */
4498 tree typesize = TYPE_SIZE_UNIT (type);
4499 if (!type)
4500 return false;
4501
4502 if (!tree_fits_uhwi_p (typesize))
4503 return false;
4504
4505 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4506 if (sz > UINT_MAX)
4507 return false;
4508
4509 lenrange[2] = sz;
4510 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4511 lenrange[0] = 0;
4512 *nulterm = false;
4513 *allnul = false;
4514 *allnonnul = false;
4515 return true;
4516 }
4517
4518 /* Recursively determine the minimum and maximum number of leading nonzero
4519 bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4520 to each.
4521 Sets LENRANGE[2] to the total size of the access (which may be less
4522 than LENRANGE[1] when what's being referenced by EXP is a pointer
4523 rather than an array).
4524 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4525 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4526 OFFSET and NBYTES are the offset into the representation and
4527 the size of the access to it determined from an ADDR_EXPR (i.e.,
4528 a pointer) or MEM_REF or zero for other expressions.
4529 Uses RVALS to determine range information.
4530 Avoids recursing deeper than the limits in SNLIM allow.
4531 Returns true on success and false otherwise. */
4532
4533 bool
count_nonzero_bytes(tree exp,gimple * stmt,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,ssa_name_limit_t & snlim)4534 strlen_pass::count_nonzero_bytes (tree exp, gimple *stmt,
4535 unsigned HOST_WIDE_INT offset,
4536 unsigned HOST_WIDE_INT nbytes,
4537 unsigned lenrange[3], bool *nulterm,
4538 bool *allnul, bool *allnonnul,
4539 ssa_name_limit_t &snlim)
4540 {
4541 if (TREE_CODE (exp) == SSA_NAME)
4542 {
4543 /* Handle non-zero single-character stores specially. */
4544 tree type = TREE_TYPE (exp);
4545 if (TREE_CODE (type) == INTEGER_TYPE
4546 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4547 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4548 && tree_expr_nonzero_p (exp))
4549 {
4550 /* If the character EXP is known to be non-zero (even if its
4551 exact value is not known) recurse once to set the range
4552 for an arbitrary constant. */
4553 exp = build_int_cst (type, 1);
4554 return count_nonzero_bytes (exp, stmt,
4555 offset, 1, lenrange,
4556 nulterm, allnul, allnonnul, snlim);
4557 }
4558
4559 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4560 if (gimple_assign_single_p (stmt))
4561 {
4562 exp = gimple_assign_rhs1 (stmt);
4563 if (!DECL_P (exp)
4564 && TREE_CODE (exp) != CONSTRUCTOR
4565 && TREE_CODE (exp) != MEM_REF)
4566 return false;
4567 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4568 }
4569 else if (gimple_code (stmt) == GIMPLE_PHI)
4570 {
4571 /* Avoid processing an SSA_NAME that has already been visited
4572 or if an SSA_NAME limit has been reached. Indicate success
4573 if the former and failure if the latter. */
4574 if (int res = snlim.next_phi (exp))
4575 return res > 0;
4576
4577 /* Determine the minimum and maximum from the PHI arguments. */
4578 unsigned int n = gimple_phi_num_args (stmt);
4579 for (unsigned i = 0; i != n; i++)
4580 {
4581 tree def = gimple_phi_arg_def (stmt, i);
4582 if (!count_nonzero_bytes (def, stmt,
4583 offset, nbytes, lenrange, nulterm,
4584 allnul, allnonnul, snlim))
4585 return false;
4586 }
4587
4588 return true;
4589 }
4590 }
4591
4592 if (TREE_CODE (exp) == CONSTRUCTOR)
4593 {
4594 if (nbytes)
4595 /* If NBYTES has already been determined by an outer MEM_REF
4596 fail rather than overwriting it (this shouldn't happen). */
4597 return false;
4598
4599 tree type = TREE_TYPE (exp);
4600 tree size = TYPE_SIZE_UNIT (type);
4601 if (!size || !tree_fits_uhwi_p (size))
4602 return false;
4603
4604 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4605 if (byte_size < offset)
4606 return false;
4607
4608 nbytes = byte_size - offset;
4609 }
4610
4611 if (TREE_CODE (exp) == MEM_REF)
4612 {
4613 if (nbytes)
4614 return false;
4615
4616 tree arg = TREE_OPERAND (exp, 0);
4617 tree off = TREE_OPERAND (exp, 1);
4618
4619 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4620 return false;
4621
4622 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4623 if (INT_MAX < wioff)
4624 return false;
4625
4626 offset += wioff;
4627 if (INT_MAX < offset)
4628 return false;
4629
4630 /* The size of the MEM_REF access determines the number of bytes. */
4631 tree type = TREE_TYPE (exp);
4632 tree typesize = TYPE_SIZE_UNIT (type);
4633 if (!typesize || !tree_fits_uhwi_p (typesize))
4634 return false;
4635 nbytes = tree_to_uhwi (typesize);
4636 if (!nbytes)
4637 return false;
4638
4639 /* Handle MEM_REF = SSA_NAME types of assignments. */
4640 return count_nonzero_bytes_addr (arg, stmt,
4641 offset, nbytes, lenrange, nulterm,
4642 allnul, allnonnul, snlim);
4643 }
4644
4645 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4646 {
4647 /* If EXP can be folded into a constant use the result. Otherwise
4648 proceed to use EXP to determine a range of the result. */
4649 if (tree fold_exp = ctor_for_folding (exp))
4650 if (fold_exp != error_mark_node)
4651 exp = fold_exp;
4652 }
4653
4654 const char *prep = NULL;
4655 if (TREE_CODE (exp) == STRING_CST)
4656 {
4657 unsigned nchars = TREE_STRING_LENGTH (exp);
4658 if (nchars < offset)
4659 return false;
4660
4661 if (!nbytes)
4662 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4663 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4664 of the access), set it here to the size of the string, including
4665 all internal and trailing nuls if the string has any. */
4666 nbytes = nchars - offset;
4667 else if (nchars - offset < nbytes)
4668 return false;
4669
4670 prep = TREE_STRING_POINTER (exp) + offset;
4671 }
4672
4673 unsigned char buf[256];
4674 if (!prep)
4675 {
4676 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4677 return false;
4678 /* If the pointer to representation hasn't been set above
4679 for STRING_CST point it at the buffer. */
4680 prep = reinterpret_cast <char *>(buf);
4681 /* Try to extract the representation of the constant object
4682 or expression starting from the offset. */
4683 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4684 if (repsize < nbytes)
4685 {
4686 /* This should only happen when REPSIZE is zero because EXP
4687 doesn't denote an object with a known initializer, except
4688 perhaps when the reference reads past its end. */
4689 lenrange[0] = 0;
4690 prep = NULL;
4691 }
4692 else if (!nbytes)
4693 nbytes = repsize;
4694 else if (nbytes < repsize)
4695 return false;
4696 }
4697
4698 if (!nbytes)
4699 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4700 nulterm, allnul, allnonnul);
4701
4702 /* Compute the number of leading nonzero bytes in the representation
4703 and update the minimum and maximum. */
4704 unsigned HOST_WIDE_INT n = prep ? strnlen (prep, nbytes) : nbytes;
4705
4706 if (n < lenrange[0])
4707 lenrange[0] = n;
4708 if (lenrange[1] < n)
4709 lenrange[1] = n;
4710
4711 /* Set the size of the representation. */
4712 if (lenrange[2] < nbytes)
4713 lenrange[2] = nbytes;
4714
4715 /* Clear NULTERM if none of the bytes is zero. */
4716 if (n == nbytes)
4717 *nulterm = false;
4718
4719 if (n)
4720 {
4721 /* When the initial number of non-zero bytes N is non-zero, reset
4722 *ALLNUL; if N is less than that the size of the representation
4723 also clear *ALLNONNUL. */
4724 *allnul = false;
4725 if (n < nbytes)
4726 *allnonnul = false;
4727 }
4728 else if (*allnul || *allnonnul)
4729 {
4730 *allnonnul = false;
4731
4732 if (*allnul)
4733 {
4734 /* When either ALLNUL is set and N is zero, also determine
4735 whether all subsequent bytes after the first one (which
4736 is nul) are zero or nonzero and clear ALLNUL if not. */
4737 for (const char *p = prep; p != prep + nbytes; ++p)
4738 if (*p)
4739 {
4740 *allnul = false;
4741 break;
4742 }
4743 }
4744 }
4745
4746 return true;
4747 }
4748
4749 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4750 bytes that are pointed to by EXP, which should be a pointer. */
4751
4752 bool
count_nonzero_bytes_addr(tree exp,gimple * stmt,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,ssa_name_limit_t & snlim)4753 strlen_pass::count_nonzero_bytes_addr (tree exp, gimple *stmt,
4754 unsigned HOST_WIDE_INT offset,
4755 unsigned HOST_WIDE_INT nbytes,
4756 unsigned lenrange[3], bool *nulterm,
4757 bool *allnul, bool *allnonnul,
4758 ssa_name_limit_t &snlim)
4759 {
4760 int idx = get_stridx (exp, stmt);
4761 if (idx > 0)
4762 {
4763 strinfo *si = get_strinfo (idx);
4764 if (!si)
4765 return false;
4766
4767 /* Handle both constant lengths as well non-constant lengths
4768 in some range. */
4769 unsigned HOST_WIDE_INT minlen, maxlen;
4770 if (tree_fits_shwi_p (si->nonzero_chars))
4771 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4772 else if (si->nonzero_chars
4773 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4774 {
4775 value_range vr;
4776 ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, stmt);
4777 if (vr.kind () != VR_RANGE)
4778 return false;
4779
4780 minlen = tree_to_uhwi (vr.min ());
4781 maxlen = tree_to_uhwi (vr.max ());
4782 }
4783 else
4784 return false;
4785
4786 if (maxlen < offset)
4787 return false;
4788
4789 minlen = minlen < offset ? 0 : minlen - offset;
4790 maxlen -= offset;
4791 if (maxlen + 1 < nbytes)
4792 return false;
4793
4794 if (nbytes <= minlen)
4795 *nulterm = false;
4796
4797 if (nbytes < minlen)
4798 {
4799 minlen = nbytes;
4800 if (nbytes < maxlen)
4801 maxlen = nbytes;
4802 }
4803
4804 if (minlen < lenrange[0])
4805 lenrange[0] = minlen;
4806 if (lenrange[1] < maxlen)
4807 lenrange[1] = maxlen;
4808
4809 if (lenrange[2] < nbytes)
4810 lenrange[2] = nbytes;
4811
4812 /* Since only the length of the string are known and not its contents,
4813 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4814 *allnul = false;
4815 if (minlen < nbytes)
4816 *allnonnul = false;
4817
4818 return true;
4819 }
4820
4821 if (TREE_CODE (exp) == ADDR_EXPR)
4822 return count_nonzero_bytes (TREE_OPERAND (exp, 0), stmt,
4823 offset, nbytes,
4824 lenrange, nulterm, allnul, allnonnul, snlim);
4825
4826 if (TREE_CODE (exp) == SSA_NAME)
4827 {
4828 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4829 if (gimple_code (stmt) == GIMPLE_PHI)
4830 {
4831 /* Avoid processing an SSA_NAME that has already been visited
4832 or if an SSA_NAME limit has been reached. Indicate success
4833 if the former and failure if the latter. */
4834 if (int res = snlim.next_phi (exp))
4835 return res > 0;
4836
4837 /* Determine the minimum and maximum from the PHI arguments. */
4838 unsigned int n = gimple_phi_num_args (stmt);
4839 for (unsigned i = 0; i != n; i++)
4840 {
4841 tree def = gimple_phi_arg_def (stmt, i);
4842 if (!count_nonzero_bytes_addr (def, stmt,
4843 offset, nbytes, lenrange,
4844 nulterm, allnul, allnonnul,
4845 snlim))
4846 return false;
4847 }
4848
4849 return true;
4850 }
4851 }
4852
4853 /* Otherwise we don't know anything. */
4854 lenrange[0] = 0;
4855 if (lenrange[1] < nbytes)
4856 lenrange[1] = nbytes;
4857 if (lenrange[2] < nbytes)
4858 lenrange[2] = nbytes;
4859 *nulterm = false;
4860 *allnul = false;
4861 *allnonnul = false;
4862 return true;
4863 }
4864
4865 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4866 is a type rather than an expression use its size to compute the range.
4867 RVALS is used to determine ranges of dynamically computed string lengths
4868 (the results of strlen). */
4869
4870 bool
count_nonzero_bytes(tree expr_or_type,gimple * stmt,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul)4871 strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4872 unsigned lenrange[3], bool *nulterm,
4873 bool *allnul, bool *allnonnul)
4874 {
4875 if (TYPE_P (expr_or_type))
4876 return nonzero_bytes_for_type (expr_or_type, lenrange,
4877 nulterm, allnul, allnonnul);
4878
4879 /* Set to optimistic values so the caller doesn't have to worry about
4880 initializing these and to what. On success, the function will clear
4881 these if it determines their values are different but being recursive
4882 it never sets either to true. On failure, their values are
4883 unspecified. */
4884 *nulterm = true;
4885 *allnul = true;
4886 *allnonnul = true;
4887
4888 ssa_name_limit_t snlim;
4889 tree expr = expr_or_type;
4890 return count_nonzero_bytes (expr, stmt,
4891 0, 0, lenrange, nulterm, allnul, allnonnul,
4892 snlim);
4893 }
4894
4895 /* Handle a single or multibyte store other than by a built-in function,
4896 either via a single character assignment or by multi-byte assignment
4897 either via MEM_REF or via a type other than char (such as in
4898 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4899 the next statement in the basic block and false otherwise. */
4900
4901 bool
handle_store(bool * zero_write)4902 strlen_pass::handle_store (bool *zero_write)
4903 {
4904 gimple *stmt = gsi_stmt (m_gsi);
4905 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4906 call. STORETYPE is the type of the store (determined from either
4907 the RHS of the assignment statement or the LHS of a function call. */
4908 tree lhs, rhs, storetype;
4909 if (is_gimple_assign (stmt))
4910 {
4911 lhs = gimple_assign_lhs (stmt);
4912 rhs = gimple_assign_rhs1 (stmt);
4913 storetype = TREE_TYPE (rhs);
4914 }
4915 else if (is_gimple_call (stmt))
4916 {
4917 lhs = gimple_call_lhs (stmt);
4918 rhs = NULL_TREE;
4919 storetype = TREE_TYPE (lhs);
4920 }
4921 else
4922 return true;
4923
4924 tree ssaname = NULL_TREE;
4925 strinfo *si = NULL;
4926 int idx = -1;
4927
4928 range_query *const rvals = ptr_qry.rvals;
4929
4930 /* The offset of the first byte in LHS modified by the store. */
4931 unsigned HOST_WIDE_INT offset = 0;
4932
4933 if (TREE_CODE (lhs) == MEM_REF
4934 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4935 {
4936 tree mem_offset = TREE_OPERAND (lhs, 1);
4937 if (tree_fits_uhwi_p (mem_offset))
4938 {
4939 /* Get the strinfo for the base, and use it if it starts with at
4940 least OFFSET nonzero characters. This is trivially true if
4941 OFFSET is zero. */
4942 offset = tree_to_uhwi (mem_offset);
4943 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4944 if (idx > 0)
4945 si = get_strinfo (idx);
4946 if (offset == 0)
4947 ssaname = TREE_OPERAND (lhs, 0);
4948 else if (si == NULL
4949 || compare_nonzero_chars (si, stmt, offset, rvals) < 0)
4950 {
4951 *zero_write = rhs ? initializer_zerop (rhs) : false;
4952
4953 bool dummy;
4954 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4955 if (count_nonzero_bytes (rhs ? rhs : storetype, stmt, lenrange,
4956 &dummy, &dummy, &dummy))
4957 maybe_warn_overflow (stmt, true, lenrange[2]);
4958
4959 return true;
4960 }
4961 }
4962 }
4963 else
4964 {
4965 idx = get_addr_stridx (lhs, stmt, NULL_TREE, &offset, rvals);
4966 if (idx > 0)
4967 si = get_strinfo (idx);
4968 }
4969
4970 /* Minimum and maximum leading non-zero bytes and the size of the store. */
4971 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4972
4973 /* Set to the minimum length of the string being assigned if known. */
4974 unsigned HOST_WIDE_INT rhs_minlen;
4975
4976 /* STORING_NONZERO_P is true iff not all stored characters are zero.
4977 STORING_ALL_NONZERO_P is true if all stored characters are zero.
4978 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
4979 Both are false when it's impossible to determine which is true. */
4980 bool storing_nonzero_p;
4981 bool storing_all_nonzero_p;
4982 bool storing_all_zeros_p;
4983 /* FULL_STRING_P is set when the stored sequence of characters form
4984 a nul-terminated string. */
4985 bool full_string_p;
4986
4987 const bool ranges_valid
4988 = count_nonzero_bytes (rhs ? rhs : storetype, stmt,
4989 lenrange, &full_string_p,
4990 &storing_all_zeros_p, &storing_all_nonzero_p);
4991
4992 if (ranges_valid)
4993 {
4994 rhs_minlen = lenrange[0];
4995 storing_nonzero_p = lenrange[1] > 0;
4996 *zero_write = storing_all_zeros_p;
4997
4998 maybe_warn_overflow (stmt, true, lenrange[2]);
4999 }
5000 else
5001 {
5002 rhs_minlen = HOST_WIDE_INT_M1U;
5003 full_string_p = false;
5004 storing_nonzero_p = false;
5005 storing_all_zeros_p = false;
5006 storing_all_nonzero_p = false;
5007 }
5008
5009 if (si != NULL)
5010 {
5011 /* The count_nonzero_bytes call above might have unshared si.
5012 Fetch it again from the vector. */
5013 si = get_strinfo (idx);
5014 /* The corresponding element is set to 1 if the first and last
5015 element, respectively, of the sequence of characters being
5016 written over the string described by SI ends before
5017 the terminating nul (if it has one), to zero if the nul is
5018 being overwritten but not beyond, or negative otherwise. */
5019 int store_before_nul[2];
5020 if (ranges_valid)
5021 {
5022 /* The offset of the last stored byte. */
5023 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5024 store_before_nul[0]
5025 = compare_nonzero_chars (si, stmt, offset, rvals);
5026 if (endoff == offset)
5027 store_before_nul[1] = store_before_nul[0];
5028 else
5029 store_before_nul[1]
5030 = compare_nonzero_chars (si, stmt, endoff, rvals);
5031 }
5032 else
5033 {
5034 store_before_nul[0]
5035 = compare_nonzero_chars (si, stmt, offset, rvals);
5036 store_before_nul[1] = store_before_nul[0];
5037 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5038 }
5039
5040 if (storing_all_zeros_p
5041 && store_before_nul[0] == 0
5042 && store_before_nul[1] == 0
5043 && si->full_string_p)
5044 {
5045 /* When overwriting a '\0' with a '\0', the store can be removed
5046 if we know it has been stored in the current function. */
5047 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5048 {
5049 unlink_stmt_vdef (stmt);
5050 release_defs (stmt);
5051 gsi_remove (&m_gsi, true);
5052 return false;
5053 }
5054 else
5055 {
5056 si->writable = true;
5057 gsi_next (&m_gsi);
5058 return false;
5059 }
5060 }
5061
5062 if (store_before_nul[1] > 0
5063 && storing_nonzero_p
5064 && lenrange[0] == lenrange[1]
5065 && lenrange[0] == lenrange[2]
5066 && TREE_CODE (storetype) == INTEGER_TYPE)
5067 {
5068 /* Handle a store of one or more non-nul characters that ends
5069 before the terminating nul of the destination and so does
5070 not affect its length
5071 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5072 and if we aren't storing '\0', we know that the length of
5073 the string and any other zero terminated string in memory
5074 remains the same. In that case we move to the next gimple
5075 statement and return to signal the caller that it shouldn't
5076 invalidate anything.
5077
5078 This is beneficial for cases like:
5079
5080 char p[20];
5081 void foo (char *q)
5082 {
5083 strcpy (p, "foobar");
5084 size_t len = strlen (p); // can be folded to 6
5085 size_t len2 = strlen (q); // has to be computed
5086 p[0] = 'X';
5087 size_t len3 = strlen (p); // can be folded to 6
5088 size_t len4 = strlen (q); // can be folded to len2
5089 bar (len, len2, len3, len4);
5090 } */
5091 gsi_next (&m_gsi);
5092 return false;
5093 }
5094
5095 if (storing_all_zeros_p
5096 || storing_nonzero_p
5097 || (offset != 0 && store_before_nul[1] > 0))
5098 {
5099 /* When STORING_NONZERO_P, we know that the string will start
5100 with at least OFFSET + 1 nonzero characters. If storing
5101 a single character, set si->NONZERO_CHARS to the result.
5102 If storing multiple characters, try to determine the number
5103 of leading non-zero characters and set si->NONZERO_CHARS to
5104 the result instead.
5105
5106 When STORING_ALL_ZEROS_P, we know that the string is now
5107 OFFSET characters long.
5108
5109 Otherwise, we're storing an unknown value at offset OFFSET,
5110 so need to clip the nonzero_chars to OFFSET.
5111 Use the minimum length of the string (or individual character)
5112 being stored if it's known. Otherwise, STORING_NONZERO_P
5113 guarantees it's at least 1. */
5114 HOST_WIDE_INT len
5115 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5116 location_t loc = gimple_location (stmt);
5117 tree oldlen = si->nonzero_chars;
5118 if (store_before_nul[1] == 0 && si->full_string_p)
5119 /* We're overwriting the nul terminator with a nonzero or
5120 unknown character. If the previous stmt was a memcpy,
5121 its length may be decreased. */
5122 adjust_last_stmt (si, stmt, false);
5123 si = unshare_strinfo (si);
5124 if (storing_nonzero_p)
5125 {
5126 gcc_assert (len >= 0);
5127 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5128 }
5129 else
5130 si->nonzero_chars = build_int_cst (size_type_node, offset);
5131
5132 /* Set FULL_STRING_P only if the length of the strings being
5133 written is the same, and clear it if the strings have
5134 different lengths. In the latter case the length stored
5135 in si->NONZERO_CHARS becomes the lower bound.
5136 FIXME: Handle the upper bound of the length if possible. */
5137 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5138
5139 if (storing_all_zeros_p
5140 && ssaname
5141 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5142 si->endptr = ssaname;
5143 else
5144 si->endptr = NULL;
5145 si->next = 0;
5146 si->stmt = NULL;
5147 si->writable = true;
5148 si->dont_invalidate = true;
5149 if (oldlen)
5150 {
5151 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5152 si->nonzero_chars, oldlen);
5153 adjust_related_strinfos (loc, si, adj);
5154 }
5155 else
5156 si->prev = 0;
5157 }
5158 }
5159 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5160 {
5161 if (ssaname)
5162 idx = new_stridx (ssaname);
5163 else
5164 idx = new_addr_stridx (lhs);
5165 if (idx != 0)
5166 {
5167 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5168
5169 HOST_WIDE_INT slen;
5170 if (storing_all_zeros_p)
5171 slen = 0;
5172 else if (storing_nonzero_p && ranges_valid)
5173 {
5174 /* FIXME: Handle the upper bound of the length when
5175 LENRANGE[0] != LENRANGE[1]. */
5176 slen = lenrange[0];
5177 if (lenrange[0] != lenrange[1])
5178 /* Set the minimum length but ignore the maximum
5179 for now. */
5180 full_string_p = false;
5181 }
5182 else
5183 slen = -1;
5184
5185 tree len = (slen <= 0
5186 ? size_zero_node
5187 : build_int_cst (size_type_node, slen));
5188 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5189 set_strinfo (idx, si);
5190 if (storing_all_zeros_p
5191 && ssaname
5192 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5193 si->endptr = ssaname;
5194 si->dont_invalidate = true;
5195 si->writable = true;
5196 }
5197 }
5198 else if (idx == 0
5199 && rhs_minlen < HOST_WIDE_INT_M1U
5200 && ssaname == NULL_TREE
5201 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5202 {
5203 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5204 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5205 {
5206 int idx = new_addr_stridx (lhs);
5207 if (idx != 0)
5208 {
5209 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5210 build_int_cst (size_type_node, rhs_minlen),
5211 full_string_p);
5212 set_strinfo (idx, si);
5213 si->dont_invalidate = true;
5214 }
5215 }
5216 }
5217
5218 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5219 {
5220 /* For single-byte stores only, allow adjust_last_stmt to remove
5221 the statement if the stored '\0' is immediately overwritten. */
5222 laststmt.stmt = stmt;
5223 laststmt.len = build_int_cst (size_type_node, 1);
5224 laststmt.stridx = si->idx;
5225 }
5226 return true;
5227 }
5228
5229 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5230
5231 static void
fold_strstr_to_strncmp(tree rhs1,tree rhs2,gimple * stmt)5232 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5233 {
5234 if (TREE_CODE (rhs1) != SSA_NAME
5235 || TREE_CODE (rhs2) != SSA_NAME)
5236 return;
5237
5238 gimple *call_stmt = NULL;
5239 for (int pass = 0; pass < 2; pass++)
5240 {
5241 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5242 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5243 && has_single_use (rhs1)
5244 && gimple_call_arg (g, 0) == rhs2)
5245 {
5246 call_stmt = g;
5247 break;
5248 }
5249 std::swap (rhs1, rhs2);
5250 }
5251
5252 if (call_stmt)
5253 {
5254 tree arg0 = gimple_call_arg (call_stmt, 0);
5255
5256 if (arg0 == rhs2)
5257 {
5258 tree arg1 = gimple_call_arg (call_stmt, 1);
5259 tree arg1_len = NULL_TREE;
5260 int idx = get_stridx (arg1, call_stmt);
5261
5262 if (idx)
5263 {
5264 if (idx < 0)
5265 arg1_len = build_int_cst (size_type_node, ~idx);
5266 else
5267 {
5268 strinfo *si = get_strinfo (idx);
5269 if (si)
5270 arg1_len = get_string_length (si);
5271 }
5272 }
5273
5274 if (arg1_len != NULL_TREE)
5275 {
5276 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5277 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5278
5279 if (!is_gimple_val (arg1_len))
5280 {
5281 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5282 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5283 arg1_len);
5284 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5285 arg1_len = arg1_len_tmp;
5286 }
5287
5288 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5289 arg0, arg1, arg1_len);
5290 tree strncmp_lhs = make_ssa_name (integer_type_node);
5291 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5292 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5293 gsi_remove (&gsi, true);
5294 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5295 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5296
5297 if (is_gimple_assign (stmt))
5298 {
5299 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5300 {
5301 tree cond = gimple_assign_rhs1 (stmt);
5302 TREE_OPERAND (cond, 0) = strncmp_lhs;
5303 TREE_OPERAND (cond, 1) = zero;
5304 }
5305 else
5306 {
5307 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5308 gimple_assign_set_rhs2 (stmt, zero);
5309 }
5310 }
5311 else
5312 {
5313 gcond *cond = as_a<gcond *> (stmt);
5314 gimple_cond_set_lhs (cond, strncmp_lhs);
5315 gimple_cond_set_rhs (cond, zero);
5316 }
5317 update_stmt (stmt);
5318 }
5319 }
5320 }
5321 }
5322
5323 /* Return true if TYPE corresponds to a narrow character type. */
5324
5325 static bool
is_char_type(tree type)5326 is_char_type (tree type)
5327 {
5328 return (TREE_CODE (type) == INTEGER_TYPE
5329 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5330 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5331 }
5332
5333 /* Check the built-in call at GSI for validity and optimize it.
5334 Uses RVALS to determine range information.
5335 Return true to let the caller advance *GSI to the next statement
5336 in the basic block and false otherwise. */
5337
5338 bool
check_and_optimize_call(bool * zero_write)5339 strlen_pass::check_and_optimize_call (bool *zero_write)
5340 {
5341 gimple *stmt = gsi_stmt (m_gsi);
5342
5343 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5344 {
5345 tree fntype = gimple_call_fntype (stmt);
5346 if (!fntype)
5347 return true;
5348
5349 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5350 {
5351 handle_alloc_call (BUILT_IN_NONE);
5352 return true;
5353 }
5354
5355 if (tree lhs = gimple_call_lhs (stmt))
5356 handle_assign (lhs, zero_write);
5357
5358 /* Proceed to handle user-defined formatting functions. */
5359 }
5360
5361 /* When not optimizing we must be checking printf calls which
5362 we do even for user-defined functions when they are declared
5363 with attribute format. */
5364 if (!flag_optimize_strlen
5365 || !strlen_optimize
5366 || !valid_builtin_call (stmt))
5367 return !handle_printf_call (&m_gsi, ptr_qry);
5368
5369 tree callee = gimple_call_fndecl (stmt);
5370 switch (DECL_FUNCTION_CODE (callee))
5371 {
5372 case BUILT_IN_STRLEN:
5373 case BUILT_IN_STRNLEN:
5374 handle_builtin_strlen ();
5375 break;
5376 case BUILT_IN_STRCHR:
5377 handle_builtin_strchr ();
5378 break;
5379 case BUILT_IN_STRCPY:
5380 case BUILT_IN_STRCPY_CHK:
5381 case BUILT_IN_STPCPY:
5382 case BUILT_IN_STPCPY_CHK:
5383 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5384 break;
5385
5386 case BUILT_IN_STRNCAT:
5387 case BUILT_IN_STRNCAT_CHK:
5388 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5389 break;
5390
5391 case BUILT_IN_STPNCPY:
5392 case BUILT_IN_STPNCPY_CHK:
5393 case BUILT_IN_STRNCPY:
5394 case BUILT_IN_STRNCPY_CHK:
5395 handle_builtin_stxncpy_strncat (false);
5396 break;
5397
5398 case BUILT_IN_MEMCPY:
5399 case BUILT_IN_MEMCPY_CHK:
5400 case BUILT_IN_MEMPCPY:
5401 case BUILT_IN_MEMPCPY_CHK:
5402 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5403 break;
5404 case BUILT_IN_STRCAT:
5405 case BUILT_IN_STRCAT_CHK:
5406 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5407 break;
5408 case BUILT_IN_ALLOCA:
5409 case BUILT_IN_ALLOCA_WITH_ALIGN:
5410 case BUILT_IN_MALLOC:
5411 case BUILT_IN_CALLOC:
5412 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5413 break;
5414 case BUILT_IN_MEMSET:
5415 if (handle_builtin_memset (zero_write))
5416 return false;
5417 break;
5418 case BUILT_IN_MEMCMP:
5419 if (handle_builtin_memcmp ())
5420 return false;
5421 break;
5422 case BUILT_IN_STRCMP:
5423 case BUILT_IN_STRNCMP:
5424 if (handle_builtin_string_cmp ())
5425 return false;
5426 break;
5427 default:
5428 if (handle_printf_call (&m_gsi, ptr_qry))
5429 return false;
5430 break;
5431 }
5432
5433 return true;
5434 }
5435
5436 /* Handle an assignment statement at *GSI to a LHS of integral type.
5437 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5438
5439 void
handle_integral_assign(bool * cleanup_eh)5440 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5441 {
5442 gimple *stmt = gsi_stmt (m_gsi);
5443 tree lhs = gimple_assign_lhs (stmt);
5444 tree lhs_type = TREE_TYPE (lhs);
5445
5446 enum tree_code code = gimple_assign_rhs_code (stmt);
5447 if (code == COND_EXPR)
5448 {
5449 tree cond = gimple_assign_rhs1 (stmt);
5450 enum tree_code cond_code = TREE_CODE (cond);
5451
5452 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5453 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5454 TREE_OPERAND (cond, 1), stmt);
5455 }
5456 else if (code == EQ_EXPR || code == NE_EXPR)
5457 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5458 gimple_assign_rhs2 (stmt), stmt);
5459 else if (gimple_assign_load_p (stmt)
5460 && TREE_CODE (lhs_type) == INTEGER_TYPE
5461 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5462 && (TYPE_PRECISION (lhs_type)
5463 == TYPE_PRECISION (char_type_node))
5464 && !gimple_has_volatile_ops (stmt))
5465 {
5466 tree off = integer_zero_node;
5467 unsigned HOST_WIDE_INT coff = 0;
5468 int idx = 0;
5469 tree rhs1 = gimple_assign_rhs1 (stmt);
5470 if (code == MEM_REF)
5471 {
5472 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5473 if (idx > 0)
5474 {
5475 strinfo *si = get_strinfo (idx);
5476 if (si
5477 && si->nonzero_chars
5478 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5479 && (wi::to_widest (si->nonzero_chars)
5480 >= wi::to_widest (off)))
5481 off = TREE_OPERAND (rhs1, 1);
5482 else
5483 /* This case is not useful. See if get_addr_stridx
5484 returns something usable. */
5485 idx = 0;
5486 }
5487 }
5488 if (idx <= 0)
5489 idx = get_addr_stridx (rhs1, stmt, NULL_TREE, &coff);
5490 if (idx > 0)
5491 {
5492 strinfo *si = get_strinfo (idx);
5493 if (si
5494 && si->nonzero_chars
5495 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5496 {
5497 widest_int w1 = wi::to_widest (si->nonzero_chars);
5498 widest_int w2 = wi::to_widest (off) + coff;
5499 if (w1 == w2
5500 && si->full_string_p)
5501 {
5502 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5503 {
5504 fprintf (dump_file, "Optimizing: ");
5505 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5506 }
5507
5508 /* Reading the final '\0' character. */
5509 tree zero = build_int_cst (lhs_type, 0);
5510 gimple_set_vuse (stmt, NULL_TREE);
5511 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5512 *cleanup_eh
5513 |= maybe_clean_or_replace_eh_stmt (stmt,
5514 gsi_stmt (m_gsi));
5515 stmt = gsi_stmt (m_gsi);
5516 update_stmt (stmt);
5517
5518 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5519 {
5520 fprintf (dump_file, "into: ");
5521 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5522 }
5523 }
5524 else if (w1 > w2)
5525 {
5526 /* Reading a character before the final '\0'
5527 character. Just set the value range to ~[0, 0]
5528 if we don't have anything better. */
5529 value_range r;
5530 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5531 || r.varying_p ())
5532 {
5533 r.set_nonzero (lhs_type);
5534 set_range_info (lhs, r);
5535 }
5536 }
5537 }
5538 }
5539 }
5540 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5541 {
5542 if (int idx = new_stridx (lhs))
5543 {
5544 /* Record multi-byte assignments from MEM_REFs. */
5545 bool storing_all_nonzero_p;
5546 bool storing_all_zeros_p;
5547 bool full_string_p;
5548 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5549 tree rhs = gimple_assign_rhs1 (stmt);
5550 const bool ranges_valid
5551 = count_nonzero_bytes (rhs, stmt,
5552 lenrange, &full_string_p,
5553 &storing_all_zeros_p,
5554 &storing_all_nonzero_p);
5555 if (ranges_valid)
5556 {
5557 tree length = build_int_cst (sizetype, lenrange[0]);
5558 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5559 set_strinfo (idx, si);
5560 si->writable = true;
5561 si->dont_invalidate = true;
5562 }
5563 }
5564 }
5565
5566 if (strlen_to_stridx)
5567 {
5568 tree rhs1 = gimple_assign_rhs1 (stmt);
5569 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5570 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5571 }
5572 }
5573
5574 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5575 the assignment stores all zero bytes. */
5576
5577 bool
handle_assign(tree lhs,bool * zero_write)5578 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5579 {
5580 tree type = TREE_TYPE (lhs);
5581 if (TREE_CODE (type) == ARRAY_TYPE)
5582 type = TREE_TYPE (type);
5583
5584 bool is_char_store = is_char_type (type);
5585 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5586 {
5587 /* To consider stores into char objects via integer types other
5588 than char but not those to non-character objects, determine
5589 the type of the destination rather than just the type of
5590 the access. */
5591 for (int i = 0; i != 2; ++i)
5592 {
5593 tree ref = TREE_OPERAND (lhs, i);
5594 type = TREE_TYPE (ref);
5595 if (TREE_CODE (type) == POINTER_TYPE)
5596 type = TREE_TYPE (type);
5597 if (TREE_CODE (type) == ARRAY_TYPE)
5598 type = TREE_TYPE (type);
5599 if (is_char_type (type))
5600 {
5601 is_char_store = true;
5602 break;
5603 }
5604 }
5605 }
5606
5607 /* Handle a single or multibyte assignment. */
5608 if (is_char_store && !handle_store (zero_write))
5609 return false;
5610
5611 return true;
5612 }
5613
5614
5615 /* Attempt to check for validity of the performed access a single statement
5616 at *GSI using string length knowledge, and to optimize it.
5617 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5618 true. Return true to let the caller advance *GSI to the next statement
5619 in the basic block and false otherwise. */
5620
5621 bool
check_and_optimize_stmt(bool * cleanup_eh)5622 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5623 {
5624 gimple *stmt = gsi_stmt (m_gsi);
5625
5626 /* For statements that modify a string, set to true if the write
5627 is only zeros. */
5628 bool zero_write = false;
5629
5630 if (is_gimple_call (stmt))
5631 {
5632 if (!check_and_optimize_call (&zero_write))
5633 return false;
5634 }
5635 else if (!flag_optimize_strlen || !strlen_optimize)
5636 return true;
5637 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5638 {
5639 /* Handle non-clobbering assignment. */
5640 tree lhs = gimple_assign_lhs (stmt);
5641 tree lhs_type = TREE_TYPE (lhs);
5642
5643 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5644 {
5645 if (gimple_assign_single_p (stmt)
5646 || (gimple_assign_cast_p (stmt)
5647 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5648 {
5649 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
5650 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5651 }
5652 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5653 handle_pointer_plus ();
5654 }
5655 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5656 /* Handle assignment to a character. */
5657 handle_integral_assign (cleanup_eh);
5658 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5659 if (!handle_assign (lhs, &zero_write))
5660 return false;
5661 }
5662 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5663 {
5664 enum tree_code code = gimple_cond_code (cond);
5665 if (code == EQ_EXPR || code == NE_EXPR)
5666 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5667 gimple_cond_rhs (stmt), stmt);
5668 }
5669
5670 if (gimple_vdef (stmt))
5671 maybe_invalidate (stmt, zero_write);
5672 return true;
5673 }
5674
5675 /* Recursively call maybe_invalidate on stmts that might be executed
5676 in between dombb and current bb and that contain a vdef. Stop when
5677 *count stmts are inspected, or if the whole strinfo vector has
5678 been invalidated. */
5679
5680 static void
do_invalidate(basic_block dombb,gimple * phi,bitmap visited,int * count)5681 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5682 {
5683 unsigned int i, n = gimple_phi_num_args (phi);
5684
5685 for (i = 0; i < n; i++)
5686 {
5687 tree vuse = gimple_phi_arg_def (phi, i);
5688 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5689 basic_block bb = gimple_bb (stmt);
5690 if (bb == NULL
5691 || bb == dombb
5692 || !bitmap_set_bit (visited, bb->index)
5693 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5694 continue;
5695 while (1)
5696 {
5697 if (gimple_code (stmt) == GIMPLE_PHI)
5698 {
5699 do_invalidate (dombb, stmt, visited, count);
5700 if (*count == 0)
5701 return;
5702 break;
5703 }
5704 if (--*count == 0)
5705 return;
5706 if (!maybe_invalidate (stmt))
5707 {
5708 *count = 0;
5709 return;
5710 }
5711 vuse = gimple_vuse (stmt);
5712 stmt = SSA_NAME_DEF_STMT (vuse);
5713 if (gimple_bb (stmt) != bb)
5714 {
5715 bb = gimple_bb (stmt);
5716 if (bb == NULL
5717 || bb == dombb
5718 || !bitmap_set_bit (visited, bb->index)
5719 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5720 break;
5721 }
5722 }
5723 }
5724 }
5725
5726 /* Release pointer_query cache. */
5727
~strlen_pass()5728 strlen_pass::~strlen_pass ()
5729 {
5730 ptr_qry.flush_cache ();
5731 }
5732
5733 /* Callback for walk_dominator_tree. Attempt to optimize various
5734 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5735
5736 edge
before_dom_children(basic_block bb)5737 strlen_pass::before_dom_children (basic_block bb)
5738 {
5739 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5740
5741 if (dombb == NULL)
5742 stridx_to_strinfo = NULL;
5743 else
5744 {
5745 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5746 if (stridx_to_strinfo)
5747 {
5748 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5749 gsi_next (&gsi))
5750 {
5751 gphi *phi = gsi.phi ();
5752 if (virtual_operand_p (gimple_phi_result (phi)))
5753 {
5754 bitmap visited = BITMAP_ALLOC (NULL);
5755 int count_vdef = 100;
5756 do_invalidate (dombb, phi, visited, &count_vdef);
5757 BITMAP_FREE (visited);
5758 if (count_vdef == 0)
5759 {
5760 /* If there were too many vdefs in between immediate
5761 dominator and current bb, invalidate everything.
5762 If stridx_to_strinfo has been unshared, we need
5763 to free it, otherwise just set it to NULL. */
5764 if (!strinfo_shared ())
5765 {
5766 unsigned int i;
5767 strinfo *si;
5768
5769 for (i = 1;
5770 vec_safe_iterate (stridx_to_strinfo, i, &si);
5771 ++i)
5772 {
5773 free_strinfo (si);
5774 (*stridx_to_strinfo)[i] = NULL;
5775 }
5776 }
5777 else
5778 stridx_to_strinfo = NULL;
5779 }
5780 break;
5781 }
5782 }
5783 }
5784 }
5785
5786 /* If all PHI arguments have the same string index, the PHI result
5787 has it as well. */
5788 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5789 gsi_next (&gsi))
5790 {
5791 gphi *phi = gsi.phi ();
5792 tree result = gimple_phi_result (phi);
5793 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5794 {
5795 int idx = get_stridx (gimple_phi_arg_def (phi, 0), phi);
5796 if (idx != 0)
5797 {
5798 unsigned int i, n = gimple_phi_num_args (phi);
5799 for (i = 1; i < n; i++)
5800 if (idx != get_stridx (gimple_phi_arg_def (phi, i), phi))
5801 break;
5802 if (i == n)
5803 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5804 }
5805 }
5806 }
5807
5808 bool cleanup_eh = false;
5809
5810 /* Attempt to optimize individual statements. */
5811 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5812 {
5813 /* Reset search depth performance counter. */
5814 ptr_qry.depth = 0;
5815
5816 if (check_and_optimize_stmt (&cleanup_eh))
5817 gsi_next (&m_gsi);
5818 }
5819
5820 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5821 m_cleanup_cfg = true;
5822
5823 bb->aux = stridx_to_strinfo;
5824 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5825 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5826 return NULL;
5827 }
5828
5829 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5830 owned by the current bb, clear bb->aux. */
5831
5832 void
after_dom_children(basic_block bb)5833 strlen_pass::after_dom_children (basic_block bb)
5834 {
5835 if (bb->aux)
5836 {
5837 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5838 if (vec_safe_length (stridx_to_strinfo)
5839 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5840 {
5841 unsigned int i;
5842 strinfo *si;
5843
5844 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5845 free_strinfo (si);
5846 vec_free (stridx_to_strinfo);
5847 }
5848 bb->aux = NULL;
5849 }
5850 }
5851
5852 namespace {
5853
5854 static unsigned int
printf_strlen_execute(function * fun,bool warn_only)5855 printf_strlen_execute (function *fun, bool warn_only)
5856 {
5857 strlen_optimize = !warn_only;
5858
5859 calculate_dominance_info (CDI_DOMINATORS);
5860 loop_optimizer_init (LOOPS_NORMAL);
5861 scev_initialize ();
5862
5863 gcc_assert (!strlen_to_stridx);
5864 if (warn_stringop_overflow || warn_stringop_truncation)
5865 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5866
5867 /* This has to happen after initializing the loop optimizer
5868 and initializing SCEV as they create new SSA_NAMEs. */
5869 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5870 max_stridx = 1;
5871
5872 /* String length optimization is implemented as a walk of the dominator
5873 tree and a forward walk of statements within each block. */
5874 strlen_pass walker (CDI_DOMINATORS);
5875 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5876
5877 if (dump_file && (dump_flags & TDF_DETAILS))
5878 walker.ptr_qry.dump (dump_file, true);
5879
5880 ssa_ver_to_stridx.release ();
5881 strinfo_pool.release ();
5882 if (decl_to_stridxlist_htab)
5883 {
5884 obstack_free (&stridx_obstack, NULL);
5885 delete decl_to_stridxlist_htab;
5886 decl_to_stridxlist_htab = NULL;
5887 }
5888 laststmt.stmt = NULL;
5889 laststmt.len = NULL_TREE;
5890 laststmt.stridx = 0;
5891
5892 if (strlen_to_stridx)
5893 {
5894 strlen_to_stridx->empty ();
5895 delete strlen_to_stridx;
5896 strlen_to_stridx = NULL;
5897 }
5898
5899 scev_finalize ();
5900 loop_optimizer_finalize ();
5901
5902 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5903 }
5904
5905 /* This file defines two passes: one for warnings that runs only when
5906 optimization is disabled, and another that implements optimizations
5907 and also issues warnings. */
5908
5909 const pass_data pass_data_warn_printf =
5910 {
5911 GIMPLE_PASS, /* type */
5912 "warn-printf", /* name */
5913 OPTGROUP_NONE, /* optinfo_flags */
5914 TV_NONE, /* tv_id */
5915 /* Normally an optimization pass would require PROP_ssa but because
5916 this pass runs early, with no optimization, to do sprintf format
5917 checking, it only requires PROP_cfg. */
5918 PROP_cfg, /* properties_required */
5919 0, /* properties_provided */
5920 0, /* properties_destroyed */
5921 0, /* todo_flags_start */
5922 0, /* todo_flags_finish */
5923 };
5924
5925 class pass_warn_printf : public gimple_opt_pass
5926 {
5927 public:
pass_warn_printf(gcc::context * ctxt)5928 pass_warn_printf (gcc::context *ctxt)
5929 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5930 {}
5931
5932 virtual bool gate (function *);
execute(function * fun)5933 virtual unsigned int execute (function *fun)
5934 {
5935 return printf_strlen_execute (fun, true);
5936 }
5937 };
5938
5939
5940 /* Return true to run the warning pass only when not optimizing and
5941 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5942
5943 bool
gate(function *)5944 pass_warn_printf::gate (function *)
5945 {
5946 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5947 }
5948
5949 const pass_data pass_data_strlen =
5950 {
5951 GIMPLE_PASS, /* type */
5952 "strlen", /* name */
5953 OPTGROUP_NONE, /* optinfo_flags */
5954 TV_TREE_STRLEN, /* tv_id */
5955 PROP_cfg | PROP_ssa, /* properties_required */
5956 0, /* properties_provided */
5957 0, /* properties_destroyed */
5958 0, /* todo_flags_start */
5959 0, /* todo_flags_finish */
5960 };
5961
5962 class pass_strlen : public gimple_opt_pass
5963 {
5964 public:
pass_strlen(gcc::context * ctxt)5965 pass_strlen (gcc::context *ctxt)
5966 : gimple_opt_pass (pass_data_strlen, ctxt)
5967 {}
5968
clone()5969 opt_pass * clone () { return new pass_strlen (m_ctxt); }
5970
5971 virtual bool gate (function *);
execute(function * fun)5972 virtual unsigned int execute (function *fun)
5973 {
5974 return printf_strlen_execute (fun, false);
5975 }
5976 };
5977
5978 /* Return true to run the pass only when the sprintf and/or strlen
5979 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
5980 are specified. */
5981
5982 bool
gate(function *)5983 pass_strlen::gate (function *)
5984 {
5985 return ((warn_format_overflow > 0
5986 || warn_format_trunc > 0
5987 || warn_restrict > 0
5988 || flag_optimize_strlen > 0
5989 || flag_printf_return_value)
5990 && optimize > 0);
5991 }
5992
5993 } // anon namespace
5994
5995 gimple_opt_pass *
make_pass_warn_printf(gcc::context * ctxt)5996 make_pass_warn_printf (gcc::context *ctxt)
5997 {
5998 return new pass_warn_printf (ctxt);
5999 }
6000
6001 gimple_opt_pass *
make_pass_strlen(gcc::context * ctxt)6002 make_pass_strlen (gcc::context *ctxt)
6003 {
6004 return new pass_strlen (ctxt);
6005 }
6006