/* Detect paths through the CFG which can never be executed in a conforming
   program and isolate them.

   Copyright (C) 2013-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "cfganal.h"
#include "intl.h"


static bool cfg_altered;

/* Callback for walk_stmt_load_store_ops.

   Return TRUE if OP will dereference the tree stored in DATA, FALSE
   otherwise.

   As a side effect of a match, mark OP volatile and as having side
   effects so that later optimizations will not delete the dereference.

   This routine only makes a superficial check for a dereference.  Thus,
   it must only be used if it is safe to return a false negative.  */
static bool
check_loadstore (gimple *stmt, tree op, tree, void *data)
{
  if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
      && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
    {
      TREE_THIS_VOLATILE (op) = 1;
      TREE_SIDE_EFFECTS (op) = 1;
      update_stmt (stmt);
      return true;
    }
  return false;
}
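
/* As an illustrative (hypothetical) example of the above: given the
   GIMPLE statement

     tmp_2 = MEM[(int *)p_1];

   a walk with DATA == p_1 matches the MEM_REF operand, marks it
   volatile so the dereference survives later dead code elimination,
   and returns true.  The names tmp_2 and p_1 are made up.  */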

/* Insert a trap after SI and split the block after the trap.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.  Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
	 type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
				check_loadstore,
				check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
	{
	  split_block (gimple_bb (stmt), stmt);
	  return;
	}
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  split_block (gimple_bb (new_stmt), new_stmt);
  *si_p = gsi_for_stmt (stmt);
}
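
/* Sketch of the transformation above on a store through a pointer
   known to be NULL (statement names are illustrative, not from a
   real dump):

     *p_1 = x_2;			// before

   becomes

     *p_1 = 0;				// RHS zeroed to expose more DCE
     __builtin_trap ();			// inserted after the dereference

   and the block is split after the trap so the dereference still
   executes for code that catches the resulting signal.  */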

/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB'.  */

basic_block
isolate_path (basic_block bb, basic_block duplicate,
	      edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      if (!ret_zero)
	for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
	  remove_edge (e2);
    }

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    flush_pending_stmts (e2);

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     DUPLICATE.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
	{
	  greturn *ret = as_a <greturn *> (gsi_stmt (si2));
	  tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
	  gimple_return_set_retval (ret, zero);
	  update_stmt (ret);
	}
      else
	insert_trap (&si2, op);
    }

  return duplicate;
}
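
/* A rough picture of the isolation (block names are illustrative):

       A   B                     A       B
        \ /                      |       |
         C          ==>          C       C'
        / \                     / \
       D   E                   D   E

   B->C was the edge known to trigger undefined behavior; it now
   reaches C', a duplicate of C that ends in a trap and has no
   outgoing edges (unless RET_ZERO, in which case C' simply returns
   NULL).  */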

/* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor,
   FALSE otherwise.  */

static bool
is_divmod_with_given_divisor (gimple *stmt, tree divisor)
{
  /* Only assignments matter.  */
  if (!is_gimple_assign (stmt))
    return false;

  /* Check for any DIV/MOD expression.  */
  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
  if (rhs_code == TRUNC_DIV_EXPR
      || rhs_code == FLOOR_DIV_EXPR
      || rhs_code == CEIL_DIV_EXPR
      || rhs_code == EXACT_DIV_EXPR
      || rhs_code == ROUND_DIV_EXPR
      || rhs_code == TRUNC_MOD_EXPR
      || rhs_code == FLOOR_MOD_EXPR
      || rhs_code == CEIL_MOD_EXPR
      || rhs_code == ROUND_MOD_EXPR)
    {
      /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
	 not sufficient for constants which may have different types.  */
      if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
	return true;
    }
  return false;
}
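
/* For instance (hypothetical GIMPLE, names made up), with
   DIVISOR == z_3 the statement

     x_5 = y_4 % z_3;

   is recognized, while x_5 = z_3 / y_4 is not, because z_3 appears
   there as the dividend rather than the divisor.  */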

/* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.

   Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
   in undefined behavior, FALSE otherwise.

   LOC is used for issuing diagnostics.  This case represents potential
   undefined behavior exposed by path splitting and that's reflected in
   the diagnostic.  */

bool
stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
{
  /* If we are working with a non-pointer type, then see
     if this use is a DIV/MOD operation using NAME as the
     divisor.  */
  if (!POINTER_TYPE_P (TREE_TYPE (name)))
    {
      if (!flag_non_call_exceptions)
	return is_divmod_with_given_divisor (use_stmt, name);
      return false;
    }

  /* NAME is a pointer, so see if it's used in a context where it must
     be non-NULL.  */
  bool by_dereference
    = infer_nonnull_range_by_dereference (use_stmt, name);

  if (by_dereference
      || infer_nonnull_range_by_attribute (use_stmt, name))
    {
      if (by_dereference)
	{
	  warning_at (loc, OPT_Wnull_dereference,
		      "potential null pointer dereference");
	  if (!flag_isolate_erroneous_paths_dereference)
	    return false;
	}
      else
	{
	  if (!flag_isolate_erroneous_paths_attribute)
	    return false;
	}
      return true;
    }
  return false;
}
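
/* Illustrative source for the non-pointer case (assuming
   -fno-non-call-exceptions): if NAME is the SSA name holding d below
   and is known to be zero on some path,

     int f (int n, int d) { return n / d; }

   then the division uses NAME as its divisor and the use is flagged.
   For a pointer NAME, a dereference of NAME or passing NAME to a
   parameter carrying the "nonnull" attribute is flagged the same
   way.  */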

/* Return TRUE if STMT uses 0 or NULL in a context which results in
   undefined behavior, FALSE otherwise.

   These cases are explicit in the IL.  */

bool
stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
{
  if (!flag_non_call_exceptions
      && is_divmod_with_given_divisor (stmt, integer_zero_node))
    return true;

  /* By passing null_pointer_node, we can use the
     infer_nonnull_range functions to detect explicit NULL
     pointer dereferences and other uses where a non-NULL
     value is required.  */

  bool by_dereference
    = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
  if (by_dereference
      || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
    {
      if (by_dereference)
	{
	  location_t loc = gimple_location (stmt);
	  warning_at (loc, OPT_Wnull_dereference,
		      "null pointer dereference");
	  if (!flag_isolate_erroneous_paths_dereference)
	    return false;
	}
      else
	{
	  if (!flag_isolate_erroneous_paths_attribute)
	    return false;
	}
      return true;
    }
  return false;
}
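
/* Explicit cases look roughly like the following GIMPLE, where the
   zero or NULL appears directly in the statement, typically as a
   result of jump threading or constant propagation (illustrative
   statements, not from a real dump):

     MEM[(int *)0B] = 42;	// explicit store through NULL
     x_1 = y_2 / 0;		// explicit division by zero  */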

/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
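
/* Concretely (hypothetical source), the two assignments to p below
   become a PHI at the join point with one NULL argument:

     int *p = cond ? &x : NULL;
     *p = 1;			// dereference in the PHI's block

   and the path through the edge carrying the NULL argument is
   isolated as described above.  */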
static void
find_implicit_erroneous_behavior (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.  If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus operate on invalid information.  We don't have a good way
	 to know if a PHI has been reallocated, so just avoid isolation
	 in this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index, hence the ugly way we update I for each iteration.  */
	  basic_block duplicate = NULL;
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi);
	       i = next_i)
	    {
	      tree op = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);
	      imm_use_iterator iter;
	      gimple *use_stmt;

	      next_i = i + 1;

	      if (TREE_CODE (op) == ADDR_EXPR)
		{
		  tree valbase = get_base_address (TREE_OPERAND (op, 0));
		  if ((VAR_P (valbase) && !is_global_var (valbase))
		      || TREE_CODE (valbase) == PARM_DECL)
		    {
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			{
			  greturn *return_stmt
			    = dyn_cast <greturn *> (use_stmt);
			  if (!return_stmt)
			    continue;

			  if (gimple_return_retval (return_stmt) != lhs)
			    continue;

			  if (warning_at (gimple_location (use_stmt),
					  OPT_Wreturn_local_addr,
					  "function may return address "
					  "of local variable"))
			    inform (DECL_SOURCE_LOCATION (valbase),
				    "declared here");

			  if (gimple_bb (use_stmt) == bb)
			    {
			      duplicate = isolate_path (bb, duplicate, e,
							use_stmt, lhs, true);

			      /* When we remove an incoming edge, we need to
				 reprocess the Ith element.  */
			      next_i = i;
			      cfg_altered = true;
			    }
			}
		    }
		}

	      if (!integer_zerop (op))
		continue;

	      /* We've got a NULL PHI argument.  Now see if the
		 PHI's result is dereferenced within BB.  */
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
		{
		  /* We only care about uses in BB.  Catching cases in
		     other blocks would require more complex path
		     isolation code.  */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : gimple_phi_arg_location (phi, i);

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc))
		    {
		      duplicate = isolate_path (bb, duplicate, e,
						use_stmt, lhs, false);

		      /* When we remove an incoming edge, we need to
			 reprocess the Ith element.  */
		      next_i = i;
		      cfg_altered = true;
		    }
		}
	    }
	}
    }
}

/* Look for statements which exhibit erroneous behavior.  For example
   a NULL pointer dereference.

   When found, optimize the block containing the erroneous behavior.  */
static void
find_explicit_erroneous_behavior (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* Now look at the statements in the block and see if any of
	 them explicitly dereference a NULL pointer.  This happens
	 because of jump threading and constant propagation.  */
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);

	  if (stmt_uses_0_or_null_in_undefined_way (stmt))
	    {
	      insert_trap (&si, null_pointer_node);
	      bb = gimple_bb (gsi_stmt (si));

	      /* Ignore any more operands on this statement and
		 continue the statement iterator (which should
		 terminate its loop immediately).  */
	      cfg_altered = true;
	      break;
	    }

	  /* Detect returning the address of a local variable.  This only
	     becomes undefined behavior if the result is used, so we do not
	     insert a trap and only return NULL instead.  */
	  if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	    {
	      tree val = gimple_return_retval (return_stmt);
	      if (val && TREE_CODE (val) == ADDR_EXPR)
		{
		  tree valbase = get_base_address (TREE_OPERAND (val, 0));
		  if ((VAR_P (valbase) && !is_global_var (valbase))
		      || TREE_CODE (valbase) == PARM_DECL)
		    {
		      /* Post-dominator information is only needed to
			 classify this particular warning.  */
		      calculate_dominance_info (CDI_POST_DOMINATORS);
		      const char *msg;
		      bool always_executed = dominated_by_p
			(CDI_POST_DOMINATORS,
			 single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);
		      if (always_executed)
			msg = N_("function returns address of local variable");
		      else
			msg = N_("function may return address of "
				 "local variable");

		      if (warning_at (gimple_location (stmt),
				      OPT_Wreturn_local_addr, msg))
			inform (DECL_SOURCE_LOCATION (valbase),
				"declared here");
		      tree zero = build_zero_cst (TREE_TYPE (val));
		      gimple_return_set_retval (return_stmt, zero);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }
}
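
/* For the return-of-local case above, a function like

     int *f (void) { int x = 0; return &x; }

   (an illustrative example) is diagnosed with -Wreturn-local-addr
   and rewritten to return a null pointer rather than trapping, since
   the behavior is undefined only if the caller actually uses the
   returned address.  */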

/* Search the function for statements which, if executed, would cause
   the program to fault such as a dereference of a NULL pointer.

   According to the ISO standards, no conforming program can be valid
   if such a statement were to execute.

   We detect explicit NULL pointer dereferences as well as those implied
   by a PHI argument having a NULL value which unconditionally flows into
   a dereference in the same block as the PHI.

   In the former case we replace the offending statement with an
   unconditional trap and eliminate the outgoing edges from the statement's
   basic block.  This may expose secondary optimization opportunities.

   In the latter case, we isolate the path(s) with the NULL PHI
   feeding the dereference.  We can then replace the offending statement
   and eliminate the outgoing edges in the duplicate.  Again, this may
   expose secondary optimization opportunities.

   A warning for both cases may be advisable as well.

   Other statically detectable violations of the ISO standard could be
   handled in a similar way, such as out-of-bounds array indexing.  */

static unsigned int
gimple_ssa_isolate_erroneous_paths (void)
{
  initialize_original_copy_tables ();

  /* Search all the blocks for edges which, if traversed, will
     result in undefined behavior.  */
  cfg_altered = false;

  /* First handle cases where traversal of a particular edge
     triggers undefined behavior.  These cases require creating
     duplicate blocks and thus new SSA_NAMEs.

     We want that process complete prior to the phase where we start
     removing edges from the CFG.  Edge removal may ultimately result in
     removal of PHI nodes and thus releasing SSA_NAMEs back to the
     name manager.

     If the two processes ran in parallel we could release an SSA_NAME
     back to the manager but still have dangling references to the
     released SSA_NAME in unreachable blocks.  Sequencing the phases
     ensures that any released names do not have dangling references
     in the IL.  */
  find_implicit_erroneous_behavior ();
  find_explicit_erroneous_behavior ();

  free_original_copy_tables ();

  /* We scramble the CFG and loop structures a bit, clean up
     appropriately.  We really should incrementally update the
     loop structures, in theory it shouldn't be that hard.  */
  free_dominance_info (CDI_POST_DOMINATORS);
  if (cfg_altered)
    {
      free_dominance_info (CDI_DOMINATORS);
      loops_state_set (LOOPS_NEED_FIXUP);
      return TODO_cleanup_cfg | TODO_update_ssa;
    }
  return 0;
}

namespace {
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_isolate_erroneous_paths (m_ctxt); }
  virtual bool gate (function *)
    {
      /* Run the pass only if path isolation was requested via
	 -fisolate-erroneous-paths-dereference or
	 -fisolate-erroneous-paths-attribute, or if -Wnull-dereference
	 diagnostics are wanted.  */
      return (flag_isolate_erroneous_paths_dereference != 0
	      || flag_isolate_erroneous_paths_attribute != 0
	      || warn_null_dereference);
    }

  virtual unsigned int execute (function *)
    {
      return gimple_ssa_isolate_erroneous_paths ();
    }

}; // class pass_isolate_erroneous_paths
}

gimple_opt_pass *
make_pass_isolate_erroneous_paths (gcc::context *ctxt)
{
  return new pass_isolate_erroneous_paths (ctxt);
}