1 /* If-conversion for vectorizer.
2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
3    Contributed by Devang Patel <dpatel@apple.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass implements a tree level if-conversion of loops.  Its
22    initial goal is to help the vectorizer to vectorize loops with
23    conditions.
24 
25    A short description of if-conversion:
26 
27      o Decide if a loop is if-convertible or not.
28      o Walk all loop basic blocks in breadth first order (BFS order).
29        o Remove conditional statements (at the end of basic block)
30          and propagate condition into destination basic blocks'
31 	 predicate list.
32        o Replace modify expression with conditional modify expression
33          using current basic block's condition.
34      o Merge all basic blocks
35        o Replace phi nodes with conditional modify expr
36        o Merge all basic blocks into header
37 
38      Sample transformation:
39 
40      INPUT
41      -----
42 
43      # i_23 = PHI <0(0), i_18(10)>;
44      <L0>:;
45      j_15 = A[i_23];
46      if (j_15 > 41) goto <L1>; else goto <L17>;
47 
48      <L17>:;
49      goto <bb 3> (<L3>);
50 
51      <L1>:;
52 
53      # iftmp.2_4 = PHI <0(8), 42(2)>;
54      <L3>:;
55      A[i_23] = iftmp.2_4;
56      i_18 = i_23 + 1;
57      if (i_18 <= 15) goto <L19>; else goto <L18>;
58 
59      <L19>:;
60      goto <bb 1> (<L0>);
61 
62      <L18>:;
63 
64      OUTPUT
65      ------
66 
67      # i_23 = PHI <0(0), i_18(10)>;
68      <L0>:;
69      j_15 = A[i_23];
70 
71      <L3>:;
72      iftmp.2_4 = j_15 > 41 ? 42 : 0;
73      A[i_23] = iftmp.2_4;
74      i_18 = i_23 + 1;
75      if (i_18 <= 15) goto <L19>; else goto <L18>;
76 
77      <L19>:;
78      goto <bb 1> (<L0>);
79 
80      <L18>:;
81 */
82 
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "cfganal.h"
118 #include "internal-fn.h"
119 #include "fold-const.h"
120 #include "tree-ssa-sccvn.h"
121 #include "tree-cfgcleanup.h"
122 #include "tree-ssa-dse.h"
123 #include "tree-vectorizer.h"
124 #include "tree-eh.h"
125 
126 /* Only handle PHIs with no more arguments than this unless we are
127    asked to by the simd pragma.  */
128 #define MAX_PHI_ARG_NUM \
129   ((unsigned) param_max_tree_if_conversion_phi_args)
130 
131 /* True if we've converted a statement that was only executed when some
132    condition C was true, and if for correctness we need to predicate the
133    statement to ensure that it is a no-op when C is false.  See
134    predicate_statements for the kinds of predication we support.  */
135 static bool need_to_predicate;
136 
137 /* True if we have to rewrite stmts that may invoke undefined behavior
138    when a condition C is false, so they do not when executed unconditionally.
139    See predicate_statements for the kinds of predication we support.  */
140 static bool need_to_rewrite_undefined;
141 
142 /* Indicate if there are any complicated PHIs that need to be handled in
143    if-conversion.  A complicated PHI has more than two arguments and cannot
144    be degenerated to a two-argument PHI.  See the comment before
145    phi_convertible_by_degenerating_args for more information.  */
146 static bool any_complicated_phi;
147 
148 /* Hash for struct innermost_loop_behavior.  It is the user's
149    responsibility to free the memory.  */
150 
151 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
152 {
153   static inline hashval_t hash (const value_type &);
154   static inline bool equal (const value_type &,
155 			    const compare_type &);
156 };
157 
158 inline hashval_t
159 innermost_loop_behavior_hash::hash (const value_type &e)
160 {
161   hashval_t hash;
162 
163   hash = iterative_hash_expr (e->base_address, 0);
164   hash = iterative_hash_expr (e->offset, hash);
165   hash = iterative_hash_expr (e->init, hash);
166   return iterative_hash_expr (e->step, hash);
167 }
168 
169 inline bool
170 innermost_loop_behavior_hash::equal (const value_type &e1,
171 				     const compare_type &e2)
172 {
173   if ((e1->base_address && !e2->base_address)
174       || (!e1->base_address && e2->base_address)
175       || (!e1->offset && e2->offset)
176       || (e1->offset && !e2->offset)
177       || (!e1->init && e2->init)
178       || (e1->init && !e2->init)
179       || (!e1->step && e2->step)
180       || (e1->step && !e2->step))
181     return false;
182 
183   if (e1->base_address && e2->base_address
184       && !operand_equal_p (e1->base_address, e2->base_address, 0))
185     return false;
186   if (e1->offset && e2->offset
187       && !operand_equal_p (e1->offset, e2->offset, 0))
188     return false;
189   if (e1->init && e2->init
190       && !operand_equal_p (e1->init, e2->init, 0))
191     return false;
192   if (e1->step && e2->step
193       && !operand_equal_p (e1->step, e2->step, 0))
194     return false;
195 
196   return true;
197 }
198 
199 /* List of basic blocks in if-conversion-suitable order.  */
200 static basic_block *ifc_bbs;
201 
202 /* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
203 static hash_map<innermost_loop_behavior_hash,
204 		data_reference_p> *innermost_DR_map;
205 
206 /* Hash table to store <base reference, DR> pairs.  */
207 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
208 
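/* For illustration (a sketch): two accesses to A[i_1] in different basic
   blocks share the same innermost loop behavior, so innermost_DR_map points
   both at a single "master" DR, while baseref_DR_map maps their base object
   A to the master DR used to track whether A is written at least once
   unconditionally.  */
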
209 /* List of redundant SSA names: the first should be replaced by the second.  */
210 static vec< std::pair<tree, tree> > redundant_ssa_names;
211 
212 /* Structure used to predicate basic blocks.  This is attached to the
213    ->aux field of the BBs in the loop to be if-converted.  */
214 struct bb_predicate {
215 
216   /* The condition under which this basic block is executed.  */
217   tree predicate;
218 
219   /* PREDICATE is gimplified, and the sequence of statements is
220      recorded here, in order to avoid the duplication of computations
221      that occur in previous conditions.  See PR44483.  */
222   gimple_seq predicate_gimplified_stmts;
223 };
224 
225 /* Returns true when the basic block BB has a predicate.  */
226 
227 static inline bool
228 bb_has_predicate (basic_block bb)
229 {
230   return bb->aux != NULL;
231 }
232 
233 /* Returns the gimplified predicate for basic block BB.  */
234 
235 static inline tree
236 bb_predicate (basic_block bb)
237 {
238   return ((struct bb_predicate *) bb->aux)->predicate;
239 }
240 
241 /* Sets the gimplified predicate COND for basic block BB.  */
242 
243 static inline void
244 set_bb_predicate (basic_block bb, tree cond)
245 {
246   gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
247 	       && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
248 	      || is_gimple_condexpr (cond));
249   ((struct bb_predicate *) bb->aux)->predicate = cond;
250 }
251 
252 /* Returns the sequence of statements of the gimplification of the
253    predicate for basic block BB.  */
254 
255 static inline gimple_seq
256 bb_predicate_gimplified_stmts (basic_block bb)
257 {
258   return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
259 }
260 
261 /* Sets the sequence of statements STMTS of the gimplification of the
262    predicate for basic block BB.  */
263 
264 static inline void
265 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
266 {
267   ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
268 }
269 
270 /* Adds the sequence of statements STMTS to the sequence of statements
271    of the predicate for basic block BB.  */
272 
273 static inline void
274 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
275 {
276   /* We might have updated some stmts in STMTS via force_gimple_operand
277      calling fold_stmt and that producing multiple stmts.  Delink immediate
278      uses so update_ssa after loop versioning doesn't get confused for
279      the not yet inserted predicates.
280      ???  This should go away once we reliably avoid updating stmts
281      not in any BB.  */
282   for (gimple_stmt_iterator gsi = gsi_start (stmts);
283        !gsi_end_p (gsi); gsi_next (&gsi))
284     {
285       gimple *stmt = gsi_stmt (gsi);
286       delink_stmt_imm_use (stmt);
287       gimple_set_modified (stmt, true);
288     }
289   gimple_seq_add_seq_without_update
290     (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
291 }
292 
293 /* Initializes to TRUE the predicate of basic block BB.  */
294 
295 static inline void
296 init_bb_predicate (basic_block bb)
297 {
298   bb->aux = XNEW (struct bb_predicate);
299   set_bb_predicate_gimplified_stmts (bb, NULL);
300   set_bb_predicate (bb, boolean_true_node);
301 }
302 
303 /* Release the SSA_NAMEs associated with the predicate of basic block BB.  */
304 
305 static inline void
306 release_bb_predicate (basic_block bb)
307 {
308   gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
309   if (stmts)
310     {
311       /* Ensure that these stmts haven't yet been added to a bb.  */
312       if (flag_checking)
313 	for (gimple_stmt_iterator i = gsi_start (stmts);
314 	     !gsi_end_p (i); gsi_next (&i))
315 	  gcc_assert (! gimple_bb (gsi_stmt (i)));
316 
317       /* Discard them.  */
318       gimple_seq_discard (stmts);
319       set_bb_predicate_gimplified_stmts (bb, NULL);
320     }
321 }
322 
323 /* Free the predicate of basic block BB.  */
324 
325 static inline void
326 free_bb_predicate (basic_block bb)
327 {
328   if (!bb_has_predicate (bb))
329     return;
330 
331   release_bb_predicate (bb);
332   free (bb->aux);
333   bb->aux = NULL;
334 }
335 
336 /* Reinitialize predicate of BB with the true predicate.  */
337 
338 static inline void
339 reset_bb_predicate (basic_block bb)
340 {
341   if (!bb_has_predicate (bb))
342     init_bb_predicate (bb);
343   else
344     {
345       release_bb_predicate (bb);
346       set_bb_predicate (bb, boolean_true_node);
347     }
348 }
349 
350 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
351    the expression EXPR.  Inserts the statement created for this
352    computation before GSI and leaves the iterator GSI at the same
353    statement.  */
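/* A typical use (the names below are purely illustrative): materializing a
   predicated value as "_ifc__42 = c_1 ? x_2 : y_3;" so that later
   statements can refer to the SSA name instead of re-evaluating the
   COND_EXPR.  */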
354 
355 static tree
356 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
357 {
358   tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
359   gimple *stmt = gimple_build_assign (new_name, expr);
360   gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
361   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
362   return new_name;
363 }
364 
365 /* Return true when COND is a false predicate.  */
366 
367 static inline bool
368 is_false_predicate (tree cond)
369 {
370   return (cond != NULL_TREE
371 	  && (cond == boolean_false_node
372 	      || integer_zerop (cond)));
373 }
374 
375 /* Return true when COND is a true predicate.  */
376 
377 static inline bool
378 is_true_predicate (tree cond)
379 {
380   return (cond == NULL_TREE
381 	  || cond == boolean_true_node
382 	  || integer_onep (cond));
383 }
384 
385 /* Returns true when BB has a predicate that is not trivial: true or
386    NULL_TREE.  */
387 
388 static inline bool
389 is_predicated (basic_block bb)
390 {
391   return !is_true_predicate (bb_predicate (bb));
392 }
393 
394 /* Parses the predicate COND and returns its comparison code and
395    operands OP0 and OP1.  */
396 
397 static enum tree_code
398 parse_predicate (tree cond, tree *op0, tree *op1)
399 {
400   gimple *s;
401 
402   if (TREE_CODE (cond) == SSA_NAME
403       && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
404     {
405       if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
406 	{
407 	  *op0 = gimple_assign_rhs1 (s);
408 	  *op1 = gimple_assign_rhs2 (s);
409 	  return gimple_assign_rhs_code (s);
410 	}
411 
412       else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
413 	{
414 	  tree op = gimple_assign_rhs1 (s);
415 	  tree type = TREE_TYPE (op);
416 	  enum tree_code code = parse_predicate (op, op0, op1);
417 
418 	  return code == ERROR_MARK ? ERROR_MARK
419 	    : invert_tree_comparison (code, HONOR_NANS (type));
420 	}
421 
422       return ERROR_MARK;
423     }
424 
425   if (COMPARISON_CLASS_P (cond))
426     {
427       *op0 = TREE_OPERAND (cond, 0);
428       *op1 = TREE_OPERAND (cond, 1);
429       return TREE_CODE (cond);
430     }
431 
432   return ERROR_MARK;
433 }
434 
435 /* Returns the fold of predicate C1 OR C2 at location LOC.  */
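/* For example (an illustrative sketch, not taken from a testcase): with
   C1 = "i_1 < 16" and C2 = "i_1 >= 16", maybe_fold_or_comparisons can fold
   the disjunction to true; when no such folding applies, the predicates are
   simply combined with TRUTH_OR_EXPR.  */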
436 
437 static tree
438 fold_or_predicates (location_t loc, tree c1, tree c2)
439 {
440   tree op1a, op1b, op2a, op2b;
441   enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
442   enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
443 
444   if (code1 != ERROR_MARK && code2 != ERROR_MARK)
445     {
446       tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
447 					  code2, op2a, op2b);
448       if (t)
449 	return t;
450     }
451 
452   return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
453 }
454 
455 /* Returns either a COND_EXPR or the folded expression if the folded
456    expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
457    a constant or a SSA_NAME. */
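/* For instance (a sketch): for COND "a_1 > b_2", RHS a_1 and LHS b_2,
   fold_ternary can simplify "a_1 > b_2 ? a_1 : b_2" into
   "MAX_EXPR <a_1, b_2>", which is then returned instead of a COND_EXPR.  */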
458 
459 static tree
460 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
461 {
462   tree rhs1, lhs1, cond_expr;
463 
464   /* If COND is comparison r != 0 and r has boolean type, convert COND
465      to SSA_NAME to accept by vect bool pattern.  */
466   if (TREE_CODE (cond) == NE_EXPR)
467     {
468       tree op0 = TREE_OPERAND (cond, 0);
469       tree op1 = TREE_OPERAND (cond, 1);
470       if (TREE_CODE (op0) == SSA_NAME
471 	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
472 	  && (integer_zerop (op1)))
473 	cond = op0;
474     }
475   cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
476 
477   if (cond_expr == NULL_TREE)
478     return build3 (COND_EXPR, type, cond, rhs, lhs);
479 
480   STRIP_USELESS_TYPE_CONVERSION (cond_expr);
481 
482   if (is_gimple_val (cond_expr))
483     return cond_expr;
484 
485   if (TREE_CODE (cond_expr) == ABS_EXPR)
486     {
487       rhs1 = TREE_OPERAND (cond_expr, 1);
488       STRIP_USELESS_TYPE_CONVERSION (rhs1);
489       if (is_gimple_val (rhs1))
490 	return build1 (ABS_EXPR, type, rhs1);
491     }
492 
493   if (TREE_CODE (cond_expr) == MIN_EXPR
494       || TREE_CODE (cond_expr) == MAX_EXPR)
495     {
496       lhs1 = TREE_OPERAND (cond_expr, 0);
497       STRIP_USELESS_TYPE_CONVERSION (lhs1);
498       rhs1 = TREE_OPERAND (cond_expr, 1);
499       STRIP_USELESS_TYPE_CONVERSION (rhs1);
500       if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
501 	return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
502     }
503   return build3 (COND_EXPR, type, cond, rhs, lhs);
504 }
505 
506 /* Add condition NC to the predicate list of basic block BB.  LOOP is
507    the loop to be if-converted. Use predicate of cd-equivalent block
508    for join bb if it exists: we call basic blocks bb1 and bb2
509    cd-equivalent if they are executed under the same condition.  */
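/* For example (illustrative): in a diamond where bb0 ends in "if (p_1)" and
   its two arms join in bb3, bb3 is cd-equivalent to bb0 since both run under
   the same condition; bb3 therefore simply inherits bb0's predicate instead
   of OR-ing the predicates of the two arms.  */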
510 
511 static inline void
512 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
513 {
514   tree bc, *tp;
515   basic_block dom_bb;
516 
517   if (is_true_predicate (nc))
518     return;
519 
520   /* If dominance tells us this basic block is always executed,
521      don't record any predicates for it.  */
522   if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
523     return;
524 
525   dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
526   /* We use notion of cd equivalence to get simpler predicate for
527      join block, e.g. if join block has 2 predecessors with predicates
528      p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
529      p1 & p2 | p1 & !p2.  */
530   if (dom_bb != loop->header
531       && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
532     {
533       gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
534       bc = bb_predicate (dom_bb);
535       if (!is_true_predicate (bc))
536 	set_bb_predicate (bb, bc);
537       else
538 	gcc_assert (is_true_predicate (bb_predicate (bb)));
539       if (dump_file && (dump_flags & TDF_DETAILS))
540 	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
541 		 dom_bb->index, bb->index);
542       return;
543     }
544 
545   if (!is_predicated (bb))
546     bc = nc;
547   else
548     {
549       bc = bb_predicate (bb);
550       bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
551       if (is_true_predicate (bc))
552 	{
553 	  reset_bb_predicate (bb);
554 	  return;
555 	}
556     }
557 
558   /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
559   if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
560     tp = &TREE_OPERAND (bc, 0);
561   else
562     tp = &bc;
563   if (!is_gimple_condexpr (*tp))
564     {
565       gimple_seq stmts;
566       *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
567       add_bb_predicate_gimplified_stmts (bb, stmts);
568     }
569   set_bb_predicate (bb, bc);
570 }
571 
572 /* Add the condition COND to the previous condition PREV_COND, and add
573    this to the predicate list of the destination of edge E.  LOOP is
574    the loop to be if-converted.  */
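/* A sketch: if E's source block ends in "if (x_1 > 0)" and is itself
   executed under PREV_COND p_2, the destination of the true edge receives
   the predicate "p_2 && x_1 > 0" and the destination of the false edge
   receives "p_2 && !(x_1 > 0)".  */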
575 
576 static void
577 add_to_dst_predicate_list (class loop *loop, edge e,
578 			   tree prev_cond, tree cond)
579 {
580   if (!flow_bb_inside_loop_p (loop, e->dest))
581     return;
582 
583   if (!is_true_predicate (prev_cond))
584     cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
585 			prev_cond, cond);
586 
587   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
588     add_to_predicate_list (loop, e->dest, cond);
589 }
590 
591 /* Return true if one of the successor edges of BB exits LOOP.  */
592 
593 static bool
594 bb_with_exit_edge_p (class loop *loop, basic_block bb)
595 {
596   edge e;
597   edge_iterator ei;
598 
599   FOR_EACH_EDGE (e, ei, bb->succs)
600     if (loop_exit_edge_p (loop, e))
601       return true;
602 
603   return false;
604 }
605 
606 /* Given PHI, which has more than two arguments, this function checks if
607    it's if-convertible by degenerating its arguments.  Specifically, it
608    checks that the two conditions below are satisfied:
609 
610      1) The number of distinct argument values equals 2, and one of the
611 	two values occurs exactly once.
612      2) The edge corresponding to the unique argument isn't a critical edge.
613 
614    Such a PHI can be handled as if it had only two arguments.  For example,
615    below PHI:
616 
617      res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
618 
619    can be transformed into:
620 
621      res = (predicate of e3) ? A_2 : A_1;
622 
623    Return TRUE if it is the case, FALSE otherwise.  */
624 
625 static bool
626 phi_convertible_by_degenerating_args (gphi *phi)
627 {
628   edge e;
629   tree arg, t1 = NULL, t2 = NULL;
630   unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
631   unsigned int num_args = gimple_phi_num_args (phi);
632 
633   gcc_assert (num_args > 2);
634 
635   for (i = 0; i < num_args; i++)
636     {
637       arg = gimple_phi_arg_def (phi, i);
638       if (t1 == NULL || operand_equal_p (t1, arg, 0))
639 	{
640 	  n1++;
641 	  i1 = i;
642 	  t1 = arg;
643 	}
644       else if (t2 == NULL || operand_equal_p (t2, arg, 0))
645 	{
646 	  n2++;
647 	  i2 = i;
648 	  t2 = arg;
649 	}
650       else
651 	return false;
652     }
653 
654   if (n1 != 1 && n2 != 1)
655     return false;
656 
657   /* Check if the edge corresponding to the unique arg is critical.  */
658   e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
659   if (EDGE_COUNT (e->src->succs) > 1)
660     return false;
661 
662   return true;
663 }
664 
665 /* Return true when PHI is if-convertible.  PHI is part of loop LOOP
666    and it belongs to basic block BB.  Note that at this point it is
667    already known that PHI is if-convertible.  This function updates the
668    global variable ANY_COMPLICATED_PHI if PHI is complicated.  */
669 
670 static bool
671 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
672 {
673   if (dump_file && (dump_flags & TDF_DETAILS))
674     {
675       fprintf (dump_file, "-------------------------\n");
676       print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
677     }
678 
679   if (bb != loop->header
680       && gimple_phi_num_args (phi) > 2
681       && !phi_convertible_by_degenerating_args (phi))
682     any_complicated_phi = true;
683 
684   return true;
685 }
686 
687 /* Records the status of a data reference.  This struct is attached to
688    each DR->aux field.  */
689 
690 struct ifc_dr {
691   bool rw_unconditionally;
692   bool w_unconditionally;
693   bool written_at_least_once;
694 
695   tree rw_predicate;
696   tree w_predicate;
697   tree base_w_predicate;
698 };
699 
700 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
701 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
702 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
703 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
704 
705 /* Given a data reference A, stores <innermost behavior, DR> and
706    <base ref, DR> pairs in the hash tables.  While storing them, it
707    checks whether the reference is unconditionally read or written and
708    records that as a flag.  For the base reference it checks whether it
709    is written at least once unconditionally and records that with the DR.
710    In other words, for every data reference A in STMT there exist other
711    accesses to a data reference with the same base with predicates that
712    add up (OR-up) to the true predicate: this ensures that the data
713    reference A is touched (read or written) on every iteration of the
714    if-converted loop.  */
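/* As an illustration (a sketch, not from a testcase):

     if (c_1) A[i_2] = x_3; else A[i_2] = y_4;

   records write predicates c_1 and !c_1 for the same innermost behavior;
   they OR up to true, so the master DR is marked as written
   unconditionally.  */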
715 static void
716 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
717 {
718 
719   data_reference_p *master_dr, *base_master_dr;
720   tree base_ref = DR_BASE_OBJECT (a);
721   innermost_loop_behavior *innermost = &DR_INNERMOST (a);
722   tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
723   bool exist1, exist2;
724 
725   master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
726   if (!exist1)
727     *master_dr = a;
728 
729   if (DR_IS_WRITE (a))
730     {
731       IFC_DR (*master_dr)->w_predicate
732 	= fold_or_predicates (UNKNOWN_LOCATION, ca,
733 			      IFC_DR (*master_dr)->w_predicate);
734       if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
735 	DR_W_UNCONDITIONALLY (*master_dr) = true;
736     }
737   IFC_DR (*master_dr)->rw_predicate
738     = fold_or_predicates (UNKNOWN_LOCATION, ca,
739 			  IFC_DR (*master_dr)->rw_predicate);
740   if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
741     DR_RW_UNCONDITIONALLY (*master_dr) = true;
742 
743   if (DR_IS_WRITE (a))
744     {
745       base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
746       if (!exist2)
747 	*base_master_dr = a;
748       IFC_DR (*base_master_dr)->base_w_predicate
749 	= fold_or_predicates (UNKNOWN_LOCATION, ca,
750 			      IFC_DR (*base_master_dr)->base_w_predicate);
751       if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
752 	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
753     }
754 }
755 
756 /* Return TRUE if we can prove that the index IDX of an array reference
757    REF is within the array bounds.  Return false otherwise.  */
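/* A small worked example (illustrative): for "int A[16]" accessed as A[i_1]
   with i_1 starting at 0 and stepping by 1, low = 0, high = 15, delta = 15
   and valid_niter = 15, so the access is known to stay within bounds
   whenever max_loop_iterations is at most 15.  */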
758 
759 static bool
760 idx_within_array_bound (tree ref, tree *idx, void *dta)
761 {
762   wi::overflow_type overflow;
763   widest_int niter, valid_niter, delta, wi_step;
764   tree ev, init, step;
765   tree low, high;
766   class loop *loop = (class loop*) dta;
767 
768   /* Only support within-bound access for array references.  */
769   if (TREE_CODE (ref) != ARRAY_REF)
770     return false;
771 
772   /* For arrays at the end of the structure, we are not guaranteed that they
773      do not really extend over their declared size.  However, for arrays of
774      size greater than one, this is unlikely to be intended.  */
775   if (array_at_struct_end_p (ref))
776     return false;
777 
778   ev = analyze_scalar_evolution (loop, *idx);
779   ev = instantiate_parameters (loop, ev);
780   init = initial_condition (ev);
781   step = evolution_part_in_loop_num (ev, loop->num);
782 
783   if (!init || TREE_CODE (init) != INTEGER_CST
784       || (step && TREE_CODE (step) != INTEGER_CST))
785     return false;
786 
787   low = array_ref_low_bound (ref);
788   high = array_ref_up_bound (ref);
789 
790   /* The case of nonconstant bounds could be handled, but it would be
791      complicated.  */
792   if (TREE_CODE (low) != INTEGER_CST
793       || !high || TREE_CODE (high) != INTEGER_CST)
794     return false;
795 
796   /* Check if the initial idx is within bound.  */
797   if (wi::to_widest (init) < wi::to_widest (low)
798       || wi::to_widest (init) > wi::to_widest (high))
799     return false;
800 
801   /* The idx is always within bound.  */
802   if (!step || integer_zerop (step))
803     return true;
804 
805   if (!max_loop_iterations (loop, &niter))
806     return false;
807 
808   if (wi::to_widest (step) < 0)
809     {
810       delta = wi::to_widest (init) - wi::to_widest (low);
811       wi_step = -wi::to_widest (step);
812     }
813   else
814     {
815       delta = wi::to_widest (high) - wi::to_widest (init);
816       wi_step = wi::to_widest (step);
817     }
818 
819   valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
820   /* The iteration space of idx is within array bound.  */
821   if (!overflow && niter <= valid_niter)
822     return true;
823 
824   return false;
825 }
826 
827 /* Return TRUE if REF is an array reference whose indices are within bounds.  */
828 
829 static bool
830 ref_within_array_bound (gimple *stmt, tree ref)
831 {
832   class loop *loop = loop_containing_stmt (stmt);
833 
834   gcc_assert (loop != NULL);
835   return for_each_index (&ref, idx_within_array_bound, loop);
836 }
837 
838 
839 /* Given a memory reference expression T, return TRUE if base object
840    it refers to is writable.  The base object of a memory reference
841    is the main object being referenced, which is returned by function
842    get_base_address.  */
843 
844 static bool
845 base_object_writable (tree ref)
846 {
847   tree base_tree = get_base_address (ref);
848 
849   return (base_tree
850 	  && DECL_P (base_tree)
851 	  && decl_binds_to_current_def_p (base_tree)
852 	  && !TREE_READONLY (base_tree));
853 }
854 
855 /* Return true when the memory references of STMT won't trap in the
856    if-converted code.  There are two things that we have to check for:
857 
858    - writes to memory occur to writable memory: if-conversion of
859    memory writes transforms the conditional memory writes into
860    unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
861    into "A[i] = cond ? foo : A[i]", and as the write to memory may not
862    be executed at all in the original code, it may be a readonly
863    memory.  To check that A is not const-qualified, we check that
864    there exists at least one unconditional write to A in the current
865    function.
866 
867    - reads or writes to memory are valid memory accesses for every
868    iteration.  To check that the memory accesses are correctly formed
869    and that we are allowed to read and write in these locations, we
870    check that the memory accesses to be if-converted occur at every
871    iteration unconditionally.
872 
873    Returns true for the memory reference in STMT if the same memory
874    reference is read or written unconditionally at least once and the base
875    memory reference is written unconditionally once.  This checks that the
876    reference will not fault on write.  Also returns true if the memory
877    reference is unconditionally read once and we are conditionally writing
878    to memory that is defined as read-and-write and is bound to the
879    definition we are seeing.  */
880 static bool
881 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
882 {
883   /* If DR didn't see a reference here we can't use it to tell
884      whether the ref traps or not.  */
885   if (gimple_uid (stmt) == 0)
886     return false;
887 
888   data_reference_p *master_dr, *base_master_dr;
889   data_reference_p a = drs[gimple_uid (stmt) - 1];
890 
891   tree base = DR_BASE_OBJECT (a);
892   innermost_loop_behavior *innermost = &DR_INNERMOST (a);
893 
894   gcc_assert (DR_STMT (a) == stmt);
895   gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
896               || DR_INIT (a) || DR_STEP (a));
897 
898   master_dr = innermost_DR_map->get (innermost);
899   gcc_assert (master_dr != NULL);
900 
901   base_master_dr = baseref_DR_map->get (base);
902 
903   /* If a is unconditionally written to it doesn't trap.  */
904   if (DR_W_UNCONDITIONALLY (*master_dr))
905     return true;
906 
907   /* If a is unconditionally accessed then ...
908 
909      Even if a is a conditional access, we can treat it as an unconditional
910      one if it's an array reference and all its indices are within the
911      array bounds.  */
912   if (DR_RW_UNCONDITIONALLY (*master_dr)
913       || ref_within_array_bound (stmt, DR_REF (a)))
914     {
915       /* an unconditional read won't trap.  */
916       if (DR_IS_READ (a))
917 	return true;
918 
919       /* An unconditional write won't trap if the base is written
920          to unconditionally.  */
921       if (base_master_dr
922 	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
923 	return flag_store_data_races;
924       /* or the base is known to be not readonly.  */
925       else if (base_object_writable (DR_REF (a)))
926 	return flag_store_data_races;
927     }
928 
929   return false;
930 }
931 
932 /* Return true if STMT could be converted into a masked load or store
933    (conditional load or store based on a mask computed from bb predicate).  */
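/* A sketch of the transformation done later in predicate_statements: a
   conditional store such as

     if (c_1) A[i_2] = x_3;

   can become a call to the internal function .MASK_STORE with a mask
   derived from the block's predicate, provided the target supports masked
   load/store for the access mode.  */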
934 
935 static bool
936 ifcvt_can_use_mask_load_store (gimple *stmt)
937 {
938   /* Check whether this is a load or store.  */
939   tree lhs = gimple_assign_lhs (stmt);
940   bool is_load;
941   tree ref;
942   if (gimple_store_p (stmt))
943     {
944       if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
945 	return false;
946       is_load = false;
947       ref = lhs;
948     }
949   else if (gimple_assign_load_p (stmt))
950     {
951       is_load = true;
952       ref = gimple_assign_rhs1 (stmt);
953     }
954   else
955     return false;
956 
957   if (may_be_nonaddressable_p (ref))
958     return false;
959 
960   /* The mask should have an integer mode of the same size as the
961      load/store mode.  */
962   machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
963   if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
964     return false;
965 
966   if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
967     return true;
968 
969   return false;
970 }
971 
972 /* Return true if STMT could be converted from an operation that is
973    unconditional to one that is conditional on a bb predicate mask.  */
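/* For example (a rough sketch): a trapping operation such as
   "a_5 = b_6 / c_7" that is only executed under predicate p_8 can be
   turned into something like ".COND_DIV (p_8, b_6, c_7, else_value)",
   so the division only happens where the mask is set.  */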
974 
975 static bool
976 ifcvt_can_predicate (gimple *stmt)
977 {
978   basic_block bb = gimple_bb (stmt);
979 
980   if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
981       || bb->loop_father->dont_vectorize
982       || gimple_has_volatile_ops (stmt))
983     return false;
984 
985   if (gimple_assign_single_p (stmt))
986     return ifcvt_can_use_mask_load_store (stmt);
987 
988   tree_code code = gimple_assign_rhs_code (stmt);
989   tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
990   tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
991   if (!types_compatible_p (lhs_type, rhs_type))
992     return false;
993   internal_fn cond_fn = get_conditional_internal_fn (code);
994   return (cond_fn != IFN_LAST
995 	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
996 }
997 
998 /* Return true when STMT is if-convertible.
999 
1000    A GIMPLE_ASSIGN statement is not if-convertible if:
1001    - it is not movable,
1002    - it could trap,
1003    - its LHS is not of a register type.  */
1004 
1005 static bool
1006 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1007 				     vec<data_reference_p> refs)
1008 {
1009   tree lhs = gimple_assign_lhs (stmt);
1010 
1011   if (dump_file && (dump_flags & TDF_DETAILS))
1012     {
1013       fprintf (dump_file, "-------------------------\n");
1014       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1015     }
1016 
1017   if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1018     return false;
1019 
1020   /* Some of these constraints might be too conservative.  */
1021   if (stmt_ends_bb_p (stmt)
1022       || gimple_has_volatile_ops (stmt)
1023       || (TREE_CODE (lhs) == SSA_NAME
1024           && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1025       || gimple_has_side_effects (stmt))
1026     {
1027       if (dump_file && (dump_flags & TDF_DETAILS))
1028         fprintf (dump_file, "stmt not suitable for ifcvt\n");
1029       return false;
1030     }
1031 
1032   /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1033      in between if_convertible_loop_p and combine_blocks
1034      we can perform loop versioning.  */
1035   gimple_set_plf (stmt, GF_PLF_2, false);
1036 
1037   if ((! gimple_vuse (stmt)
1038        || gimple_could_trap_p_1 (stmt, false, false)
1039        || ! ifcvt_memrefs_wont_trap (stmt, refs))
1040       && gimple_could_trap_p (stmt))
1041     {
1042       if (ifcvt_can_predicate (stmt))
1043 	{
1044 	  gimple_set_plf (stmt, GF_PLF_2, true);
1045 	  need_to_predicate = true;
1046 	  return true;
1047 	}
1048       if (dump_file && (dump_flags & TDF_DETAILS))
1049 	fprintf (dump_file, "tree could trap...\n");
1050       return false;
1051     }
1052   else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1053 	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
1054 	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1055 	   && arith_code_with_undefined_signed_overflow
1056 				(gimple_assign_rhs_code (stmt)))
1057     /* We have to rewrite stmts with undefined overflow.  */
1058     need_to_rewrite_undefined = true;
1059 
1060   /* When if-converting stores force versioning, likewise if we
1061      ended up generating store data races.  */
1062   if (gimple_vdef (stmt))
1063     need_to_predicate = true;
1064 
1065   return true;
1066 }
1067 
1068 /* Return true when STMT is if-convertible.
1069 
1070    A statement is if-convertible if:
1071    - it is an if-convertible GIMPLE_ASSIGN,
1072    - it is a GIMPLE_LABEL or a GIMPLE_COND,
1073    - it is a call to a builtin function.  */
1074 
1075 static bool
1076 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1077 {
1078   switch (gimple_code (stmt))
1079     {
1080     case GIMPLE_LABEL:
1081     case GIMPLE_DEBUG:
1082     case GIMPLE_COND:
1083       return true;
1084 
1085     case GIMPLE_ASSIGN:
1086       return if_convertible_gimple_assign_stmt_p (stmt, refs);
1087 
1088     case GIMPLE_CALL:
1089       {
1090 	tree fndecl = gimple_call_fndecl (stmt);
1091 	if (fndecl)
1092 	  {
1093 	    int flags = gimple_call_flags (stmt);
1094 	    if ((flags & ECF_CONST)
1095 		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
1096 		/* We can only vectorize some builtins at the moment,
1097 		   so restrict if-conversion to those.  */
1098 		&& fndecl_built_in_p (fndecl))
1099 	      return true;
1100 	  }
1101 	return false;
1102       }
1103 
1104     default:
1105       /* Don't know what to do with 'em so don't do anything.  */
1106       if (dump_file && (dump_flags & TDF_DETAILS))
1107 	{
1108 	  fprintf (dump_file, "don't know what to do\n");
1109 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1110 	}
1111       return false;
1112     }
1113 }
1114 
1115 /* Assumes that BB has more than 1 predecessor.
1116    Returns false if at least one of BB's incoming edges is not a
1117    critical edge, and true otherwise.  */
1118 
1119 static inline bool
1120 all_preds_critical_p (basic_block bb)
1121 {
1122   edge e;
1123   edge_iterator ei;
1124 
1125   FOR_EACH_EDGE (e, ei, bb->preds)
1126     if (EDGE_COUNT (e->src->succs) == 1)
1127       return false;
1128   return true;
1129 }
1130 
1131 /* Return true when BB is if-convertible.  This routine does not check
1132    basic block's statements and phis.
1133 
1134    A basic block is not if-convertible if:
1135    - it is non-empty and it is after the exit block (in BFS order),
1136    - it is after the exit block but before the latch,
1137    - its edges are not normal.
1138 
1139    EXIT_BB is the basic block containing the exit of the LOOP.  BB is
1140    inside LOOP.  */
1141 
1142 static bool
1143 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1144 {
1145   edge e;
1146   edge_iterator ei;
1147 
1148   if (dump_file && (dump_flags & TDF_DETAILS))
1149     fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1150 
1151   if (EDGE_COUNT (bb->succs) > 2)
1152     return false;
1153 
1154   gimple *last = last_stmt (bb);
1155   if (gcall *call = safe_dyn_cast <gcall *> (last))
1156     if (gimple_call_ctrl_altering_p (call))
1157       return false;
1158 
1159   if (exit_bb)
1160     {
1161       if (bb != loop->latch)
1162 	{
1163 	  if (dump_file && (dump_flags & TDF_DETAILS))
1164 	    fprintf (dump_file, "basic block after exit bb but before latch\n");
1165 	  return false;
1166 	}
1167       else if (!empty_block_p (bb))
1168 	{
1169 	  if (dump_file && (dump_flags & TDF_DETAILS))
1170 	    fprintf (dump_file, "non empty basic block after exit bb\n");
1171 	  return false;
1172 	}
1173       else if (bb == loop->latch
1174 	       && bb != exit_bb
1175 	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1176 	  {
1177 	    if (dump_file && (dump_flags & TDF_DETAILS))
1178 	      fprintf (dump_file, "latch is not dominated by exit_block\n");
1179 	    return false;
1180 	  }
1181     }
1182 
1183   /* Be less adventurous and handle only normal edges.  */
1184   FOR_EACH_EDGE (e, ei, bb->succs)
1185     if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1186       {
1187 	if (dump_file && (dump_flags & TDF_DETAILS))
1188 	  fprintf (dump_file, "Difficult to handle edges\n");
1189 	return false;
1190       }
1191 
1192   return true;
1193 }
1194 
1195 /* Return true when all predecessor blocks of BB are visited.  The
1196    VISITED bitmap keeps track of the visited blocks.  */
1197 
1198 static bool
1199 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1200 {
1201   edge e;
1202   edge_iterator ei;
1203   FOR_EACH_EDGE (e, ei, bb->preds)
1204     if (!bitmap_bit_p (*visited, e->src->index))
1205       return false;
1206 
1207   return true;
1208 }
1209 
1210 /* Get body of a LOOP in suitable order for if-conversion.  It is
1211    caller's responsibility to deallocate basic block list.
1212    The if-conversion-suitable order is breadth-first (BFS) order
1213    with an additional constraint: select a block only if all its
1214    predecessors are already selected.  */
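/* For instance (illustrative): in a diamond, the header is selected first,
   then the two arms in BFS order, and the join block only once both arms
   have been selected, so every block is processed after all of its
   predecessors.  */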
1215 
1216 static basic_block *
1217 get_loop_body_in_if_conv_order (const class loop *loop)
1218 {
1219   basic_block *blocks, *blocks_in_bfs_order;
1220   basic_block bb;
1221   bitmap visited;
1222   unsigned int index = 0;
1223   unsigned int visited_count = 0;
1224 
1225   gcc_assert (loop->num_nodes);
1226   gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1227 
1228   blocks = XCNEWVEC (basic_block, loop->num_nodes);
1229   visited = BITMAP_ALLOC (NULL);
1230 
1231   blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1232 
1233   index = 0;
1234   while (index < loop->num_nodes)
1235     {
1236       bb = blocks_in_bfs_order [index];
1237 
1238       if (bb->flags & BB_IRREDUCIBLE_LOOP)
1239 	{
1240 	  free (blocks_in_bfs_order);
1241 	  BITMAP_FREE (visited);
1242 	  free (blocks);
1243 	  return NULL;
1244 	}
1245 
1246       if (!bitmap_bit_p (visited, bb->index))
1247 	{
1248 	  if (pred_blocks_visited_p (bb, &visited)
1249 	      || bb == loop->header)
1250 	    {
1251 	      /* This block is now visited.  */
1252 	      bitmap_set_bit (visited, bb->index);
1253 	      blocks[visited_count++] = bb;
1254 	    }
1255 	}
1256 
1257       index++;
1258 
1259       if (index == loop->num_nodes
1260 	  && visited_count != loop->num_nodes)
1261 	/* Not done yet.  */
1262 	index = 0;
1263     }
1264   free (blocks_in_bfs_order);
1265   BITMAP_FREE (visited);
1266   return blocks;
1267 }
1268 
1269 /* Returns true when the analysis of the predicates for all the basic
1270    blocks in LOOP succeeded.
1271 
1272    predicate_bbs first allocates the predicates of the basic blocks.
1273    These fields are then initialized with the tree expressions
1274    representing the predicates under which a basic block is executed
1275    in the LOOP.  As the loop->header is executed at each iteration, it
1276    has the "true" predicate.  Other statements executed under a
1277    condition are predicated with that condition, for example
1278 
1279    | if (x)
1280    |   S1;
1281    | else
1282    |   S2;
1283 
1284    S1 will be predicated with "x", and
1285    S2 will be predicated with "!x".  */
1286 
1287 static void
1288 predicate_bbs (loop_p loop)
1289 {
1290   unsigned int i;
1291 
1292   for (i = 0; i < loop->num_nodes; i++)
1293     init_bb_predicate (ifc_bbs[i]);
1294 
1295   for (i = 0; i < loop->num_nodes; i++)
1296     {
1297       basic_block bb = ifc_bbs[i];
1298       tree cond;
1299       gimple *stmt;
1300 
1301       /* The loop latch and loop exit block are always executed and
1302 	 have no extra conditions to be processed: skip them.  */
1303       if (bb == loop->latch
1304 	  || bb_with_exit_edge_p (loop, bb))
1305 	{
1306 	  reset_bb_predicate (bb);
1307 	  continue;
1308 	}
1309 
1310       cond = bb_predicate (bb);
1311       stmt = last_stmt (bb);
1312       if (stmt && gimple_code (stmt) == GIMPLE_COND)
1313 	{
1314 	  tree c2;
1315 	  edge true_edge, false_edge;
1316 	  location_t loc = gimple_location (stmt);
1317 	  tree c = build2_loc (loc, gimple_cond_code (stmt),
1318 				    boolean_type_node,
1319 				    gimple_cond_lhs (stmt),
1320 				    gimple_cond_rhs (stmt));
1321 
1322 	  /* Add new condition into destination's predicate list.  */
1323 	  extract_true_false_edges_from_block (gimple_bb (stmt),
1324 					       &true_edge, &false_edge);
1325 
1326 	  /* If C is true, then TRUE_EDGE is taken.  */
1327 	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1328 				     unshare_expr (c));
1329 
1330 	  /* If C is false, then FALSE_EDGE is taken.  */
1331 	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1332 			   unshare_expr (c));
1333 	  add_to_dst_predicate_list (loop, false_edge,
1334 				     unshare_expr (cond), c2);
1335 
1336 	  cond = NULL_TREE;
1337 	}
1338 
1339       /* If current bb has only one successor, then consider it as an
1340 	 unconditional goto.  */
1341       if (single_succ_p (bb))
1342 	{
1343 	  basic_block bb_n = single_succ (bb);
1344 
1345 	  /* The successor bb inherits the predicate of its
1346 	     predecessor.  If there is no predicate in the predecessor
1347 	     bb, then consider the successor bb as always executed.  */
1348 	  if (cond == NULL_TREE)
1349 	    cond = boolean_true_node;
1350 
1351 	  add_to_predicate_list (loop, bb_n, cond);
1352 	}
1353     }
1354 
1355   /* The loop header is always executed.  */
1356   reset_bb_predicate (loop->header);
1357   gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1358 	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1359 }
1360 
1361 /* Build region by adding loop pre-header and post-header blocks.  */
1362 
1363 static vec<basic_block>
1364 build_region (class loop *loop)
1365 {
1366   vec<basic_block> region = vNULL;
1367   basic_block exit_bb = NULL;
1368 
1369   gcc_assert (ifc_bbs);
1370   /* The first element is loop pre-header.  */
1371   region.safe_push (loop_preheader_edge (loop)->src);
1372 
1373   for (unsigned int i = 0; i < loop->num_nodes; i++)
1374     {
1375       basic_block bb = ifc_bbs[i];
1376       region.safe_push (bb);
1377       /* Find loop postheader.  */
1378       edge e;
1379       edge_iterator ei;
1380       FOR_EACH_EDGE (e, ei, bb->succs)
1381 	if (loop_exit_edge_p (loop, e))
1382 	  {
1383 	      exit_bb = e->dest;
1384 	      break;
1385 	  }
1386     }
1387   /* The last element is loop post-header.  */
1388   gcc_assert (exit_bb);
1389   region.safe_push (exit_bb);
1390   return region;
1391 }
1392 
1393 /* Return true when LOOP is if-convertible.  This is a helper function
1394    for if_convertible_loop_p.  REFS and DDRS are initialized and freed
1395    in if_convertible_loop_p.  */
1396 
1397 static bool
1398 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1399 {
1400   unsigned int i;
1401   basic_block exit_bb = NULL;
1402   vec<basic_block> region;
1403 
1404   if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1405     return false;
1406 
1407   calculate_dominance_info (CDI_DOMINATORS);
1408 
1409   /* Allow statements that can be handled during if-conversion.  */
1410   ifc_bbs = get_loop_body_in_if_conv_order (loop);
1411   if (!ifc_bbs)
1412     {
1413       if (dump_file && (dump_flags & TDF_DETAILS))
1414 	fprintf (dump_file, "Irreducible loop\n");
1415       return false;
1416     }
1417 
1418   for (i = 0; i < loop->num_nodes; i++)
1419     {
1420       basic_block bb = ifc_bbs[i];
1421 
1422       if (!if_convertible_bb_p (loop, bb, exit_bb))
1423 	return false;
1424 
1425       if (bb_with_exit_edge_p (loop, bb))
1426 	exit_bb = bb;
1427     }
1428 
1429   for (i = 0; i < loop->num_nodes; i++)
1430     {
1431       basic_block bb = ifc_bbs[i];
1432       gimple_stmt_iterator gsi;
1433 
1434       bool may_have_nonlocal_labels
1435 	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1436       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1437 	switch (gimple_code (gsi_stmt (gsi)))
1438 	  {
1439 	  case GIMPLE_LABEL:
1440 	    if (!may_have_nonlocal_labels)
1441 	      {
1442 		tree label
1443 		  = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1444 		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1445 		  return false;
1446 	      }
1447 	    /* Fallthru.  */
1448 	  case GIMPLE_ASSIGN:
1449 	  case GIMPLE_CALL:
1450 	  case GIMPLE_DEBUG:
1451 	  case GIMPLE_COND:
1452 	    gimple_set_uid (gsi_stmt (gsi), 0);
1453 	    break;
1454 	  default:
1455 	    return false;
1456 	  }
1457     }
1458 
1459   data_reference_p dr;
1460 
1461   innermost_DR_map
1462 	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1463   baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1464 
1465   /* Compute post-dominator tree locally.  */
1466   region = build_region (loop);
1467   calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1468 
1469   predicate_bbs (loop);
1470 
1471   /* Free post-dominator tree since it is not used after predication.  */
1472   free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1473   region.release ();
1474 
1475   for (i = 0; refs->iterate (i, &dr); i++)
1476     {
1477       tree ref = DR_REF (dr);
1478 
1479       dr->aux = XNEW (struct ifc_dr);
1480       DR_BASE_W_UNCONDITIONALLY (dr) = false;
1481       DR_RW_UNCONDITIONALLY (dr) = false;
1482       DR_W_UNCONDITIONALLY (dr) = false;
1483       IFC_DR (dr)->rw_predicate = boolean_false_node;
1484       IFC_DR (dr)->w_predicate = boolean_false_node;
1485       IFC_DR (dr)->base_w_predicate = boolean_false_node;
1486       if (gimple_uid (DR_STMT (dr)) == 0)
1487 	gimple_set_uid (DR_STMT (dr), i + 1);
1488 
1489       /* If DR doesn't have innermost loop behavior or it's a compound
1490          memory reference, we synthesize its innermost loop behavior
1491          for hashing.  */
1492       if (TREE_CODE (ref) == COMPONENT_REF
1493           || TREE_CODE (ref) == IMAGPART_EXPR
1494           || TREE_CODE (ref) == REALPART_EXPR
1495           || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1496 	       || DR_INIT (dr) || DR_STEP (dr)))
1497         {
1498           while (TREE_CODE (ref) == COMPONENT_REF
1499 	         || TREE_CODE (ref) == IMAGPART_EXPR
1500 	         || TREE_CODE (ref) == REALPART_EXPR)
1501 	    ref = TREE_OPERAND (ref, 0);
1502 
1503 	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1504 	  DR_BASE_ADDRESS (dr) = ref;
1505         }
1506       hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1507     }
1508 
1509   for (i = 0; i < loop->num_nodes; i++)
1510     {
1511       basic_block bb = ifc_bbs[i];
1512       gimple_stmt_iterator itr;
1513 
1514       /* Check the if-convertibility of statements in predicated BBs.  */
1515       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1516 	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1517 	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1518 	    return false;
1519     }
1520 
1521   /* Checking PHIs needs to be done after stmts, as the fact whether there
1522      are any masked loads or stores affects the tests.  */
1523   for (i = 0; i < loop->num_nodes; i++)
1524     {
1525       basic_block bb = ifc_bbs[i];
1526       gphi_iterator itr;
1527 
1528       for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1529 	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1530 	  return false;
1531     }
1532 
1533   if (dump_file)
1534     fprintf (dump_file, "Applying if-conversion\n");
1535 
1536   return true;
1537 }
1538 
1539 /* Return true when LOOP is if-convertible.
1540    LOOP is if-convertible if:
1541    - it is innermost,
1542    - it has two or more basic blocks,
1543    - it has only one exit,
1544    - its header has no exit edge,
1545    - its basic blocks and phi nodes are if-convertible.  */
1546 
1547 static bool
1548 if_convertible_loop_p (class loop *loop)
1549 {
1550   edge e;
1551   edge_iterator ei;
1552   bool res = false;
1553   vec<data_reference_p> refs;
1554 
1555   /* Handle only innermost loop.  */
1556   if (!loop || loop->inner)
1557     {
1558       if (dump_file && (dump_flags & TDF_DETAILS))
1559 	fprintf (dump_file, "not innermost loop\n");
1560       return false;
1561     }
1562 
1563   /* If only one block, no need for if-conversion.  */
1564   if (loop->num_nodes <= 2)
1565     {
1566       if (dump_file && (dump_flags & TDF_DETAILS))
1567 	fprintf (dump_file, "less than 2 basic blocks\n");
1568       return false;
1569     }
1570 
1571   /* More than one loop exit is too much to handle.  */
1572   if (!single_exit (loop))
1573     {
1574       if (dump_file && (dump_flags & TDF_DETAILS))
1575 	fprintf (dump_file, "multiple exits\n");
1576       return false;
1577     }
1578 
1579   /* If one of the loop header's edge is an exit edge then do not
1580      apply if-conversion.  */
1581   FOR_EACH_EDGE (e, ei, loop->header->succs)
1582     if (loop_exit_edge_p (loop, e))
1583       return false;
1584 
1585   refs.create (5);
1586   res = if_convertible_loop_p_1 (loop, &refs);
1587 
1588   data_reference_p dr;
1589   unsigned int i;
1590   for (i = 0; refs.iterate (i, &dr); i++)
1591     free (dr->aux);
1592 
1593   free_data_refs (refs);
1594 
1595   delete innermost_DR_map;
1596   innermost_DR_map = NULL;
1597 
1598   delete baseref_DR_map;
1599   baseref_DR_map = NULL;
1600 
1601   return res;
1602 }
1603 
1604 /* If HAS_NOP, return OP stripped of its nop conversion, e.g. reduc_1 in:
1605 
1606    if (...)
1607      tmp1 = (unsigned type) reduc_1;
1608      tmp2 = tmp1 + rhs2;
1609      reduc_3 = (signed type) tmp2.  */
1610 static tree
1611 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1612 {
1613   if (!has_nop)
1614     return op;
1615 
1616   if (TREE_CODE (op) != SSA_NAME)
1617     return NULL_TREE;
1618 
1619   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1620   if (!stmt
1621       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1622       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1623 				 (gimple_assign_rhs1 (stmt))))
1624     return NULL_TREE;
1625 
1626   return gimple_assign_rhs1 (stmt);
1627 }
1628 
1629 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1630    which is in predicated basic block.
1631    In fact, the following PHI pattern is searched for:
1632       loop-header:
1633 	reduc_1 = PHI <..., reduc_2>
1634       ...
1635 	if (...)
1636 	  reduc_3 = ...
1637 	reduc_2 = PHI <reduc_1, reduc_3>
1638 
1639    ARG_0 and ARG_1 are the corresponding PHI arguments.
1640    REDUC, OP0 and OP1 contain reduction stmt and its operands.
1641    EXTENDED is true if PHI has > 2 arguments.  */
1642 
1643 static bool
1644 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1645 			  tree *op0, tree *op1, bool extended, bool* has_nop,
1646 			  gimple **nop_reduc)
1647 {
1648   tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1649   gimple *stmt;
1650   gimple *header_phi = NULL;
1651   enum tree_code reduction_op;
1652   basic_block bb = gimple_bb (phi);
1653   class loop *loop = bb->loop_father;
1654   edge latch_e = loop_latch_edge (loop);
1655   imm_use_iterator imm_iter;
1656   use_operand_p use_p;
1657   edge e;
1658   edge_iterator ei;
1659   bool result = *has_nop = false;
1660   if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1661     return false;
1662 
1663   if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1664     {
1665       lhs = arg_1;
1666       header_phi = SSA_NAME_DEF_STMT (arg_0);
1667       stmt = SSA_NAME_DEF_STMT (arg_1);
1668     }
1669   else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1670     {
1671       lhs = arg_0;
1672       header_phi = SSA_NAME_DEF_STMT (arg_1);
1673       stmt = SSA_NAME_DEF_STMT (arg_0);
1674     }
1675   else
1676     return false;
1677   if (gimple_bb (header_phi) != loop->header)
1678     return false;
1679 
1680   if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1681     return false;
1682 
1683   if (gimple_code (stmt) != GIMPLE_ASSIGN
1684       || gimple_has_volatile_ops (stmt))
1685     return false;
1686 
1687   if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1688     return false;
1689 
1690   if (!is_predicated (gimple_bb (stmt)))
1691     return false;
1692 
1693   /* Check that stmt-block is predecessor of phi-block.  */
1694   FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1695     if (e->dest == bb)
1696       {
1697 	result = true;
1698 	break;
1699       }
1700   if (!result)
1701     return false;
1702 
1703   if (!has_single_use (lhs))
1704     return false;
1705 
1706   reduction_op = gimple_assign_rhs_code (stmt);
1707 
1708     /* Catch something like below
1709 
1710      loop-header:
1711      reduc_1 = PHI <..., reduc_2>
1712      ...
1713      if (...)
1714      tmp1 = (unsigned type) reduc_1;
1715      tmp2 = tmp1 + rhs2;
1716      reduc_3 = (signed type) tmp2;
1717 
1718      reduc_2 = PHI <reduc_1, reduc_3>
1719 
1720      and convert to
1721 
1722      reduc_2 = PHI <0, reduc_3>
1723      tmp1 = (unsigned type) reduc_1;
1724      ifcvt = cond_expr ? rhs2 : 0;
1725      tmp2 = tmp1 +/- ifcvt;
1726      reduc_1 = (signed type) tmp2;  */
1727 
1728   if (CONVERT_EXPR_CODE_P (reduction_op))
1729     {
1730       lhs = gimple_assign_rhs1 (stmt);
1731       if (TREE_CODE (lhs) != SSA_NAME
1732 	  || !has_single_use (lhs))
1733 	return false;
1734 
1735       *nop_reduc = stmt;
1736       stmt = SSA_NAME_DEF_STMT (lhs);
1737       if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1738 	  || !is_gimple_assign (stmt))
1739 	return false;
1740 
1741       *has_nop = true;
1742       reduction_op = gimple_assign_rhs_code (stmt);
1743     }
1744 
1745   if (reduction_op != PLUS_EXPR
1746       && reduction_op != MINUS_EXPR
1747       && reduction_op != BIT_IOR_EXPR
1748       && reduction_op != BIT_XOR_EXPR
1749       && reduction_op != BIT_AND_EXPR)
1750     return false;
1751   r_op1 = gimple_assign_rhs1 (stmt);
1752   r_op2 = gimple_assign_rhs2 (stmt);
1753 
1754   r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1755   r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1756 
1757   /* Make R_OP1 hold the reduction variable.  */
1758   if (r_nop2 == PHI_RESULT (header_phi)
1759       && commutative_tree_code (reduction_op))
1760     {
1761       std::swap (r_op1, r_op2);
1762       std::swap (r_nop1, r_nop2);
1763     }
1764   else if (r_nop1 != PHI_RESULT (header_phi))
1765     return false;
1766 
1767   if (*has_nop)
1768     {
1769       /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
1770       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1771 	{
1772 	  gimple *use_stmt = USE_STMT (use_p);
1773 	  if (is_gimple_debug (use_stmt))
1774 	    continue;
1775 	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1776 	    continue;
1777 	  if (use_stmt != phi)
1778 	    return false;
1779 	}
1780     }
1781 
1782   /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
1783   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1784     {
1785       gimple *use_stmt = USE_STMT (use_p);
1786       if (is_gimple_debug (use_stmt))
1787 	continue;
1788       if (use_stmt == stmt)
1789 	continue;
1790       if (gimple_code (use_stmt) != GIMPLE_PHI)
1791 	return false;
1792     }
1793 
1794   *op0 = r_op1; *op1 = r_op2;
1795   *reduc = stmt;
1796   return true;
1797 }
1798 
1799 /* Converts conditional scalar reduction into unconditional form, e.g.
1800      bb_4
1801        if (_5 != 0) goto bb_5 else goto bb_6
1802      end_bb_4
1803      bb_5
1804        res_6 = res_13 + 1;
1805      end_bb_5
1806      bb_6
1807        # res_2 = PHI <res_13(4), res_6(5)>
1808      end_bb_6
1809 
1810    will be converted into sequence
1811     _ifc__1 = _5 != 0 ? 1 : 0;
1812     res_2 = res_13 + _ifc__1;
1813   Argument SWAP tells that arguments of conditional expression should be
1814   swapped.
1815   Returns rhs of resulting PHI assignment.  */
1816 
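/* Editorial note summarizing the code below: the value used on the not-taken
   path is the neutral element of the reduction operation as returned by
   neutral_op_for_reduction (0 for PLUS/MINUS, BIT_IOR and BIT_XOR, all-ones
   for BIT_AND), so for instance "if (c) res |= x;" effectively becomes
   "res |= c ? x : 0;".  */
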
1817 static tree
1818 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1819 			       tree cond, tree op0, tree op1, bool swap,
1820 			       bool has_nop, gimple* nop_reduc)
1821 {
1822   gimple_stmt_iterator stmt_it;
1823   gimple *new_assign;
1824   tree rhs;
1825   tree rhs1 = gimple_assign_rhs1 (reduc);
1826   tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1827   tree c;
1828   enum tree_code reduction_op  = gimple_assign_rhs_code (reduc);
1829   tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1830   gimple_seq stmts = NULL;
1831 
1832   if (dump_file && (dump_flags & TDF_DETAILS))
1833     {
1834       fprintf (dump_file, "Found cond scalar reduction.\n");
1835       print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1836     }
1837 
1838   /* Build cond expression using COND and constant operand
1839      of reduction rhs.  */
1840   c = fold_build_cond_expr (TREE_TYPE (rhs1),
1841 			    unshare_expr (cond),
1842 			    swap ? op_nochange : op1,
1843 			    swap ? op1 : op_nochange);
1844 
1845   /* Create assignment stmt and insert it at GSI.  */
1846   new_assign = gimple_build_assign (tmp, c);
1847   gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1848   /* Build rhs for unconditional increment/decrement/logic_operation.  */
1849   rhs = gimple_build (&stmts, reduction_op,
1850 		      TREE_TYPE (rhs1), op0, tmp);
1851 
1852   if (has_nop)
1853     {
1854       rhs = gimple_convert (&stmts,
1855 			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1856       stmt_it = gsi_for_stmt (nop_reduc);
1857       gsi_remove (&stmt_it, true);
1858       release_defs (nop_reduc);
1859     }
1860   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1861 
1862   /* Delete original reduction stmt.  */
1863   stmt_it = gsi_for_stmt (reduc);
1864   gsi_remove (&stmt_it, true);
1865   release_defs (reduc);
1866   return rhs;
1867 }
1868 
1869 /* Produce condition for all occurrences of ARG in PHI node.  */
1870 
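/* For illustration (editorial addition): given
   x_1 = PHI <a_2(3), b_4(4), a_2(5)> and OCCUR = {0, 2} for the value a_2,
   the result is the OR of the predicates of bb_3 and bb_5; if one of those
   predicates is the true predicate, it is returned directly.  */
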
1871 static tree
1872 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1873 		       gimple_stmt_iterator *gsi)
1874 {
1875   int len;
1876   int i;
1877   tree cond = NULL_TREE;
1878   tree c;
1879   edge e;
1880 
1881   len = occur->length ();
1882   gcc_assert (len > 0);
1883   for (i = 0; i < len; i++)
1884     {
1885       e = gimple_phi_arg_edge (phi, (*occur)[i]);
1886       c = bb_predicate (e->src);
1887       if (is_true_predicate (c))
1888 	{
1889 	  cond = c;
1890 	  break;
1891 	}
1892       c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1893 				      is_gimple_condexpr, NULL_TREE,
1894 				      true, GSI_SAME_STMT);
1895       if (cond != NULL_TREE)
1896 	{
1897 	  /* Must build OR expression.  */
1898 	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1899 	  cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1900 					     is_gimple_condexpr, NULL_TREE,
1901 					     true, GSI_SAME_STMT);
1902 	}
1903       else
1904 	cond = c;
1905     }
1906   gcc_assert (cond != NULL_TREE);
1907   return cond;
1908 }
1909 
1910 /* Local valueization callback that follows all-use SSA edges.  */
1911 
1912 static tree
1913 ifcvt_follow_ssa_use_edges (tree val)
1914 {
1915   return val;
1916 }
1917 
1918 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1919    This routine can handle PHI nodes with more than two arguments.
1920 
1921    For example,
1922      S1: A = PHI <x1(1), x2(5)>
1923    is converted into,
1924      S2: A = cond ? x1 : x2;
1925 
1926    The generated code is inserted at GSI that points to the top of
1927    basic block's statement list.
1928    If PHI node has more than two arguments a chain of conditional
1929    expression is produced.  */
1930 
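/* Sketch of the multi-argument case (editorial addition): for
   A = PHI <x1(2), x2(3), x3(4)> with block predicates c_2, c_3 and c_4,
   the common path below emits a chain roughly like

     _ifc_1 = c_2 ? x1 : x2;
     _ifc_2 = c_3 ? x2 : _ifc_1;
     A = c_4 ? x3 : _ifc_2;

   with the most frequently occurring argument moved to the end of ARGS so
   it is handled by the final condition.  */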
1931 
1932 static void
1933 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1934 {
1935   gimple *new_stmt = NULL, *reduc, *nop_reduc;
1936   tree rhs, res, arg0, arg1, op0, op1, scev;
1937   tree cond;
1938   unsigned int index0;
1939   unsigned int max, args_len;
1940   edge e;
1941   basic_block bb;
1942   unsigned int i;
1943   bool has_nop;
1944 
1945   res = gimple_phi_result (phi);
1946   if (virtual_operand_p (res))
1947     return;
1948 
1949   if ((rhs = degenerate_phi_result (phi))
1950       || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1951 					    res))
1952 	  && !chrec_contains_undetermined (scev)
1953 	  && scev != res
1954 	  && (rhs = gimple_phi_arg_def (phi, 0))))
1955     {
1956       if (dump_file && (dump_flags & TDF_DETAILS))
1957 	{
1958 	  fprintf (dump_file, "Degenerate phi!\n");
1959 	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1960 	}
1961       new_stmt = gimple_build_assign (res, rhs);
1962       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1963       update_stmt (new_stmt);
1964       return;
1965     }
1966 
1967   bb = gimple_bb (phi);
1968   if (EDGE_COUNT (bb->preds) == 2)
1969     {
1970       /* Predicate ordinary PHI node with 2 arguments.  */
1971       edge first_edge, second_edge;
1972       basic_block true_bb;
1973       first_edge = EDGE_PRED (bb, 0);
1974       second_edge = EDGE_PRED (bb, 1);
1975       cond = bb_predicate (first_edge->src);
1976       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1977 	std::swap (first_edge, second_edge);
1978       if (EDGE_COUNT (first_edge->src->succs) > 1)
1979 	{
1980 	  cond = bb_predicate (second_edge->src);
1981 	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1982 	    cond = TREE_OPERAND (cond, 0);
1983 	  else
1984 	    first_edge = second_edge;
1985 	}
1986       else
1987 	cond = bb_predicate (first_edge->src);
1988       /* Gimplify the condition to a valid cond-expr conditional operand.  */
1989       cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1990 					 is_gimple_condexpr, NULL_TREE,
1991 					 true, GSI_SAME_STMT);
1992       true_bb = first_edge->src;
1993       if (EDGE_PRED (bb, 1)->src == true_bb)
1994 	{
1995 	  arg0 = gimple_phi_arg_def (phi, 1);
1996 	  arg1 = gimple_phi_arg_def (phi, 0);
1997 	}
1998       else
1999 	{
2000 	  arg0 = gimple_phi_arg_def (phi, 0);
2001 	  arg1 = gimple_phi_arg_def (phi, 1);
2002 	}
2003       if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2004 				    &op0, &op1, false, &has_nop,
2005 				    &nop_reduc))
2006 	{
2007 	  /* Convert reduction stmt into vectorizable form.  */
2008 	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2009 					       true_bb != gimple_bb (reduc),
2010 					       has_nop, nop_reduc);
2011 	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2012 	}
2013       else
2014 	/* Build new RHS using selected condition and arguments.  */
2015 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2016 				    arg0, arg1);
2017       new_stmt = gimple_build_assign (res, rhs);
2018       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2019       gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2020       if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
2021 	{
2022 	  new_stmt = gsi_stmt (new_gsi);
2023 	  update_stmt (new_stmt);
2024 	}
2025 
2026       if (dump_file && (dump_flags & TDF_DETAILS))
2027 	{
2028 	  fprintf (dump_file, "new phi replacement stmt\n");
2029 	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2030 	}
2031       return;
2032     }
2033 
2034   /* Create hashmap for PHI node which contain vector of argument indexes
2035      having the same value.  */
2036   bool swap = false;
2037   hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2038   unsigned int num_args = gimple_phi_num_args (phi);
2039   int max_ind = -1;
2040   /* Vector of different PHI argument values.  */
2041   auto_vec<tree> args (num_args);
2042 
2043   /* Compute phi_arg_map.  */
2044   for (i = 0; i < num_args; i++)
2045     {
2046       tree arg;
2047 
2048       arg = gimple_phi_arg_def (phi, i);
2049       if (!phi_arg_map.get (arg))
2050 	args.quick_push (arg);
2051       phi_arg_map.get_or_insert (arg).safe_push (i);
2052     }
2053 
2054   /* Determine element with max number of occurrences.  */
2055   max_ind = -1;
2056   max = 1;
2057   args_len = args.length ();
2058   for (i = 0; i < args_len; i++)
2059     {
2060       unsigned int len;
2061       if ((len = phi_arg_map.get (args[i])->length ()) > max)
2062 	{
2063 	  max_ind = (int) i;
2064 	  max = len;
2065 	}
2066     }
2067 
2068   /* Put the element with the max number of occurrences at the end of ARGS.  */
2069   if (max_ind != -1 && max_ind + 1 != (int) args_len)
2070     std::swap (args[args_len - 1], args[max_ind]);
2071 
2072   /* Handle one special case when the number of arguments with different values
2073      is equal to 2 and one argument has only one occurrence.  Such a PHI can be
2074      handled as if it had only 2 arguments.  */
2075   if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2076     {
2077       vec<int> *indexes;
2078       indexes = phi_arg_map.get (args[0]);
2079       index0 = (*indexes)[0];
2080       arg0 = args[0];
2081       arg1 = args[1];
2082       e = gimple_phi_arg_edge (phi, index0);
2083       cond = bb_predicate (e->src);
2084       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2085 	{
2086 	  swap = true;
2087 	  cond = TREE_OPERAND (cond, 0);
2088 	}
2089       /* Gimplify the condition to a valid cond-expr conditional operand.  */
2090       cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
2091 					 is_gimple_condexpr, NULL_TREE,
2092 					 true, GSI_SAME_STMT);
2093       if (!(is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2094 				      &op0, &op1, true, &has_nop, &nop_reduc)))
2095 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2096 				    swap ? arg1 : arg0,
2097 				    swap ? arg0 : arg1);
2098       else
2099 	{
2100 	  /* Convert reduction stmt into vectorizable form.  */
2101 	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2102 					       swap, has_nop, nop_reduc);
2103 	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2104 	}
2105       new_stmt = gimple_build_assign (res, rhs);
2106       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2107       update_stmt (new_stmt);
2108     }
2109   else
2110     {
2111       /* Common case.  */
2112       vec<int> *indexes;
2113       tree type = TREE_TYPE (gimple_phi_result (phi));
2114       tree lhs;
2115       arg1 = args[1];
2116       for (i = 0; i < args_len; i++)
2117 	{
2118 	  arg0 = args[i];
2119 	  indexes = phi_arg_map.get (args[i]);
2120 	  if (i != args_len - 1)
2121 	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2122 	  else
2123 	    lhs = res;
2124 	  cond = gen_phi_arg_condition (phi, indexes, gsi);
2125 	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
2126 				      arg0, arg1);
2127 	  new_stmt = gimple_build_assign (lhs, rhs);
2128 	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2129 	  update_stmt (new_stmt);
2130 	  arg1 = lhs;
2131 	}
2132     }
2133 
2134   if (dump_file && (dump_flags & TDF_DETAILS))
2135     {
2136       fprintf (dump_file, "new extended phi replacement stmt\n");
2137       print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2138     }
2139 }
2140 
2141 /* Replaces in LOOP all the scalar phi nodes other than those in the
2142    LOOP->header block with conditional modify expressions.  */
2143 
2144 static void
2145 predicate_all_scalar_phis (class loop *loop)
2146 {
2147   basic_block bb;
2148   unsigned int orig_loop_num_nodes = loop->num_nodes;
2149   unsigned int i;
2150 
2151   for (i = 1; i < orig_loop_num_nodes; i++)
2152     {
2153       gphi *phi;
2154       gimple_stmt_iterator gsi;
2155       gphi_iterator phi_gsi;
2156       bb = ifc_bbs[i];
2157 
2158       if (bb == loop->header)
2159 	continue;
2160 
2161       phi_gsi = gsi_start_phis (bb);
2162       if (gsi_end_p (phi_gsi))
2163 	continue;
2164 
2165       gsi = gsi_after_labels (bb);
2166       while (!gsi_end_p (phi_gsi))
2167 	{
2168 	  phi = phi_gsi.phi ();
2169 	  if (virtual_operand_p (gimple_phi_result (phi)))
2170 	    gsi_next (&phi_gsi);
2171 	  else
2172 	    {
2173 	      predicate_scalar_phi (phi, &gsi);
2174 	      remove_phi_node (&phi_gsi, false);
2175 	    }
2176 	}
2177     }
2178 }
2179 
2180 /* Insert in each basic block of LOOP the statements produced by the
2181    gimplification of the predicates.  */
2182 
2183 static void
2184 insert_gimplified_predicates (loop_p loop)
2185 {
2186   unsigned int i;
2187 
2188   for (i = 0; i < loop->num_nodes; i++)
2189     {
2190       basic_block bb = ifc_bbs[i];
2191       gimple_seq stmts;
2192       if (!is_predicated (bb))
2193 	gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2194       if (!is_predicated (bb))
2195 	{
2196 	  /* Do not insert statements for a basic block that is not
2197 	     predicated.  Also make sure that the predicate of the
2198 	     basic block is set to true.  */
2199 	  reset_bb_predicate (bb);
2200 	  continue;
2201 	}
2202 
2203       stmts = bb_predicate_gimplified_stmts (bb);
2204       if (stmts)
2205 	{
2206 	  if (need_to_predicate)
2207 	    {
2208 	      /* Insert the predicate of the BB just after the label,
2209 		 as the if-conversion of memory writes will use this
2210 		 predicate.  */
2211 	      gimple_stmt_iterator gsi = gsi_after_labels (bb);
2212 	      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2213 	    }
2214 	  else
2215 	    {
2216 	      /* Insert the predicate of the BB at the end of the BB
2217 		 as this would reduce the register pressure: the only
2218 		 use of this predicate will be in successor BBs.  */
2219 	      gimple_stmt_iterator gsi = gsi_last_bb (bb);
2220 
2221 	      if (gsi_end_p (gsi)
2222 		  || stmt_ends_bb_p (gsi_stmt (gsi)))
2223 		gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2224 	      else
2225 		gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2226 	    }
2227 
2228 	  /* Once the sequence is code generated, set it to NULL.  */
2229 	  set_bb_predicate_gimplified_stmts (bb, NULL);
2230 	}
2231     }
2232 }
2233 
2234 /* Helper function for predicate_statements.  Returns the index of an existing
2235    mask if one was created for the given SIZE, and -1 otherwise.  */
2236 
2237 static int
2238 mask_exists (int size, const vec<int> &vec)
2239 {
2240   unsigned int ix;
2241   int v;
2242   FOR_EACH_VEC_ELT (vec, ix, v)
2243     if (v == size)
2244       return (int) ix;
2245   return -1;
2246 }
2247 
2248 /* Helper function for predicate_statements.  STMT is a memory read or
2249    write and it needs to be predicated by MASK.  Return a statement
2250    that does so.  */
2251 
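/* For example (editorial sketch): a predicated load or store

     lhs_1 = *p_2;        -->   lhs_1 = .MASK_LOAD (p_2, align, mask);
     *p_2 = rhs_3;        -->   .MASK_STORE (p_2, align, mask, rhs_3);

   where the second argument encodes the alias pointer type and the
   alignment of the reference.  */
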
2252 static gimple *
2253 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2254 {
2255   gcall *new_stmt;
2256 
2257   tree lhs = gimple_assign_lhs (stmt);
2258   tree rhs = gimple_assign_rhs1 (stmt);
2259   tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2260   mark_addressable (ref);
2261   tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2262 					true, NULL_TREE, true, GSI_SAME_STMT);
2263   tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2264 			    get_object_alignment (ref));
2265   /* Copy points-to info if possible.  */
2266   if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2267     copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2268 		   ref);
2269   if (TREE_CODE (lhs) == SSA_NAME)
2270     {
2271       new_stmt
2272 	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2273 				      ptr, mask);
2274       gimple_call_set_lhs (new_stmt, lhs);
2275       gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2276     }
2277   else
2278     {
2279       new_stmt
2280 	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2281 				      mask, rhs);
2282       gimple_move_vops (new_stmt, stmt);
2283     }
2284   gimple_call_set_nothrow (new_stmt, true);
2285   return new_stmt;
2286 }
2287 
2288 /* STMT uses OP_LHS.  Check whether it is equivalent to:
2289 
2290      ... = OP_MASK ? OP_LHS : X;
2291 
2292    Return X if so, otherwise return null.  OP_MASK is an SSA_NAME that is
2293    known to have value OP_COND.  */
2294 
2295 static tree
2296 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2297 			   tree op_lhs)
2298 {
2299   gassign *assign = dyn_cast <gassign *> (stmt);
2300   if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2301     return NULL_TREE;
2302 
2303   tree use_cond = gimple_assign_rhs1 (assign);
2304   tree if_true = gimple_assign_rhs2 (assign);
2305   tree if_false = gimple_assign_rhs3 (assign);
2306 
2307   if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2308       && if_true == op_lhs)
2309     return if_false;
2310 
2311   if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2312     return if_true;
2313 
2314   return NULL_TREE;
2315 }
2316 
2317 /* Return true if VALUE is available for use at STMT.  SSA_NAMES is
2318    the set of SSA names defined earlier in STMT's block.  */
2319 
2320 static bool
2321 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2322 		   tree value)
2323 {
2324   if (is_gimple_min_invariant (value))
2325     return true;
2326 
2327   if (TREE_CODE (value) == SSA_NAME)
2328     {
2329       if (SSA_NAME_IS_DEFAULT_DEF (value))
2330 	return true;
2331 
2332       basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2333       basic_block use_bb = gimple_bb (stmt);
2334       return (def_bb == use_bb
2335 	      ? ssa_names->contains (value)
2336 	      : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2337     }
2338 
2339   return false;
2340 }
2341 
2342 /* Helper function for predicate_statements.  STMT is a potentially-trapping
2343    arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2344    has value COND.  Return a statement that does so.  SSA_NAMES is the set of
2345    SSA names defined earlier in STMT's block.  */
2346 
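/* For instance (editorial sketch), a division executed under a predicate

     lhs_1 = a_2 / b_3;

   is rewritten into a conditional internal function call

     lhs_1 = .COND_DIV (mask_4, a_2, b_3, else_5);

   so the trapping operation is only performed where MASK is true; the else
   value comes from a matching COND_EXPR use when available, otherwise from
   targetm.preferred_else_value.  */
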
2347 static gimple *
2348 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2349 		    hash_set<tree_ssa_name_hash> *ssa_names)
2350 {
2351   tree lhs = gimple_assign_lhs (stmt);
2352   tree_code code = gimple_assign_rhs_code (stmt);
2353   unsigned int nops = gimple_num_ops (stmt);
2354   internal_fn cond_fn = get_conditional_internal_fn (code);
2355 
2356   /* Construct the arguments to the conditional internal function.   */
2357   auto_vec<tree, 8> args;
2358   args.safe_grow (nops + 1, true);
2359   args[0] = mask;
2360   for (unsigned int i = 1; i < nops; ++i)
2361     args[i] = gimple_op (stmt, i);
2362   args[nops] = NULL_TREE;
2363 
2364   /* Look for uses of the result to see whether they are COND_EXPRs that can
2365      be folded into the conditional call.  */
2366   imm_use_iterator imm_iter;
2367   gimple *use_stmt;
2368   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2369     {
2370       tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2371       if (new_else && value_available_p (stmt, ssa_names, new_else))
2372 	{
2373 	  if (!args[nops])
2374 	    args[nops] = new_else;
2375 	  if (operand_equal_p (new_else, args[nops], 0))
2376 	    {
2377 	      /* We have:
2378 
2379 		   LHS = IFN_COND (MASK, ..., ELSE);
2380 		   X = MASK ? LHS : ELSE;
2381 
2382 		 which makes X equivalent to LHS.  */
2383 	      tree use_lhs = gimple_assign_lhs (use_stmt);
2384 	      redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2385 	    }
2386 	}
2387     }
2388   if (!args[nops])
2389     args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2390 					       nops - 1, &args[1]);
2391 
2392   /* Create and insert the call.  */
2393   gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2394   gimple_call_set_lhs (new_stmt, lhs);
2395   gimple_call_set_nothrow (new_stmt, true);
2396 
2397   return new_stmt;
2398 }
2399 
2400 /* Predicate each write to memory in LOOP.
2401 
2402    This function transforms control flow constructs containing memory
2403    writes of the form:
2404 
2405    | for (i = 0; i < N; i++)
2406    |   if (cond)
2407    |     A[i] = expr;
2408 
2409    into the following form that does not contain control flow:
2410 
2411    | for (i = 0; i < N; i++)
2412    |   A[i] = cond ? expr : A[i];
2413 
2414    The original CFG looks like this:
2415 
2416    | bb_0
2417    |   i = 0
2418    | end_bb_0
2419    |
2420    | bb_1
2421    |   if (i < N) goto bb_5 else goto bb_2
2422    | end_bb_1
2423    |
2424    | bb_2
2425    |   cond = some_computation;
2426    |   if (cond) goto bb_3 else goto bb_4
2427    | end_bb_2
2428    |
2429    | bb_3
2430    |   A[i] = expr;
2431    |   goto bb_4
2432    | end_bb_3
2433    |
2434    | bb_4
2435    |   goto bb_1
2436    | end_bb_4
2437 
2438    insert_gimplified_predicates inserts the computation of the COND
2439    expression at the beginning of the destination basic block:
2440 
2441    | bb_0
2442    |   i = 0
2443    | end_bb_0
2444    |
2445    | bb_1
2446    |   if (i < N) goto bb_5 else goto bb_2
2447    | end_bb_1
2448    |
2449    | bb_2
2450    |   cond = some_computation;
2451    |   if (cond) goto bb_3 else goto bb_4
2452    | end_bb_2
2453    |
2454    | bb_3
2455    |   cond = some_computation;
2456    |   A[i] = expr;
2457    |   goto bb_4
2458    | end_bb_3
2459    |
2460    | bb_4
2461    |   goto bb_1
2462    | end_bb_4
2463 
2464    predicate_statements is then predicating the memory write as follows:
2465 
2466    | bb_0
2467    |   i = 0
2468    | end_bb_0
2469    |
2470    | bb_1
2471    |   if (i < N) goto bb_5 else goto bb_2
2472    | end_bb_1
2473    |
2474    | bb_2
2475    |   if (cond) goto bb_3 else goto bb_4
2476    | end_bb_2
2477    |
2478    | bb_3
2479    |   cond = some_computation;
2480    |   A[i] = cond ? expr : A[i];
2481    |   goto bb_4
2482    | end_bb_3
2483    |
2484    | bb_4
2485    |   goto bb_1
2486    | end_bb_4
2487 
2488    and finally combine_blocks removes the basic block boundaries making
2489    the loop vectorizable:
2490 
2491    | bb_0
2492    |   i = 0
2493    |   if (i < N) goto bb_5 else goto bb_1
2494    | end_bb_0
2495    |
2496    | bb_1
2497    |   cond = some_computation;
2498    |   A[i] = cond ? expr : A[i];
2499    |   if (i < N) goto bb_5 else goto bb_4
2500    | end_bb_1
2501    |
2502    | bb_4
2503    |   goto bb_1
2504    | end_bb_4
2505 */
2506 
2507 static void
2508 predicate_statements (loop_p loop)
2509 {
2510   unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2511   auto_vec<int, 1> vect_sizes;
2512   auto_vec<tree, 1> vect_masks;
2513   hash_set<tree_ssa_name_hash> ssa_names;
2514 
2515   for (i = 1; i < orig_loop_num_nodes; i++)
2516     {
2517       gimple_stmt_iterator gsi;
2518       basic_block bb = ifc_bbs[i];
2519       tree cond = bb_predicate (bb);
2520       bool swap;
2521       int index;
2522 
2523       if (is_true_predicate (cond))
2524 	continue;
2525 
2526       swap = false;
2527       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2528 	{
2529 	  swap = true;
2530 	  cond = TREE_OPERAND (cond, 0);
2531 	}
2532 
2533       vect_sizes.truncate (0);
2534       vect_masks.truncate (0);
2535 
2536       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2537 	{
2538 	  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2539 	  tree lhs;
2540 	  if (!stmt)
2541 	    ;
2542 	  else if (is_false_predicate (cond)
2543 		   && gimple_vdef (stmt))
2544 	    {
2545 	      unlink_stmt_vdef (stmt);
2546 	      gsi_remove (&gsi, true);
2547 	      release_defs (stmt);
2548 	      continue;
2549 	    }
2550 	  else if (gimple_plf (stmt, GF_PLF_2))
2551 	    {
2552 	      tree lhs = gimple_assign_lhs (stmt);
2553 	      tree mask;
2554 	      gimple *new_stmt;
2555 	      gimple_seq stmts = NULL;
2556 	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2557 	      /* We checked before setting GF_PLF_2 that an equivalent
2558 		 integer mode exists.  */
2559 	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2560 	      if (!vect_sizes.is_empty ()
2561 		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
2562 		/* Use created mask.  */
2563 		mask = vect_masks[index];
2564 	      else
2565 		{
2566 		  if (COMPARISON_CLASS_P (cond))
2567 		    mask = gimple_build (&stmts, TREE_CODE (cond),
2568 					 boolean_type_node,
2569 					 TREE_OPERAND (cond, 0),
2570 					 TREE_OPERAND (cond, 1));
2571 		  else
2572 		    mask = cond;
2573 
2574 		  if (swap)
2575 		    {
2576 		      tree true_val
2577 			= constant_boolean_node (true, TREE_TYPE (mask));
2578 		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
2579 					   TREE_TYPE (mask), mask, true_val);
2580 		    }
2581 		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2582 
2583 		  /* Save mask and its size for further use.  */
2584 		  vect_sizes.safe_push (bitsize);
2585 		  vect_masks.safe_push (mask);
2586 		}
2587 	      if (gimple_assign_single_p (stmt))
2588 		new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2589 	      else
2590 		new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2591 
2592 	      gsi_replace (&gsi, new_stmt, true);
2593 	    }
2594 	  else if (((lhs = gimple_assign_lhs (stmt)), true)
2595 		   && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2596 		       || POINTER_TYPE_P (TREE_TYPE (lhs)))
2597 		   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2598 		   && arith_code_with_undefined_signed_overflow
2599 						(gimple_assign_rhs_code (stmt)))
2600 	    {
2601 	      gsi_remove (&gsi, true);
2602 	      gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2603 	      bool first = true;
2604 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2605 		   !gsi_end_p (gsi2);)
2606 		{
2607 		  gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2608 		  gsi_remove (&gsi2, false);
2609 		  if (first)
2610 		    {
2611 		      gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2612 		      first = false;
2613 		    }
2614 		  else
2615 		    gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2616 		}
2617 	    }
2618 	  else if (gimple_vdef (stmt))
2619 	    {
2620 	      tree lhs = gimple_assign_lhs (stmt);
2621 	      tree rhs = gimple_assign_rhs1 (stmt);
2622 	      tree type = TREE_TYPE (lhs);
2623 
2624 	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2625 	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2626 	      if (swap)
2627 		std::swap (lhs, rhs);
2628 	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2629 						 is_gimple_condexpr, NULL_TREE,
2630 						 true, GSI_SAME_STMT);
2631 	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2632 	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2633 	      update_stmt (stmt);
2634 	    }
2635 	  lhs = gimple_get_lhs (gsi_stmt (gsi));
2636 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
2637 	    ssa_names.add (lhs);
2638 	  gsi_next (&gsi);
2639 	}
2640       ssa_names.empty ();
2641     }
2642 }
2643 
2644 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2645    other than the exit and latch of the LOOP.  Also resets the
2646    GIMPLE_DEBUG information.  */
2647 
2648 static void
2649 remove_conditions_and_labels (loop_p loop)
2650 {
2651   gimple_stmt_iterator gsi;
2652   unsigned int i;
2653 
2654   for (i = 0; i < loop->num_nodes; i++)
2655     {
2656       basic_block bb = ifc_bbs[i];
2657 
2658       if (bb_with_exit_edge_p (loop, bb)
2659 	  || bb == loop->latch)
2660 	continue;
2661 
2662       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2663 	switch (gimple_code (gsi_stmt (gsi)))
2664 	  {
2665 	  case GIMPLE_COND:
2666 	  case GIMPLE_LABEL:
2667 	    gsi_remove (&gsi, true);
2668 	    break;
2669 
2670 	  case GIMPLE_DEBUG:
2671 	    /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
2672 	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
2673 	      {
2674 		gimple_debug_bind_reset_value (gsi_stmt (gsi));
2675 		update_stmt (gsi_stmt (gsi));
2676 	      }
2677 	    gsi_next (&gsi);
2678 	    break;
2679 
2680 	  default:
2681 	    gsi_next (&gsi);
2682 	  }
2683     }
2684 }
2685 
2686 /* Combine all the basic blocks from LOOP into one or two super basic
2687    blocks.  Replace PHI nodes with conditional modify expressions.  */
2688 
2689 static void
2690 combine_blocks (class loop *loop)
2691 {
2692   basic_block bb, exit_bb, merge_target_bb;
2693   unsigned int orig_loop_num_nodes = loop->num_nodes;
2694   unsigned int i;
2695   edge e;
2696   edge_iterator ei;
2697 
2698   remove_conditions_and_labels (loop);
2699   insert_gimplified_predicates (loop);
2700   predicate_all_scalar_phis (loop);
2701 
2702   if (need_to_predicate || need_to_rewrite_undefined)
2703     predicate_statements (loop);
2704 
2705   /* Merge basic blocks.  */
2706   exit_bb = NULL;
2707   bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2708   for (i = 0; i < orig_loop_num_nodes; i++)
2709     {
2710       bb = ifc_bbs[i];
2711       predicated[i] = !is_true_predicate (bb_predicate (bb));
2712       free_bb_predicate (bb);
2713       if (bb_with_exit_edge_p (loop, bb))
2714 	{
2715 	  gcc_assert (exit_bb == NULL);
2716 	  exit_bb = bb;
2717 	}
2718     }
2719   gcc_assert (exit_bb != loop->latch);
2720 
2721   merge_target_bb = loop->header;
2722 
2723   /* Get at the virtual def valid for uses starting at the first block
2724      we merge into the header.  Without a virtual PHI the loop has the
2725      same virtual use on all stmts.  */
2726   gphi *vphi = get_virtual_phi (loop->header);
2727   tree last_vdef = NULL_TREE;
2728   if (vphi)
2729     {
2730       last_vdef = gimple_phi_result (vphi);
2731       for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2732 	   ! gsi_end_p (gsi); gsi_next (&gsi))
2733 	if (gimple_vdef (gsi_stmt (gsi)))
2734 	  last_vdef = gimple_vdef (gsi_stmt (gsi));
2735     }
2736   for (i = 1; i < orig_loop_num_nodes; i++)
2737     {
2738       gimple_stmt_iterator gsi;
2739       gimple_stmt_iterator last;
2740 
2741       bb = ifc_bbs[i];
2742 
2743       if (bb == exit_bb || bb == loop->latch)
2744 	continue;
2745 
2746       /* We release virtual PHIs late because we have to propagate them
2747          out using the current VUSE.  The def might be the one used
2748 	 after the loop.  */
2749       vphi = get_virtual_phi (bb);
2750       if (vphi)
2751 	{
2752 	  /* When there's just loads inside the loop a stray virtual
2753 	     PHI merging the uses can appear, update last_vdef from
2754 	     it.  */
2755 	  if (!last_vdef)
2756 	    last_vdef = gimple_phi_arg_def (vphi, 0);
2757 	  imm_use_iterator iter;
2758 	  use_operand_p use_p;
2759 	  gimple *use_stmt;
2760 	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2761 	    {
2762 	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2763 		SET_USE (use_p, last_vdef);
2764 	    }
2765 	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2766 	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2767 	  gsi = gsi_for_stmt (vphi);
2768 	  remove_phi_node (&gsi, true);
2769 	}
2770 
2771       /* Make the stmts members of loop->header and clear range info from all
2772 	 stmts in BB, which is no longer executed conditionally on a predicate
2773 	 we could have derived the range info from.  */
2774       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2775 	{
2776 	  gimple *stmt = gsi_stmt (gsi);
2777 	  gimple_set_bb (stmt, merge_target_bb);
2778 	  /* Update virtual operands.  */
2779 	  if (last_vdef)
2780 	    {
2781 	      use_operand_p use_p = ssa_vuse_operand (stmt);
2782 	      if (use_p
2783 		  && USE_FROM_PTR (use_p) != last_vdef)
2784 		SET_USE (use_p, last_vdef);
2785 	      if (gimple_vdef (stmt))
2786 		last_vdef = gimple_vdef (stmt);
2787 	    }
2788 	  else
2789 	    /* If this is the first load we arrive at update last_vdef
2790 	       so we handle stray PHIs correctly.  */
2791 	    last_vdef = gimple_vuse (stmt);
2792 	  if (predicated[i])
2793 	    {
2794 	      ssa_op_iter i;
2795 	      tree op;
2796 	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2797 		reset_flow_sensitive_info (op);
2798 	    }
2799 	}
2800 
2801       /* Update stmt list.  */
2802       last = gsi_last_bb (merge_target_bb);
2803       gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2804       set_bb_seq (bb, NULL);
2805     }
2806 
2807   /* Fixup virtual operands in the exit block.  */
2808   if (exit_bb
2809       && exit_bb != loop->header)
2810     {
2811       /* We release virtual PHIs late because we have to propagate them
2812          out using the current VUSE.  The def might be the one used
2813 	 after the loop.  */
2814       vphi = get_virtual_phi (exit_bb);
2815       if (vphi)
2816 	{
2817 	  /* When there's just loads inside the loop a stray virtual
2818 	     PHI merging the uses can appear, update last_vdef from
2819 	     it.  */
2820 	  if (!last_vdef)
2821 	    last_vdef = gimple_phi_arg_def (vphi, 0);
2822 	  imm_use_iterator iter;
2823 	  use_operand_p use_p;
2824 	  gimple *use_stmt;
2825 	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2826 	    {
2827 	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2828 		SET_USE (use_p, last_vdef);
2829 	    }
2830 	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2831 	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2832 	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2833 	  remove_phi_node (&gsi, true);
2834 	}
2835     }
2836 
2837   /* Now remove all the edges in the loop, except for those from the exit
2838      block and delete the blocks we elided.  */
2839   for (i = 1; i < orig_loop_num_nodes; i++)
2840     {
2841       bb = ifc_bbs[i];
2842 
2843       for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2844 	{
2845 	  if (e->src == exit_bb)
2846 	    ei_next (&ei);
2847 	  else
2848 	    remove_edge (e);
2849 	}
2850     }
2851   for (i = 1; i < orig_loop_num_nodes; i++)
2852     {
2853       bb = ifc_bbs[i];
2854 
2855       if (bb == exit_bb || bb == loop->latch)
2856 	continue;
2857 
2858       delete_basic_block (bb);
2859     }
2860 
2861   /* Re-connect the exit block.  */
2862   if (exit_bb != NULL)
2863     {
2864       if (exit_bb != loop->header)
2865 	{
2866 	  /* Connect this node to loop header.  */
2867 	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2868 	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2869 	}
2870 
2871       /* Redirect non-exit edges to loop->latch.  */
2872       FOR_EACH_EDGE (e, ei, exit_bb->succs)
2873 	{
2874 	  if (!loop_exit_edge_p (loop, e))
2875 	    redirect_edge_and_branch (e, loop->latch);
2876 	}
2877       set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2878     }
2879   else
2880     {
2881       /* If the loop does not have an exit, reconnect header and latch.  */
2882       make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2883       set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2884     }
2885 
2886   /* If possible, merge the loop header into the block with the exit edge.
2887      This reduces the number of basic blocks to two, to please the
2888      vectorizer, which handles only loops with two nodes.  */
2889   if (exit_bb
2890       && exit_bb != loop->header)
2891     {
2892       if (can_merge_blocks_p (loop->header, exit_bb))
2893 	merge_blocks (loop->header, exit_bb);
2894     }
2895 
2896   free (ifc_bbs);
2897   ifc_bbs = NULL;
2898   free (predicated);
2899 }
2900 
2901 /* Version LOOP before if-converting it; the original loop
2902    will be if-converted, the new copy of the loop will not,
2903    and the LOOP_VECTORIZED internal call will be guarding which
2904    loop to execute.  The vectorizer pass will fold this
2905    internal call into either true or false.
2906 
2907    Note that this function intentionally invalidates profile.  Both edges
2908    out of LOOP_VECTORIZED must have 100% probability so the profile remains
2909    consistent after the condition is folded in the vectorizer.  */
2910 
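/* Schematically (editorial addition), for a loop L and its copy L' the
   result is

     if (.LOOP_VECTORIZED (L, L'))
       L    <- will be if-converted and considered for vectorization
     else
       L'   <- scalar copy with dont_vectorize set  */
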
2911 static class loop *
2912 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2913 {
2914   basic_block cond_bb;
2915   tree cond = make_ssa_name (boolean_type_node);
2916   class loop *new_loop;
2917   gimple *g;
2918   gimple_stmt_iterator gsi;
2919   unsigned int save_length;
2920 
2921   g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2922 				  build_int_cst (integer_type_node, loop->num),
2923 				  integer_zero_node);
2924   gimple_call_set_lhs (g, cond);
2925 
2926   /* Save BB->aux around loop_version as that uses the same field.  */
2927   save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2928   void **saved_preds = XALLOCAVEC (void *, save_length);
2929   for (unsigned i = 0; i < save_length; i++)
2930     saved_preds[i] = ifc_bbs[i]->aux;
2931 
2932   initialize_original_copy_tables ();
2933   /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
2934      is re-merged in the vectorizer.  */
2935   new_loop = loop_version (loop, cond, &cond_bb,
2936 			   profile_probability::always (),
2937 			   profile_probability::always (),
2938 			   profile_probability::always (),
2939 			   profile_probability::always (), true);
2940   free_original_copy_tables ();
2941 
2942   for (unsigned i = 0; i < save_length; i++)
2943     ifc_bbs[i]->aux = saved_preds[i];
2944 
2945   if (new_loop == NULL)
2946     return NULL;
2947 
2948   new_loop->dont_vectorize = true;
2949   new_loop->force_vectorize = false;
2950   gsi = gsi_last_bb (cond_bb);
2951   gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2952   if (preds)
2953     preds->safe_push (g);
2954   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2955   update_ssa (TODO_update_ssa);
2956   return new_loop;
2957 }
2958 
2959 /* Return true when LOOP satisfies the following conditions that will
2960    allow it to be recognized by the vectorizer for outer-loop
2961    vectorization:
2962     - The loop is not the root node of the loop tree.
2963     - The loop has exactly one inner loop.
2964     - The loop has a single exit.
2965     - The loop header has a single successor, which is the inner
2966       loop header.
2967     - Each of the inner and outer loop latches have a single
2968       predecessor.
2969     - The loop exit block has a single predecessor, which is the
2970       inner loop's exit block.  */
2971 
2972 static bool
2973 versionable_outer_loop_p (class loop *loop)
2974 {
2975   if (!loop_outer (loop)
2976       || loop->dont_vectorize
2977       || !loop->inner
2978       || loop->inner->next
2979       || !single_exit (loop)
2980       || !single_succ_p (loop->header)
2981       || single_succ (loop->header) != loop->inner->header
2982       || !single_pred_p (loop->latch)
2983       || !single_pred_p (loop->inner->latch))
2984     return false;
2985 
2986   basic_block outer_exit = single_pred (loop->latch);
2987   basic_block inner_exit = single_pred (loop->inner->latch);
2988 
2989   if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2990     return false;
2991 
2992   if (dump_file)
2993     fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2994 
2995   return true;
2996 }
2997 
2998 /* Performs splitting of critical edges.  Skip splitting and return false
2999    if LOOP will not be converted because:
3000 
3001      - LOOP is not well formed.
3002      - LOOP has a PHI with more than MAX_PHI_ARG_NUM arguments.
3003 
3004    The last restriction applies only if AGGRESSIVE_IF_CONV is false.  */
3005 
3006 static bool
3007 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3008 {
3009   basic_block *body;
3010   basic_block bb;
3011   unsigned int num = loop->num_nodes;
3012   unsigned int i;
3013   gimple *stmt;
3014   edge e;
3015   edge_iterator ei;
3016   auto_vec<edge> critical_edges;
3017 
3018   /* Loop is not well formed.  */
3019   if (num <= 2 || loop->inner || !single_exit (loop))
3020     return false;
3021 
3022   body = get_loop_body (loop);
3023   for (i = 0; i < num; i++)
3024     {
3025       bb = body[i];
3026       if (!aggressive_if_conv
3027 	  && phi_nodes (bb)
3028 	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3029 	{
3030 	  if (dump_file && (dump_flags & TDF_DETAILS))
3031 	    fprintf (dump_file,
3032 		     "BB %d has complicated PHI with more than %u args.\n",
3033 		     bb->index, MAX_PHI_ARG_NUM);
3034 
3035 	  free (body);
3036 	  return false;
3037 	}
3038       if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3039 	continue;
3040 
3041       stmt = last_stmt (bb);
3042       /* Skip basic blocks not ending with a conditional branch.  */
3043       if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3044 	continue;
3045 
3046       FOR_EACH_EDGE (e, ei, bb->succs)
3047 	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3048 	  critical_edges.safe_push (e);
3049     }
3050   free (body);
3051 
3052   while (critical_edges.length () > 0)
3053     {
3054       e = critical_edges.pop ();
3055       /* Don't split if bb can be predicated along a non-critical edge.  */
3056       if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3057 	split_edge (e);
3058     }
3059 
3060   return true;
3061 }
3062 
3063 /* Delete redundant statements produced by predication which would prevent
3064    loop vectorization.  */
3065 
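/* Implementation sketch (editorial addition): statements are marked live via
   the GF_PLF_2 flag -- PHIs, loads and stores, calls, conds, and values used
   outside the single remaining block -- liveness is then propagated from live
   statements to the definitions of their operands, unmarked statements are
   removed, and stores that dse_classify_store reports as dead are deleted
   as well.  */
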
3066 static void
3067 ifcvt_local_dce (class loop *loop)
3068 {
3069   gimple *stmt;
3070   gimple *stmt1;
3071   gimple *phi;
3072   gimple_stmt_iterator gsi;
3073   auto_vec<gimple *> worklist;
3074   enum gimple_code code;
3075   use_operand_p use_p;
3076   imm_use_iterator imm_iter;
3077 
3078   /* The loop has a single BB only.  */
3079   basic_block bb = loop->header;
3080   tree latch_vdef = NULL_TREE;
3081 
3082   worklist.create (64);
3083   /* Consider all phi as live statements.  */
3084   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3085     {
3086       phi = gsi_stmt (gsi);
3087       gimple_set_plf (phi, GF_PLF_2, true);
3088       worklist.safe_push (phi);
3089       if (virtual_operand_p (gimple_phi_result (phi)))
3090 	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3091     }
3092   /* Consider load/store statements, CALL and COND as live.  */
3093   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3094     {
3095       stmt = gsi_stmt (gsi);
3096       if (is_gimple_debug (stmt))
3097 	{
3098 	  gimple_set_plf (stmt, GF_PLF_2, true);
3099 	  continue;
3100 	}
3101       if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3102 	{
3103 	  gimple_set_plf (stmt, GF_PLF_2, true);
3104 	  worklist.safe_push (stmt);
3105 	  continue;
3106 	}
3107       code = gimple_code (stmt);
3108       if (code == GIMPLE_COND || code == GIMPLE_CALL)
3109 	{
3110 	  gimple_set_plf (stmt, GF_PLF_2, true);
3111 	  worklist.safe_push (stmt);
3112 	  continue;
3113 	}
3114       gimple_set_plf (stmt, GF_PLF_2, false);
3115 
3116       if (code == GIMPLE_ASSIGN)
3117 	{
3118 	  tree lhs = gimple_assign_lhs (stmt);
3119 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3120 	    {
3121 	      stmt1 = USE_STMT (use_p);
3122 	      if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3123 		{
3124 		  gimple_set_plf (stmt, GF_PLF_2, true);
3125 		  worklist.safe_push (stmt);
3126 		  break;
3127 		}
3128 	    }
3129 	}
3130     }
3131   /* Propagate liveness through arguments of live stmt.  */
3132   while (worklist.length () > 0)
3133     {
3134       ssa_op_iter iter;
3135       use_operand_p use_p;
3136       tree use;
3137 
3138       stmt = worklist.pop ();
3139       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3140 	{
3141 	  use = USE_FROM_PTR (use_p);
3142 	  if (TREE_CODE (use) != SSA_NAME)
3143 	    continue;
3144 	  stmt1 = SSA_NAME_DEF_STMT (use);
3145 	  if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3146 	    continue;
3147 	  gimple_set_plf (stmt1, GF_PLF_2, true);
3148 	  worklist.safe_push (stmt1);
3149 	}
3150     }
3151   /* Delete dead statements.  */
3152   gsi = gsi_last_bb (bb);
3153   while (!gsi_end_p (gsi))
3154     {
3155       gimple_stmt_iterator gsiprev = gsi;
3156       gsi_prev (&gsiprev);
3157       stmt = gsi_stmt (gsi);
3158       if (gimple_store_p (stmt) && gimple_vdef (stmt))
3159 	{
3160 	  tree lhs = gimple_get_lhs (stmt);
3161 	  ao_ref write;
3162 	  ao_ref_init (&write, lhs);
3163 
3164 	  if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3165 	      == DSE_STORE_DEAD)
3166 	    delete_dead_or_redundant_assignment (&gsi, "dead");
3167 	  gsi = gsiprev;
3168 	  continue;
3169 	}
3170 
3171       if (gimple_plf (stmt, GF_PLF_2))
3172 	{
3173 	  gsi = gsiprev;
3174 	  continue;
3175 	}
3176       if (dump_file && (dump_flags & TDF_DETAILS))
3177 	{
3178 	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3179 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3180 	}
3181       gsi_remove (&gsi, true);
3182       release_defs (stmt);
3183       gsi = gsiprev;
3184     }
3185 }
3186 
3187 /* Return true if VALUE is already available on edge PE.  */
3188 
3189 static bool
3190 ifcvt_available_on_edge_p (edge pe, tree value)
3191 {
3192   if (is_gimple_min_invariant (value))
3193     return true;
3194 
3195   if (TREE_CODE (value) == SSA_NAME)
3196     {
3197       basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3198       if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3199 	return true;
3200     }
3201 
3202   return false;
3203 }
3204 
3205 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3206    edge PE.  */
3207 
3208 static bool
3209 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3210 {
3211   if (auto *call = dyn_cast<gcall *> (stmt))
3212     {
3213       if (gimple_call_internal_p (call)
3214 	  && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3215 	return false;
3216     }
3217   else if (auto *assign = dyn_cast<gassign *> (stmt))
3218     {
3219       if (gimple_assign_rhs_code (assign) == COND_EXPR)
3220 	return false;
3221     }
3222   else
3223     return false;
3224 
3225   if (gimple_has_side_effects (stmt)
3226       || gimple_could_trap_p (stmt)
3227       || stmt_could_throw_p (cfun, stmt)
3228       || gimple_vdef (stmt)
3229       || gimple_vuse (stmt))
3230     return false;
3231 
3232   int num_args = gimple_num_args (stmt);
3233   if (pe != loop_preheader_edge (loop))
3234     {
3235       for (int i = 0; i < num_args; ++i)
3236 	if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3237 	  return false;
3238     }
3239   else
3240     {
3241       for (int i = 0; i < num_args; ++i)
3242 	if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3243 	  return false;
3244     }
3245 
3246   return true;
3247 }
3248 
3249 /* Hoist invariant statements from LOOP to edge PE.  */
3250 
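/* For example (editorial note): a loop-invariant statement such as
   "_1 = n_5 * 4;" left in the if-converted body is moved to edge PE, i.e.
   either the loop preheader or the edge ahead of the .LOOP_VECTORIZED check
   when the loop was versioned.  */
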
3251 static void
3252 ifcvt_hoist_invariants (class loop *loop, edge pe)
3253 {
3254   gimple_stmt_iterator hoist_gsi = {};
3255   unsigned int num_blocks = loop->num_nodes;
3256   basic_block *body = get_loop_body (loop);
3257   for (unsigned int i = 0; i < num_blocks; ++i)
3258     for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3259       {
3260 	gimple *stmt = gsi_stmt (gsi);
3261 	if (ifcvt_can_hoist (loop, pe, stmt))
3262 	  {
3263 	    /* Once we've hoisted one statement, insert other statements
3264 	       after it.  */
3265 	    gsi_remove (&gsi, false);
3266 	    if (hoist_gsi.ptr)
3267 	      gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3268 	    else
3269 	      {
3270 		gsi_insert_on_edge_immediate (pe, stmt);
3271 		hoist_gsi = gsi_for_stmt (stmt);
3272 	      }
3273 	    continue;
3274 	  }
3275 	gsi_next (&gsi);
3276       }
3277   free (body);
3278 }
3279 
3280 /* If-convert LOOP when it is legal.  For the moment this pass has no
3281    profitability analysis.  Returns non-zero todo flags when something
3282    changed.  */
3283 
3284 unsigned int
3285 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3286 {
3287   unsigned int todo = 0;
3288   bool aggressive_if_conv;
3289   class loop *rloop;
3290   bitmap exit_bbs;
3291   edge pe;
3292 
3293  again:
3294   rloop = NULL;
3295   ifc_bbs = NULL;
3296   need_to_predicate = false;
3297   need_to_rewrite_undefined = false;
3298   any_complicated_phi = false;
3299 
3300   /* Apply more aggressive if-conversion when the loop or its outer loop was
3301      marked with the simd pragma.  When that's the case, we try to if-convert
3302      loops containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
3303   aggressive_if_conv = loop->force_vectorize;
3304   if (!aggressive_if_conv)
3305     {
3306       class loop *outer_loop = loop_outer (loop);
3307       if (outer_loop && outer_loop->force_vectorize)
3308 	aggressive_if_conv = true;
3309     }
3310 
3311   if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
3312     goto cleanup;
3313 
3314   if (!if_convertible_loop_p (loop)
3315       || !dbg_cnt (if_conversion_tree))
3316     goto cleanup;
3317 
3318   if ((need_to_predicate || any_complicated_phi)
3319       && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3320 	  || loop->dont_vectorize))
3321     goto cleanup;
3322 
3323   /* The edge to insert invariant stmts on.  */
3324   pe = loop_preheader_edge (loop);
3325 
3326   /* Since we have no cost model, always version loops unless the user
3327      specified -ftree-loop-if-convert or unless versioning is required.
3328      Either version this loop, or if the pattern is right for outer-loop
3329      vectorization, version the outer loop.  In the latter case we will
3330      still if-convert the original inner loop.  */
3331   if (need_to_predicate
3332       || any_complicated_phi
3333       || flag_tree_loop_if_convert != 1)
3334     {
3335       class loop *vloop
3336 	= (versionable_outer_loop_p (loop_outer (loop))
3337 	   ? loop_outer (loop) : loop);
3338       class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3339       if (nloop == NULL)
3340 	goto cleanup;
3341       if (vloop != loop)
3342 	{
3343 	  /* If versionable_outer_loop_p decided to version the
3344 	     outer loop, version also the inner loop of the non-vectorized
3345 	     loop copy.  So we transform:
3346 	      loop1
3347 		loop2
3348 	     into:
3349 	      if (LOOP_VECTORIZED (1, 3))
3350 		{
3351 		  loop1
3352 		    loop2
3353 		}
3354 	      else
3355 		loop3 (copy of loop1)
3356 		  if (LOOP_VECTORIZED (4, 5))
3357 		    loop4 (copy of loop2)
3358 		  else
3359 		    loop5 (copy of loop4)  */
3360 	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
3361 	  rloop = nloop->inner;
3362 	}
3363       else
3364 	/* If we versioned the loop then make sure to insert the invariant
3365 	   stmts before the .LOOP_VECTORIZED check, since the vectorizer
3366 	   will re-use that check for things like runtime alias versioning
3367 	   whose conditions can end up using those invariants.  */
3368 	pe = single_pred_edge (gimple_bb (preds->last ()));
3369     }
3370 
3371   /* Now all statements are if-convertible.  Combine all the basic
3372      blocks into one huge basic block doing the if-conversion
3373      on-the-fly.  */
3374   combine_blocks (loop);
3375 
3376   /* Perform local CSE; this especially helps the vectorizer analysis when
3377      loads and stores are involved.  CSE only the loop body, not the entry
3378      PHIs; those are to be kept in sync with the non-if-converted copy.
3379      ???  We'll still keep dead stores though.  */
3380   exit_bbs = BITMAP_ALLOC (NULL);
3381   bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3382   bitmap_set_bit (exit_bbs, loop->latch->index);
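  /* These two bits bound the region walked by do_rpo_vn below: the walk
     starts at the preheader edge and stops at the exit destination and at
     the latch.  */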
3383 
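  /* First substitute the SSA name copies recorded as redundant during
     if-conversion: replace_uses_by rewrites every remaining use of the
     first name in each pair by the second, so the value-numbering walk
     below sees the canonical names.  */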
3384   std::pair <tree, tree> *name_pair;
3385   unsigned ssa_names_idx;
3386   FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3387     replace_uses_by (name_pair->first, name_pair->second);
3388   redundant_ssa_names.release ();
3389 
3390   todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3391 
3392   /* Delete dead predicate computations.  */
3393   ifcvt_local_dce (loop);
3394   BITMAP_FREE (exit_bbs);
3395 
3396   ifcvt_hoist_invariants (loop, pe);
3397 
3398   todo |= TODO_cleanup_cfg;
3399 
3400  cleanup:
3401   if (ifc_bbs)
3402     {
3403       unsigned int i;
3404 
3405       for (i = 0; i < loop->num_nodes; i++)
3406 	free_bb_predicate (ifc_bbs[i]);
3407 
3408       free (ifc_bbs);
3409       ifc_bbs = NULL;
3410     }
3411   if (rloop != NULL)
3412     {
3413       loop = rloop;
3414       goto again;
3415     }
3416 
3417   return todo;
3418 }
3419 
3420 /* Tree if-conversion pass management.  */
3421 
3422 namespace {
3423 
3424 const pass_data pass_data_if_conversion =
3425 {
3426   GIMPLE_PASS, /* type */
3427   "ifcvt", /* name */
3428   OPTGROUP_NONE, /* optinfo_flags */
3429   TV_TREE_LOOP_IFCVT, /* tv_id */
3430   ( PROP_cfg | PROP_ssa ), /* properties_required */
3431   0, /* properties_provided */
3432   0, /* properties_destroyed */
3433   0, /* todo_flags_start */
3434   0, /* todo_flags_finish */
3435 };
3436 
3437 class pass_if_conversion : public gimple_opt_pass
3438 {
3439 public:
3440   pass_if_conversion (gcc::context *ctxt)
3441     : gimple_opt_pass (pass_data_if_conversion, ctxt)
3442   {}
3443 
3444   /* opt_pass methods: */
3445   virtual bool gate (function *);
3446   virtual unsigned int execute (function *);
3447 
3448 }; // class pass_if_conversion
3449 
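/* Gate: run the pass when loop vectorization is enabled for the function
   (or some loop in it forces vectorization) and if-conversion has not been
   explicitly disabled, or when -ftree-loop-if-convert was explicitly
   requested.  */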
3450 bool
3451 pass_if_conversion::gate (function *fun)
3452 {
3453   return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3454 	   && flag_tree_loop_if_convert != 0)
3455 	  || flag_tree_loop_if_convert == 1);
3456 }
3457 
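/* Entry point of the pass: try to if-convert every candidate loop, perform
   the requested IL cleanups, and fold the .LOOP_VECTORIZED guards whose
   if-converted loop copy was elided in the meantime.  */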
3458 unsigned int
3459 pass_if_conversion::execute (function *fun)
3460 {
3461   unsigned todo = 0;
3462 
3463   if (number_of_loops (fun) <= 1)
3464     return 0;
3465 
3466   auto_vec<gimple *> preds;
3467   for (auto loop : loops_list (cfun, 0))
3468     if (flag_tree_loop_if_convert == 1
3469 	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
3470 	    && !loop->dont_vectorize))
3471       todo |= tree_if_conversion (loop, &preds);
3472 
3473   if (todo)
3474     {
3475       free_numbers_of_iterations_estimates (fun);
3476       scev_reset ();
3477     }
3478 
3479   if (flag_checking)
3480     {
3481       basic_block bb;
3482       FOR_EACH_BB_FN (bb, fun)
3483 	gcc_assert (!bb->aux);
3484     }
3485 
3486   /* Perform the IL update now; it might elide some loops.  */
3487   if (todo & TODO_cleanup_cfg)
3488     {
3489       cleanup_tree_cfg ();
3490       if (need_ssa_update_p (fun))
3491 	todo |= TODO_update_ssa;
3492     }
3493   if (todo & TODO_update_ssa_any)
3494     update_ssa (todo & TODO_update_ssa_any);
3495 
3496   /* If if-conversion elided the loop, fall back to the original one.  */
3497   for (unsigned i = 0; i < preds.length (); ++i)
3498     {
3499       gimple *g = preds[i];
3500       if (!gimple_bb (g))
3501 	continue;
3502       unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3503       if (!get_loop (fun, ifcvt_loop))
3504 	{
3505 	  if (dump_file)
3506 	    fprintf (dump_file, "If-converted loop vanished\n");
3507 	  fold_loop_internal_call (g, boolean_false_node);
3508 	}
3509     }
3510 
3511   return 0;
3512 }
3513 
3514 } // anon namespace
3515 
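/* Factory used by the pass manager to instantiate the pass; its position in
   the pipeline is determined by the corresponding entry in passes.def.  */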
3516 gimple_opt_pass *
3517 make_pass_if_conversion (gcc::context *ctxt)
3518 {
3519   return new pass_if_conversion (ctxt);
3520 }
3521