xref: /dflybsd-src/contrib/gcc-8.0/gcc/tree-if-conv.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* If-conversion for vectorizer.
2*38fd1498Szrj    Copyright (C) 2004-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Devang Patel <dpatel@apple.com>
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj 
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj 
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj 
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
20*38fd1498Szrj 
21*38fd1498Szrj /* This pass implements a tree level if-conversion of loops.  Its
22*38fd1498Szrj    initial goal is to help the vectorizer to vectorize loops with
23*38fd1498Szrj    conditions.
24*38fd1498Szrj 
25*38fd1498Szrj    A short description of if-conversion:
26*38fd1498Szrj 
27*38fd1498Szrj      o Decide if a loop is if-convertible or not.
28*38fd1498Szrj      o Walk all loop basic blocks in breadth first order (BFS order).
29*38fd1498Szrj        o Remove conditional statements (at the end of basic block)
30*38fd1498Szrj          and propagate condition into destination basic blocks'
31*38fd1498Szrj 	 predicate list.
32*38fd1498Szrj        o Replace modify expression with conditional modify expression
33*38fd1498Szrj          using current basic block's condition.
34*38fd1498Szrj      o Merge all basic blocks
35*38fd1498Szrj        o Replace phi nodes with conditional modify expr
36*38fd1498Szrj        o Merge all basic blocks into header
37*38fd1498Szrj 
38*38fd1498Szrj      Sample transformation:
39*38fd1498Szrj 
40*38fd1498Szrj      INPUT
41*38fd1498Szrj      -----
42*38fd1498Szrj 
43*38fd1498Szrj      # i_23 = PHI <0(0), i_18(10)>;
44*38fd1498Szrj      <L0>:;
45*38fd1498Szrj      j_15 = A[i_23];
46*38fd1498Szrj      if (j_15 > 41) goto <L1>; else goto <L17>;
47*38fd1498Szrj 
48*38fd1498Szrj      <L17>:;
49*38fd1498Szrj      goto <bb 3> (<L3>);
50*38fd1498Szrj 
51*38fd1498Szrj      <L1>:;
52*38fd1498Szrj 
53*38fd1498Szrj      # iftmp.2_4 = PHI <0(8), 42(2)>;
54*38fd1498Szrj      <L3>:;
55*38fd1498Szrj      A[i_23] = iftmp.2_4;
56*38fd1498Szrj      i_18 = i_23 + 1;
57*38fd1498Szrj      if (i_18 <= 15) goto <L19>; else goto <L18>;
58*38fd1498Szrj 
59*38fd1498Szrj      <L19>:;
60*38fd1498Szrj      goto <bb 1> (<L0>);
61*38fd1498Szrj 
62*38fd1498Szrj      <L18>:;
63*38fd1498Szrj 
64*38fd1498Szrj      OUTPUT
65*38fd1498Szrj      ------
66*38fd1498Szrj 
67*38fd1498Szrj      # i_23 = PHI <0(0), i_18(10)>;
68*38fd1498Szrj      <L0>:;
69*38fd1498Szrj      j_15 = A[i_23];
70*38fd1498Szrj 
71*38fd1498Szrj      <L3>:;
72*38fd1498Szrj      iftmp.2_4 = j_15 > 41 ? 42 : 0;
73*38fd1498Szrj      A[i_23] = iftmp.2_4;
74*38fd1498Szrj      i_18 = i_23 + 1;
75*38fd1498Szrj      if (i_18 <= 15) goto <L19>; else goto <L18>;
76*38fd1498Szrj 
77*38fd1498Szrj      <L19>:;
78*38fd1498Szrj      goto <bb 1> (<L0>);
79*38fd1498Szrj 
80*38fd1498Szrj      <L18>:;
81*38fd1498Szrj */
82*38fd1498Szrj 
83*38fd1498Szrj #include "config.h"
84*38fd1498Szrj #include "system.h"
85*38fd1498Szrj #include "coretypes.h"
86*38fd1498Szrj #include "backend.h"
87*38fd1498Szrj #include "rtl.h"
88*38fd1498Szrj #include "tree.h"
89*38fd1498Szrj #include "gimple.h"
90*38fd1498Szrj #include "cfghooks.h"
91*38fd1498Szrj #include "tree-pass.h"
92*38fd1498Szrj #include "ssa.h"
93*38fd1498Szrj #include "expmed.h"
94*38fd1498Szrj #include "optabs-query.h"
95*38fd1498Szrj #include "gimple-pretty-print.h"
96*38fd1498Szrj #include "alias.h"
97*38fd1498Szrj #include "fold-const.h"
98*38fd1498Szrj #include "stor-layout.h"
99*38fd1498Szrj #include "gimple-fold.h"
100*38fd1498Szrj #include "gimplify.h"
101*38fd1498Szrj #include "gimple-iterator.h"
102*38fd1498Szrj #include "gimplify-me.h"
103*38fd1498Szrj #include "tree-cfg.h"
104*38fd1498Szrj #include "tree-into-ssa.h"
105*38fd1498Szrj #include "tree-ssa.h"
106*38fd1498Szrj #include "cfgloop.h"
107*38fd1498Szrj #include "tree-data-ref.h"
108*38fd1498Szrj #include "tree-scalar-evolution.h"
109*38fd1498Szrj #include "tree-ssa-loop.h"
110*38fd1498Szrj #include "tree-ssa-loop-niter.h"
111*38fd1498Szrj #include "tree-ssa-loop-ivopts.h"
112*38fd1498Szrj #include "tree-ssa-address.h"
113*38fd1498Szrj #include "dbgcnt.h"
114*38fd1498Szrj #include "tree-hash-traits.h"
115*38fd1498Szrj #include "varasm.h"
116*38fd1498Szrj #include "builtins.h"
117*38fd1498Szrj #include "params.h"
118*38fd1498Szrj #include "cfganal.h"
119*38fd1498Szrj 
/* Limit on the number of arguments a PHI may have for if-conversion to
   handle it, read from the PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS
   parameter.  PHIs with more arguments are only handled when we are
   asked to do so by a simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
124*38fd1498Szrj 
/* True when if-conversion introduced a load or store that has to be
   predicated.  */
static bool any_pred_load_store;

/* True when at least one "complicated" PHI has to be handled by
   if-conversion.  A PHI is complicated when it has more than two
   arguments and cannot be degenerated into a two-argument PHI; see the
   comment before phi_convertible_by_degenerating_args for details.  */
static bool any_complicated_phi;
134*38fd1498Szrj 
135*38fd1498Szrj /* Hash for struct innermost_loop_behavior.  It depends on the user to
136*38fd1498Szrj    free the memory.  */
137*38fd1498Szrj 
138*38fd1498Szrj struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
139*38fd1498Szrj {
140*38fd1498Szrj   static inline hashval_t hash (const value_type &);
141*38fd1498Szrj   static inline bool equal (const value_type &,
142*38fd1498Szrj 			    const compare_type &);
143*38fd1498Szrj };
144*38fd1498Szrj 
145*38fd1498Szrj inline hashval_t
hash(const value_type & e)146*38fd1498Szrj innermost_loop_behavior_hash::hash (const value_type &e)
147*38fd1498Szrj {
148*38fd1498Szrj   hashval_t hash;
149*38fd1498Szrj 
150*38fd1498Szrj   hash = iterative_hash_expr (e->base_address, 0);
151*38fd1498Szrj   hash = iterative_hash_expr (e->offset, hash);
152*38fd1498Szrj   hash = iterative_hash_expr (e->init, hash);
153*38fd1498Szrj   return iterative_hash_expr (e->step, hash);
154*38fd1498Szrj }
155*38fd1498Szrj 
156*38fd1498Szrj inline bool
equal(const value_type & e1,const compare_type & e2)157*38fd1498Szrj innermost_loop_behavior_hash::equal (const value_type &e1,
158*38fd1498Szrj 				     const compare_type &e2)
159*38fd1498Szrj {
160*38fd1498Szrj   if ((e1->base_address && !e2->base_address)
161*38fd1498Szrj       || (!e1->base_address && e2->base_address)
162*38fd1498Szrj       || (!e1->offset && e2->offset)
163*38fd1498Szrj       || (e1->offset && !e2->offset)
164*38fd1498Szrj       || (!e1->init && e2->init)
165*38fd1498Szrj       || (e1->init && !e2->init)
166*38fd1498Szrj       || (!e1->step && e2->step)
167*38fd1498Szrj       || (e1->step && !e2->step))
168*38fd1498Szrj     return false;
169*38fd1498Szrj 
170*38fd1498Szrj   if (e1->base_address && e2->base_address
171*38fd1498Szrj       && !operand_equal_p (e1->base_address, e2->base_address, 0))
172*38fd1498Szrj     return false;
173*38fd1498Szrj   if (e1->offset && e2->offset
174*38fd1498Szrj       && !operand_equal_p (e1->offset, e2->offset, 0))
175*38fd1498Szrj     return false;
176*38fd1498Szrj   if (e1->init && e2->init
177*38fd1498Szrj       && !operand_equal_p (e1->init, e2->init, 0))
178*38fd1498Szrj     return false;
179*38fd1498Szrj   if (e1->step && e2->step
180*38fd1498Szrj       && !operand_equal_p (e1->step, e2->step, 0))
181*38fd1498Szrj     return false;
182*38fd1498Szrj 
183*38fd1498Szrj   return true;
184*38fd1498Szrj }
185*38fd1498Szrj 
186*38fd1498Szrj /* List of basic blocks in if-conversion-suitable order.  */
187*38fd1498Szrj static basic_block *ifc_bbs;
188*38fd1498Szrj 
189*38fd1498Szrj /* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
190*38fd1498Szrj static hash_map<innermost_loop_behavior_hash,
191*38fd1498Szrj 		data_reference_p> *innermost_DR_map;
192*38fd1498Szrj 
193*38fd1498Szrj /* Hash table to store <base reference, DR> pairs.  */
194*38fd1498Szrj static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
195*38fd1498Szrj 
196*38fd1498Szrj /* Structure used to predicate basic blocks.  This is attached to the
197*38fd1498Szrj    ->aux field of the BBs in the loop to be if-converted.  */
198*38fd1498Szrj struct bb_predicate {
199*38fd1498Szrj 
200*38fd1498Szrj   /* The condition under which this basic block is executed.  */
201*38fd1498Szrj   tree predicate;
202*38fd1498Szrj 
203*38fd1498Szrj   /* PREDICATE is gimplified, and the sequence of statements is
204*38fd1498Szrj      recorded here, in order to avoid the duplication of computations
205*38fd1498Szrj      that occur in previous conditions.  See PR44483.  */
206*38fd1498Szrj   gimple_seq predicate_gimplified_stmts;
207*38fd1498Szrj };
208*38fd1498Szrj 
209*38fd1498Szrj /* Returns true when the basic block BB has a predicate.  */
210*38fd1498Szrj 
211*38fd1498Szrj static inline bool
bb_has_predicate(basic_block bb)212*38fd1498Szrj bb_has_predicate (basic_block bb)
213*38fd1498Szrj {
214*38fd1498Szrj   return bb->aux != NULL;
215*38fd1498Szrj }
216*38fd1498Szrj 
217*38fd1498Szrj /* Returns the gimplified predicate for basic block BB.  */
218*38fd1498Szrj 
219*38fd1498Szrj static inline tree
bb_predicate(basic_block bb)220*38fd1498Szrj bb_predicate (basic_block bb)
221*38fd1498Szrj {
222*38fd1498Szrj   return ((struct bb_predicate *) bb->aux)->predicate;
223*38fd1498Szrj }
224*38fd1498Szrj 
225*38fd1498Szrj /* Sets the gimplified predicate COND for basic block BB.  */
226*38fd1498Szrj 
227*38fd1498Szrj static inline void
set_bb_predicate(basic_block bb,tree cond)228*38fd1498Szrj set_bb_predicate (basic_block bb, tree cond)
229*38fd1498Szrj {
230*38fd1498Szrj   gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
231*38fd1498Szrj 	       && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
232*38fd1498Szrj 	      || is_gimple_condexpr (cond));
233*38fd1498Szrj   ((struct bb_predicate *) bb->aux)->predicate = cond;
234*38fd1498Szrj }
235*38fd1498Szrj 
236*38fd1498Szrj /* Returns the sequence of statements of the gimplification of the
237*38fd1498Szrj    predicate for basic block BB.  */
238*38fd1498Szrj 
239*38fd1498Szrj static inline gimple_seq
bb_predicate_gimplified_stmts(basic_block bb)240*38fd1498Szrj bb_predicate_gimplified_stmts (basic_block bb)
241*38fd1498Szrj {
242*38fd1498Szrj   return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
243*38fd1498Szrj }
244*38fd1498Szrj 
245*38fd1498Szrj /* Sets the sequence of statements STMTS of the gimplification of the
246*38fd1498Szrj    predicate for basic block BB.  */
247*38fd1498Szrj 
248*38fd1498Szrj static inline void
set_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)249*38fd1498Szrj set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
250*38fd1498Szrj {
251*38fd1498Szrj   ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
252*38fd1498Szrj }
253*38fd1498Szrj 
254*38fd1498Szrj /* Adds the sequence of statements STMTS to the sequence of statements
255*38fd1498Szrj    of the predicate for basic block BB.  */
256*38fd1498Szrj 
257*38fd1498Szrj static inline void
add_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)258*38fd1498Szrj add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
259*38fd1498Szrj {
260*38fd1498Szrj   /* We might have updated some stmts in STMTS via force_gimple_operand
261*38fd1498Szrj      calling fold_stmt and that producing multiple stmts.  Delink immediate
262*38fd1498Szrj      uses so update_ssa after loop versioning doesn't get confused for
263*38fd1498Szrj      the not yet inserted predicates.
264*38fd1498Szrj      ???  This should go away once we reliably avoid updating stmts
265*38fd1498Szrj      not in any BB.  */
266*38fd1498Szrj   for (gimple_stmt_iterator gsi = gsi_start (stmts);
267*38fd1498Szrj        !gsi_end_p (gsi); gsi_next (&gsi))
268*38fd1498Szrj     {
269*38fd1498Szrj       gimple *stmt = gsi_stmt (gsi);
270*38fd1498Szrj       delink_stmt_imm_use (stmt);
271*38fd1498Szrj       gimple_set_modified (stmt, true);
272*38fd1498Szrj     }
273*38fd1498Szrj   gimple_seq_add_seq_without_update
274*38fd1498Szrj     (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
275*38fd1498Szrj }
276*38fd1498Szrj 
277*38fd1498Szrj /* Initializes to TRUE the predicate of basic block BB.  */
278*38fd1498Szrj 
279*38fd1498Szrj static inline void
init_bb_predicate(basic_block bb)280*38fd1498Szrj init_bb_predicate (basic_block bb)
281*38fd1498Szrj {
282*38fd1498Szrj   bb->aux = XNEW (struct bb_predicate);
283*38fd1498Szrj   set_bb_predicate_gimplified_stmts (bb, NULL);
284*38fd1498Szrj   set_bb_predicate (bb, boolean_true_node);
285*38fd1498Szrj }
286*38fd1498Szrj 
287*38fd1498Szrj /* Release the SSA_NAMEs associated with the predicate of basic block BB.  */
288*38fd1498Szrj 
289*38fd1498Szrj static inline void
release_bb_predicate(basic_block bb)290*38fd1498Szrj release_bb_predicate (basic_block bb)
291*38fd1498Szrj {
292*38fd1498Szrj   gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
293*38fd1498Szrj   if (stmts)
294*38fd1498Szrj     {
295*38fd1498Szrj       /* Ensure that these stmts haven't yet been added to a bb.  */
296*38fd1498Szrj       if (flag_checking)
297*38fd1498Szrj 	for (gimple_stmt_iterator i = gsi_start (stmts);
298*38fd1498Szrj 	     !gsi_end_p (i); gsi_next (&i))
299*38fd1498Szrj 	  gcc_assert (! gimple_bb (gsi_stmt (i)));
300*38fd1498Szrj 
301*38fd1498Szrj       /* Discard them.  */
302*38fd1498Szrj       gimple_seq_discard (stmts);
303*38fd1498Szrj       set_bb_predicate_gimplified_stmts (bb, NULL);
304*38fd1498Szrj     }
305*38fd1498Szrj }
306*38fd1498Szrj 
307*38fd1498Szrj /* Free the predicate of basic block BB.  */
308*38fd1498Szrj 
309*38fd1498Szrj static inline void
free_bb_predicate(basic_block bb)310*38fd1498Szrj free_bb_predicate (basic_block bb)
311*38fd1498Szrj {
312*38fd1498Szrj   if (!bb_has_predicate (bb))
313*38fd1498Szrj     return;
314*38fd1498Szrj 
315*38fd1498Szrj   release_bb_predicate (bb);
316*38fd1498Szrj   free (bb->aux);
317*38fd1498Szrj   bb->aux = NULL;
318*38fd1498Szrj }
319*38fd1498Szrj 
320*38fd1498Szrj /* Reinitialize predicate of BB with the true predicate.  */
321*38fd1498Szrj 
322*38fd1498Szrj static inline void
reset_bb_predicate(basic_block bb)323*38fd1498Szrj reset_bb_predicate (basic_block bb)
324*38fd1498Szrj {
325*38fd1498Szrj   if (!bb_has_predicate (bb))
326*38fd1498Szrj     init_bb_predicate (bb);
327*38fd1498Szrj   else
328*38fd1498Szrj     {
329*38fd1498Szrj       release_bb_predicate (bb);
330*38fd1498Szrj       set_bb_predicate (bb, boolean_true_node);
331*38fd1498Szrj     }
332*38fd1498Szrj }
333*38fd1498Szrj 
334*38fd1498Szrj /* Returns a new SSA_NAME of type TYPE that is assigned the value of
335*38fd1498Szrj    the expression EXPR.  Inserts the statement created for this
336*38fd1498Szrj    computation before GSI and leaves the iterator GSI at the same
337*38fd1498Szrj    statement.  */
338*38fd1498Szrj 
339*38fd1498Szrj static tree
ifc_temp_var(tree type,tree expr,gimple_stmt_iterator * gsi)340*38fd1498Szrj ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
341*38fd1498Szrj {
342*38fd1498Szrj   tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
343*38fd1498Szrj   gimple *stmt = gimple_build_assign (new_name, expr);
344*38fd1498Szrj   gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
345*38fd1498Szrj   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
346*38fd1498Szrj   return new_name;
347*38fd1498Szrj }
348*38fd1498Szrj 
349*38fd1498Szrj /* Return true when COND is a false predicate.  */
350*38fd1498Szrj 
351*38fd1498Szrj static inline bool
is_false_predicate(tree cond)352*38fd1498Szrj is_false_predicate (tree cond)
353*38fd1498Szrj {
354*38fd1498Szrj   return (cond != NULL_TREE
355*38fd1498Szrj 	  && (cond == boolean_false_node
356*38fd1498Szrj 	      || integer_zerop (cond)));
357*38fd1498Szrj }
358*38fd1498Szrj 
359*38fd1498Szrj /* Return true when COND is a true predicate.  */
360*38fd1498Szrj 
361*38fd1498Szrj static inline bool
is_true_predicate(tree cond)362*38fd1498Szrj is_true_predicate (tree cond)
363*38fd1498Szrj {
364*38fd1498Szrj   return (cond == NULL_TREE
365*38fd1498Szrj 	  || cond == boolean_true_node
366*38fd1498Szrj 	  || integer_onep (cond));
367*38fd1498Szrj }
368*38fd1498Szrj 
369*38fd1498Szrj /* Returns true when BB has a predicate that is not trivial: true or
370*38fd1498Szrj    NULL_TREE.  */
371*38fd1498Szrj 
372*38fd1498Szrj static inline bool
is_predicated(basic_block bb)373*38fd1498Szrj is_predicated (basic_block bb)
374*38fd1498Szrj {
375*38fd1498Szrj   return !is_true_predicate (bb_predicate (bb));
376*38fd1498Szrj }
377*38fd1498Szrj 
378*38fd1498Szrj /* Parses the predicate COND and returns its comparison code and
379*38fd1498Szrj    operands OP0 and OP1.  */
380*38fd1498Szrj 
381*38fd1498Szrj static enum tree_code
parse_predicate(tree cond,tree * op0,tree * op1)382*38fd1498Szrj parse_predicate (tree cond, tree *op0, tree *op1)
383*38fd1498Szrj {
384*38fd1498Szrj   gimple *s;
385*38fd1498Szrj 
386*38fd1498Szrj   if (TREE_CODE (cond) == SSA_NAME
387*38fd1498Szrj       && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
388*38fd1498Szrj     {
389*38fd1498Szrj       if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
390*38fd1498Szrj 	{
391*38fd1498Szrj 	  *op0 = gimple_assign_rhs1 (s);
392*38fd1498Szrj 	  *op1 = gimple_assign_rhs2 (s);
393*38fd1498Szrj 	  return gimple_assign_rhs_code (s);
394*38fd1498Szrj 	}
395*38fd1498Szrj 
396*38fd1498Szrj       else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
397*38fd1498Szrj 	{
398*38fd1498Szrj 	  tree op = gimple_assign_rhs1 (s);
399*38fd1498Szrj 	  tree type = TREE_TYPE (op);
400*38fd1498Szrj 	  enum tree_code code = parse_predicate (op, op0, op1);
401*38fd1498Szrj 
402*38fd1498Szrj 	  return code == ERROR_MARK ? ERROR_MARK
403*38fd1498Szrj 	    : invert_tree_comparison (code, HONOR_NANS (type));
404*38fd1498Szrj 	}
405*38fd1498Szrj 
406*38fd1498Szrj       return ERROR_MARK;
407*38fd1498Szrj     }
408*38fd1498Szrj 
409*38fd1498Szrj   if (COMPARISON_CLASS_P (cond))
410*38fd1498Szrj     {
411*38fd1498Szrj       *op0 = TREE_OPERAND (cond, 0);
412*38fd1498Szrj       *op1 = TREE_OPERAND (cond, 1);
413*38fd1498Szrj       return TREE_CODE (cond);
414*38fd1498Szrj     }
415*38fd1498Szrj 
416*38fd1498Szrj   return ERROR_MARK;
417*38fd1498Szrj }
418*38fd1498Szrj 
419*38fd1498Szrj /* Returns the fold of predicate C1 OR C2 at location LOC.  */
420*38fd1498Szrj 
421*38fd1498Szrj static tree
fold_or_predicates(location_t loc,tree c1,tree c2)422*38fd1498Szrj fold_or_predicates (location_t loc, tree c1, tree c2)
423*38fd1498Szrj {
424*38fd1498Szrj   tree op1a, op1b, op2a, op2b;
425*38fd1498Szrj   enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
426*38fd1498Szrj   enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
427*38fd1498Szrj 
428*38fd1498Szrj   if (code1 != ERROR_MARK && code2 != ERROR_MARK)
429*38fd1498Szrj     {
430*38fd1498Szrj       tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
431*38fd1498Szrj 					  code2, op2a, op2b);
432*38fd1498Szrj       if (t)
433*38fd1498Szrj 	return t;
434*38fd1498Szrj     }
435*38fd1498Szrj 
436*38fd1498Szrj   return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
437*38fd1498Szrj }
438*38fd1498Szrj 
439*38fd1498Szrj /* Returns either a COND_EXPR or the folded expression if the folded
440*38fd1498Szrj    expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
441*38fd1498Szrj    a constant or a SSA_NAME. */
442*38fd1498Szrj 
443*38fd1498Szrj static tree
fold_build_cond_expr(tree type,tree cond,tree rhs,tree lhs)444*38fd1498Szrj fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
445*38fd1498Szrj {
446*38fd1498Szrj   tree rhs1, lhs1, cond_expr;
447*38fd1498Szrj 
448*38fd1498Szrj   /* If COND is comparison r != 0 and r has boolean type, convert COND
449*38fd1498Szrj      to SSA_NAME to accept by vect bool pattern.  */
450*38fd1498Szrj   if (TREE_CODE (cond) == NE_EXPR)
451*38fd1498Szrj     {
452*38fd1498Szrj       tree op0 = TREE_OPERAND (cond, 0);
453*38fd1498Szrj       tree op1 = TREE_OPERAND (cond, 1);
454*38fd1498Szrj       if (TREE_CODE (op0) == SSA_NAME
455*38fd1498Szrj 	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
456*38fd1498Szrj 	  && (integer_zerop (op1)))
457*38fd1498Szrj 	cond = op0;
458*38fd1498Szrj     }
459*38fd1498Szrj   cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
460*38fd1498Szrj 
461*38fd1498Szrj   if (cond_expr == NULL_TREE)
462*38fd1498Szrj     return build3 (COND_EXPR, type, cond, rhs, lhs);
463*38fd1498Szrj 
464*38fd1498Szrj   STRIP_USELESS_TYPE_CONVERSION (cond_expr);
465*38fd1498Szrj 
466*38fd1498Szrj   if (is_gimple_val (cond_expr))
467*38fd1498Szrj     return cond_expr;
468*38fd1498Szrj 
469*38fd1498Szrj   if (TREE_CODE (cond_expr) == ABS_EXPR)
470*38fd1498Szrj     {
471*38fd1498Szrj       rhs1 = TREE_OPERAND (cond_expr, 1);
472*38fd1498Szrj       STRIP_USELESS_TYPE_CONVERSION (rhs1);
473*38fd1498Szrj       if (is_gimple_val (rhs1))
474*38fd1498Szrj 	return build1 (ABS_EXPR, type, rhs1);
475*38fd1498Szrj     }
476*38fd1498Szrj 
477*38fd1498Szrj   if (TREE_CODE (cond_expr) == MIN_EXPR
478*38fd1498Szrj       || TREE_CODE (cond_expr) == MAX_EXPR)
479*38fd1498Szrj     {
480*38fd1498Szrj       lhs1 = TREE_OPERAND (cond_expr, 0);
481*38fd1498Szrj       STRIP_USELESS_TYPE_CONVERSION (lhs1);
482*38fd1498Szrj       rhs1 = TREE_OPERAND (cond_expr, 1);
483*38fd1498Szrj       STRIP_USELESS_TYPE_CONVERSION (rhs1);
484*38fd1498Szrj       if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
485*38fd1498Szrj 	return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
486*38fd1498Szrj     }
487*38fd1498Szrj   return build3 (COND_EXPR, type, cond, rhs, lhs);
488*38fd1498Szrj }
489*38fd1498Szrj 
490*38fd1498Szrj /* Add condition NC to the predicate list of basic block BB.  LOOP is
491*38fd1498Szrj    the loop to be if-converted. Use predicate of cd-equivalent block
492*38fd1498Szrj    for join bb if it exists: we call basic blocks bb1 and bb2
493*38fd1498Szrj    cd-equivalent if they are executed under the same condition.  */
494*38fd1498Szrj 
495*38fd1498Szrj static inline void
add_to_predicate_list(struct loop * loop,basic_block bb,tree nc)496*38fd1498Szrj add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
497*38fd1498Szrj {
498*38fd1498Szrj   tree bc, *tp;
499*38fd1498Szrj   basic_block dom_bb;
500*38fd1498Szrj 
501*38fd1498Szrj   if (is_true_predicate (nc))
502*38fd1498Szrj     return;
503*38fd1498Szrj 
504*38fd1498Szrj   /* If dominance tells us this basic block is always executed,
505*38fd1498Szrj      don't record any predicates for it.  */
506*38fd1498Szrj   if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
507*38fd1498Szrj     return;
508*38fd1498Szrj 
509*38fd1498Szrj   dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
510*38fd1498Szrj   /* We use notion of cd equivalence to get simpler predicate for
511*38fd1498Szrj      join block, e.g. if join block has 2 predecessors with predicates
512*38fd1498Szrj      p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
513*38fd1498Szrj      p1 & p2 | p1 & !p2.  */
514*38fd1498Szrj   if (dom_bb != loop->header
515*38fd1498Szrj       && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
516*38fd1498Szrj     {
517*38fd1498Szrj       gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
518*38fd1498Szrj       bc = bb_predicate (dom_bb);
519*38fd1498Szrj       if (!is_true_predicate (bc))
520*38fd1498Szrj 	set_bb_predicate (bb, bc);
521*38fd1498Szrj       else
522*38fd1498Szrj 	gcc_assert (is_true_predicate (bb_predicate (bb)));
523*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
524*38fd1498Szrj 	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
525*38fd1498Szrj 		 dom_bb->index, bb->index);
526*38fd1498Szrj       return;
527*38fd1498Szrj     }
528*38fd1498Szrj 
529*38fd1498Szrj   if (!is_predicated (bb))
530*38fd1498Szrj     bc = nc;
531*38fd1498Szrj   else
532*38fd1498Szrj     {
533*38fd1498Szrj       bc = bb_predicate (bb);
534*38fd1498Szrj       bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
535*38fd1498Szrj       if (is_true_predicate (bc))
536*38fd1498Szrj 	{
537*38fd1498Szrj 	  reset_bb_predicate (bb);
538*38fd1498Szrj 	  return;
539*38fd1498Szrj 	}
540*38fd1498Szrj     }
541*38fd1498Szrj 
542*38fd1498Szrj   /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
543*38fd1498Szrj   if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
544*38fd1498Szrj     tp = &TREE_OPERAND (bc, 0);
545*38fd1498Szrj   else
546*38fd1498Szrj     tp = &bc;
547*38fd1498Szrj   if (!is_gimple_condexpr (*tp))
548*38fd1498Szrj     {
549*38fd1498Szrj       gimple_seq stmts;
550*38fd1498Szrj       *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
551*38fd1498Szrj       add_bb_predicate_gimplified_stmts (bb, stmts);
552*38fd1498Szrj     }
553*38fd1498Szrj   set_bb_predicate (bb, bc);
554*38fd1498Szrj }
555*38fd1498Szrj 
556*38fd1498Szrj /* Add the condition COND to the previous condition PREV_COND, and add
557*38fd1498Szrj    this to the predicate list of the destination of edge E.  LOOP is
558*38fd1498Szrj    the loop to be if-converted.  */
559*38fd1498Szrj 
560*38fd1498Szrj static void
add_to_dst_predicate_list(struct loop * loop,edge e,tree prev_cond,tree cond)561*38fd1498Szrj add_to_dst_predicate_list (struct loop *loop, edge e,
562*38fd1498Szrj 			   tree prev_cond, tree cond)
563*38fd1498Szrj {
564*38fd1498Szrj   if (!flow_bb_inside_loop_p (loop, e->dest))
565*38fd1498Szrj     return;
566*38fd1498Szrj 
567*38fd1498Szrj   if (!is_true_predicate (prev_cond))
568*38fd1498Szrj     cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
569*38fd1498Szrj 			prev_cond, cond);
570*38fd1498Szrj 
571*38fd1498Szrj   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
572*38fd1498Szrj     add_to_predicate_list (loop, e->dest, cond);
573*38fd1498Szrj }
574*38fd1498Szrj 
575*38fd1498Szrj /* Return true if one of the successor edges of BB exits LOOP.  */
576*38fd1498Szrj 
577*38fd1498Szrj static bool
bb_with_exit_edge_p(struct loop * loop,basic_block bb)578*38fd1498Szrj bb_with_exit_edge_p (struct loop *loop, basic_block bb)
579*38fd1498Szrj {
580*38fd1498Szrj   edge e;
581*38fd1498Szrj   edge_iterator ei;
582*38fd1498Szrj 
583*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
584*38fd1498Szrj     if (loop_exit_edge_p (loop, e))
585*38fd1498Szrj       return true;
586*38fd1498Szrj 
587*38fd1498Szrj   return false;
588*38fd1498Szrj }
589*38fd1498Szrj 
590*38fd1498Szrj /* Given PHI which has more than two arguments, this function checks if
591*38fd1498Szrj    it's if-convertible by degenerating its arguments.  Specifically, if
592*38fd1498Szrj    below two conditions are satisfied:
593*38fd1498Szrj 
594*38fd1498Szrj      1) Number of PHI arguments with different values equals to 2 and one
595*38fd1498Szrj 	argument has the only occurrence.
596*38fd1498Szrj      2) The edge corresponding to the unique argument isn't critical edge.
597*38fd1498Szrj 
598*38fd1498Szrj    Such PHI can be handled as PHIs have only two arguments.  For example,
599*38fd1498Szrj    below PHI:
600*38fd1498Szrj 
601*38fd1498Szrj      res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
602*38fd1498Szrj 
603*38fd1498Szrj    can be transformed into:
604*38fd1498Szrj 
605*38fd1498Szrj      res = (predicate of e3) ? A_2 : A_1;
606*38fd1498Szrj 
607*38fd1498Szrj    Return TRUE if it is the case, FALSE otherwise.  */
608*38fd1498Szrj 
609*38fd1498Szrj static bool
phi_convertible_by_degenerating_args(gphi * phi)610*38fd1498Szrj phi_convertible_by_degenerating_args (gphi *phi)
611*38fd1498Szrj {
612*38fd1498Szrj   edge e;
613*38fd1498Szrj   tree arg, t1 = NULL, t2 = NULL;
614*38fd1498Szrj   unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
615*38fd1498Szrj   unsigned int num_args = gimple_phi_num_args (phi);
616*38fd1498Szrj 
617*38fd1498Szrj   gcc_assert (num_args > 2);
618*38fd1498Szrj 
619*38fd1498Szrj   for (i = 0; i < num_args; i++)
620*38fd1498Szrj     {
621*38fd1498Szrj       arg = gimple_phi_arg_def (phi, i);
622*38fd1498Szrj       if (t1 == NULL || operand_equal_p (t1, arg, 0))
623*38fd1498Szrj 	{
624*38fd1498Szrj 	  n1++;
625*38fd1498Szrj 	  i1 = i;
626*38fd1498Szrj 	  t1 = arg;
627*38fd1498Szrj 	}
628*38fd1498Szrj       else if (t2 == NULL || operand_equal_p (t2, arg, 0))
629*38fd1498Szrj 	{
630*38fd1498Szrj 	  n2++;
631*38fd1498Szrj 	  i2 = i;
632*38fd1498Szrj 	  t2 = arg;
633*38fd1498Szrj 	}
634*38fd1498Szrj       else
635*38fd1498Szrj 	return false;
636*38fd1498Szrj     }
637*38fd1498Szrj 
638*38fd1498Szrj   if (n1 != 1 && n2 != 1)
639*38fd1498Szrj     return false;
640*38fd1498Szrj 
641*38fd1498Szrj   /* Check if the edge corresponding to the unique arg is critical.  */
642*38fd1498Szrj   e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
643*38fd1498Szrj   if (EDGE_COUNT (e->src->succs) > 1)
644*38fd1498Szrj     return false;
645*38fd1498Szrj 
646*38fd1498Szrj   return true;
647*38fd1498Szrj }
648*38fd1498Szrj 
649*38fd1498Szrj /* Return true when PHI is if-convertible.  PHI is part of loop LOOP
650*38fd1498Szrj    and it belongs to basic block BB.  Note at this point, it is sure
651*38fd1498Szrj    that PHI is if-convertible.  This function updates global variable
652*38fd1498Szrj    ANY_COMPLICATED_PHI if PHI is complicated.  */
653*38fd1498Szrj 
654*38fd1498Szrj static bool
if_convertible_phi_p(struct loop * loop,basic_block bb,gphi * phi)655*38fd1498Szrj if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
656*38fd1498Szrj {
657*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
658*38fd1498Szrj     {
659*38fd1498Szrj       fprintf (dump_file, "-------------------------\n");
660*38fd1498Szrj       print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
661*38fd1498Szrj     }
662*38fd1498Szrj 
663*38fd1498Szrj   if (bb != loop->header
664*38fd1498Szrj       && gimple_phi_num_args (phi) > 2
665*38fd1498Szrj       && !phi_convertible_by_degenerating_args (phi))
666*38fd1498Szrj     any_complicated_phi = true;
667*38fd1498Szrj 
668*38fd1498Szrj   return true;
669*38fd1498Szrj }
670*38fd1498Szrj 
671*38fd1498Szrj /* Records the status of a data reference.  This struct is attached to
672*38fd1498Szrj    each DR->aux field.  */
673*38fd1498Szrj 
674*38fd1498Szrj struct ifc_dr {
675*38fd1498Szrj   bool rw_unconditionally;
676*38fd1498Szrj   bool w_unconditionally;
677*38fd1498Szrj   bool written_at_least_once;
678*38fd1498Szrj 
679*38fd1498Szrj   tree rw_predicate;
680*38fd1498Szrj   tree w_predicate;
681*38fd1498Szrj   tree base_w_predicate;
682*38fd1498Szrj };
683*38fd1498Szrj 
684*38fd1498Szrj #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
685*38fd1498Szrj #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
686*38fd1498Szrj #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
687*38fd1498Szrj #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
688*38fd1498Szrj 
689*38fd1498Szrj /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
690*38fd1498Szrj    HASH tables.  While storing them in HASH table, it checks if the
691*38fd1498Szrj    reference is unconditionally read or written and stores that as a flag
692*38fd1498Szrj    information.  For base reference it checks if it is written atlest once
693*38fd1498Szrj    unconditionally and stores it as flag information along with DR.
694*38fd1498Szrj    In other words for every data reference A in STMT there exist other
695*38fd1498Szrj    accesses to a data reference with the same base with predicates that
696*38fd1498Szrj    add up (OR-up) to the true predicate: this ensures that the data
697*38fd1498Szrj    reference A is touched (read or written) on every iteration of the
698*38fd1498Szrj    if-converted loop.  */
699*38fd1498Szrj static void
hash_memrefs_baserefs_and_store_DRs_read_written_info(data_reference_p a)700*38fd1498Szrj hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
701*38fd1498Szrj {
702*38fd1498Szrj 
703*38fd1498Szrj   data_reference_p *master_dr, *base_master_dr;
704*38fd1498Szrj   tree base_ref = DR_BASE_OBJECT (a);
705*38fd1498Szrj   innermost_loop_behavior *innermost = &DR_INNERMOST (a);
706*38fd1498Szrj   tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
707*38fd1498Szrj   bool exist1, exist2;
708*38fd1498Szrj 
709*38fd1498Szrj   master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
710*38fd1498Szrj   if (!exist1)
711*38fd1498Szrj     *master_dr = a;
712*38fd1498Szrj 
713*38fd1498Szrj   if (DR_IS_WRITE (a))
714*38fd1498Szrj     {
715*38fd1498Szrj       IFC_DR (*master_dr)->w_predicate
716*38fd1498Szrj 	= fold_or_predicates (UNKNOWN_LOCATION, ca,
717*38fd1498Szrj 			      IFC_DR (*master_dr)->w_predicate);
718*38fd1498Szrj       if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
719*38fd1498Szrj 	DR_W_UNCONDITIONALLY (*master_dr) = true;
720*38fd1498Szrj     }
721*38fd1498Szrj   IFC_DR (*master_dr)->rw_predicate
722*38fd1498Szrj     = fold_or_predicates (UNKNOWN_LOCATION, ca,
723*38fd1498Szrj 			  IFC_DR (*master_dr)->rw_predicate);
724*38fd1498Szrj   if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
725*38fd1498Szrj     DR_RW_UNCONDITIONALLY (*master_dr) = true;
726*38fd1498Szrj 
727*38fd1498Szrj   if (DR_IS_WRITE (a))
728*38fd1498Szrj     {
729*38fd1498Szrj       base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
730*38fd1498Szrj       if (!exist2)
731*38fd1498Szrj 	*base_master_dr = a;
732*38fd1498Szrj       IFC_DR (*base_master_dr)->base_w_predicate
733*38fd1498Szrj 	= fold_or_predicates (UNKNOWN_LOCATION, ca,
734*38fd1498Szrj 			      IFC_DR (*base_master_dr)->base_w_predicate);
735*38fd1498Szrj       if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
736*38fd1498Szrj 	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
737*38fd1498Szrj     }
738*38fd1498Szrj }
739*38fd1498Szrj 
740*38fd1498Szrj /* Return TRUE if can prove the index IDX of an array reference REF is
741*38fd1498Szrj    within array bound.  Return false otherwise.  */
742*38fd1498Szrj 
743*38fd1498Szrj static bool
idx_within_array_bound(tree ref,tree * idx,void * dta)744*38fd1498Szrj idx_within_array_bound (tree ref, tree *idx, void *dta)
745*38fd1498Szrj {
746*38fd1498Szrj   bool overflow;
747*38fd1498Szrj   widest_int niter, valid_niter, delta, wi_step;
748*38fd1498Szrj   tree ev, init, step;
749*38fd1498Szrj   tree low, high;
750*38fd1498Szrj   struct loop *loop = (struct loop*) dta;
751*38fd1498Szrj 
752*38fd1498Szrj   /* Only support within-bound access for array references.  */
753*38fd1498Szrj   if (TREE_CODE (ref) != ARRAY_REF)
754*38fd1498Szrj     return false;
755*38fd1498Szrj 
756*38fd1498Szrj   /* For arrays at the end of the structure, we are not guaranteed that they
757*38fd1498Szrj      do not really extend over their declared size.  However, for arrays of
758*38fd1498Szrj      size greater than one, this is unlikely to be intended.  */
759*38fd1498Szrj   if (array_at_struct_end_p (ref))
760*38fd1498Szrj     return false;
761*38fd1498Szrj 
762*38fd1498Szrj   ev = analyze_scalar_evolution (loop, *idx);
763*38fd1498Szrj   ev = instantiate_parameters (loop, ev);
764*38fd1498Szrj   init = initial_condition (ev);
765*38fd1498Szrj   step = evolution_part_in_loop_num (ev, loop->num);
766*38fd1498Szrj 
767*38fd1498Szrj   if (!init || TREE_CODE (init) != INTEGER_CST
768*38fd1498Szrj       || (step && TREE_CODE (step) != INTEGER_CST))
769*38fd1498Szrj     return false;
770*38fd1498Szrj 
771*38fd1498Szrj   low = array_ref_low_bound (ref);
772*38fd1498Szrj   high = array_ref_up_bound (ref);
773*38fd1498Szrj 
774*38fd1498Szrj   /* The case of nonconstant bounds could be handled, but it would be
775*38fd1498Szrj      complicated.  */
776*38fd1498Szrj   if (TREE_CODE (low) != INTEGER_CST
777*38fd1498Szrj       || !high || TREE_CODE (high) != INTEGER_CST)
778*38fd1498Szrj     return false;
779*38fd1498Szrj 
780*38fd1498Szrj   /* Check if the intial idx is within bound.  */
781*38fd1498Szrj   if (wi::to_widest (init) < wi::to_widest (low)
782*38fd1498Szrj       || wi::to_widest (init) > wi::to_widest (high))
783*38fd1498Szrj     return false;
784*38fd1498Szrj 
785*38fd1498Szrj   /* The idx is always within bound.  */
786*38fd1498Szrj   if (!step || integer_zerop (step))
787*38fd1498Szrj     return true;
788*38fd1498Szrj 
789*38fd1498Szrj   if (!max_loop_iterations (loop, &niter))
790*38fd1498Szrj     return false;
791*38fd1498Szrj 
792*38fd1498Szrj   if (wi::to_widest (step) < 0)
793*38fd1498Szrj     {
794*38fd1498Szrj       delta = wi::to_widest (init) - wi::to_widest (low);
795*38fd1498Szrj       wi_step = -wi::to_widest (step);
796*38fd1498Szrj     }
797*38fd1498Szrj   else
798*38fd1498Szrj     {
799*38fd1498Szrj       delta = wi::to_widest (high) - wi::to_widest (init);
800*38fd1498Szrj       wi_step = wi::to_widest (step);
801*38fd1498Szrj     }
802*38fd1498Szrj 
803*38fd1498Szrj   valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
804*38fd1498Szrj   /* The iteration space of idx is within array bound.  */
805*38fd1498Szrj   if (!overflow && niter <= valid_niter)
806*38fd1498Szrj     return true;
807*38fd1498Szrj 
808*38fd1498Szrj   return false;
809*38fd1498Szrj }
810*38fd1498Szrj 
811*38fd1498Szrj /* Return TRUE if ref is a within bound array reference.  */
812*38fd1498Szrj 
813*38fd1498Szrj static bool
ref_within_array_bound(gimple * stmt,tree ref)814*38fd1498Szrj ref_within_array_bound (gimple *stmt, tree ref)
815*38fd1498Szrj {
816*38fd1498Szrj   struct loop *loop = loop_containing_stmt (stmt);
817*38fd1498Szrj 
818*38fd1498Szrj   gcc_assert (loop != NULL);
819*38fd1498Szrj   return for_each_index (&ref, idx_within_array_bound, loop);
820*38fd1498Szrj }
821*38fd1498Szrj 
822*38fd1498Szrj 
823*38fd1498Szrj /* Given a memory reference expression T, return TRUE if base object
824*38fd1498Szrj    it refers to is writable.  The base object of a memory reference
825*38fd1498Szrj    is the main object being referenced, which is returned by function
826*38fd1498Szrj    get_base_address.  */
827*38fd1498Szrj 
828*38fd1498Szrj static bool
base_object_writable(tree ref)829*38fd1498Szrj base_object_writable (tree ref)
830*38fd1498Szrj {
831*38fd1498Szrj   tree base_tree = get_base_address (ref);
832*38fd1498Szrj 
833*38fd1498Szrj   return (base_tree
834*38fd1498Szrj 	  && DECL_P (base_tree)
835*38fd1498Szrj 	  && decl_binds_to_current_def_p (base_tree)
836*38fd1498Szrj 	  && !TREE_READONLY (base_tree));
837*38fd1498Szrj }
838*38fd1498Szrj 
839*38fd1498Szrj /* Return true when the memory references of STMT won't trap in the
840*38fd1498Szrj    if-converted code.  There are two things that we have to check for:
841*38fd1498Szrj 
842*38fd1498Szrj    - writes to memory occur to writable memory: if-conversion of
843*38fd1498Szrj    memory writes transforms the conditional memory writes into
844*38fd1498Szrj    unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
845*38fd1498Szrj    into "A[i] = cond ? foo : A[i]", and as the write to memory may not
846*38fd1498Szrj    be executed at all in the original code, it may be a readonly
847*38fd1498Szrj    memory.  To check that A is not const-qualified, we check that
848*38fd1498Szrj    there exists at least an unconditional write to A in the current
849*38fd1498Szrj    function.
850*38fd1498Szrj 
851*38fd1498Szrj    - reads or writes to memory are valid memory accesses for every
852*38fd1498Szrj    iteration.  To check that the memory accesses are correctly formed
853*38fd1498Szrj    and that we are allowed to read and write in these locations, we
854*38fd1498Szrj    check that the memory accesses to be if-converted occur at every
855*38fd1498Szrj    iteration unconditionally.
856*38fd1498Szrj 
857*38fd1498Szrj    Returns true for the memory reference in STMT, same memory reference
858*38fd1498Szrj    is read or written unconditionally atleast once and the base memory
859*38fd1498Szrj    reference is written unconditionally once.  This is to check reference
860*38fd1498Szrj    will not write fault.  Also retuns true if the memory reference is
861*38fd1498Szrj    unconditionally read once then we are conditionally writing to memory
862*38fd1498Szrj    which is defined as read and write and is bound to the definition
863*38fd1498Szrj    we are seeing.  */
864*38fd1498Szrj static bool
ifcvt_memrefs_wont_trap(gimple * stmt,vec<data_reference_p> drs)865*38fd1498Szrj ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
866*38fd1498Szrj {
867*38fd1498Szrj   /* If DR didn't see a reference here we can't use it to tell
868*38fd1498Szrj      whether the ref traps or not.  */
869*38fd1498Szrj   if (gimple_uid (stmt) == 0)
870*38fd1498Szrj     return false;
871*38fd1498Szrj 
872*38fd1498Szrj   data_reference_p *master_dr, *base_master_dr;
873*38fd1498Szrj   data_reference_p a = drs[gimple_uid (stmt) - 1];
874*38fd1498Szrj 
875*38fd1498Szrj   tree base = DR_BASE_OBJECT (a);
876*38fd1498Szrj   innermost_loop_behavior *innermost = &DR_INNERMOST (a);
877*38fd1498Szrj 
878*38fd1498Szrj   gcc_assert (DR_STMT (a) == stmt);
879*38fd1498Szrj   gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
880*38fd1498Szrj               || DR_INIT (a) || DR_STEP (a));
881*38fd1498Szrj 
882*38fd1498Szrj   master_dr = innermost_DR_map->get (innermost);
883*38fd1498Szrj   gcc_assert (master_dr != NULL);
884*38fd1498Szrj 
885*38fd1498Szrj   base_master_dr = baseref_DR_map->get (base);
886*38fd1498Szrj 
887*38fd1498Szrj   /* If a is unconditionally written to it doesn't trap.  */
888*38fd1498Szrj   if (DR_W_UNCONDITIONALLY (*master_dr))
889*38fd1498Szrj     return true;
890*38fd1498Szrj 
891*38fd1498Szrj   /* If a is unconditionally accessed then ...
892*38fd1498Szrj 
893*38fd1498Szrj      Even a is conditional access, we can treat it as an unconditional
894*38fd1498Szrj      one if it's an array reference and all its index are within array
895*38fd1498Szrj      bound.  */
896*38fd1498Szrj   if (DR_RW_UNCONDITIONALLY (*master_dr)
897*38fd1498Szrj       || ref_within_array_bound (stmt, DR_REF (a)))
898*38fd1498Szrj     {
899*38fd1498Szrj       /* an unconditional read won't trap.  */
900*38fd1498Szrj       if (DR_IS_READ (a))
901*38fd1498Szrj 	return true;
902*38fd1498Szrj 
903*38fd1498Szrj       /* an unconditionaly write won't trap if the base is written
904*38fd1498Szrj          to unconditionally.  */
905*38fd1498Szrj       if (base_master_dr
906*38fd1498Szrj 	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
907*38fd1498Szrj 	return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
908*38fd1498Szrj       /* or the base is known to be not readonly.  */
909*38fd1498Szrj       else if (base_object_writable (DR_REF (a)))
910*38fd1498Szrj 	return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
911*38fd1498Szrj     }
912*38fd1498Szrj 
913*38fd1498Szrj   return false;
914*38fd1498Szrj }
915*38fd1498Szrj 
916*38fd1498Szrj /* Return true if STMT could be converted into a masked load or store
917*38fd1498Szrj    (conditional load or store based on a mask computed from bb predicate).  */
918*38fd1498Szrj 
919*38fd1498Szrj static bool
ifcvt_can_use_mask_load_store(gimple * stmt)920*38fd1498Szrj ifcvt_can_use_mask_load_store (gimple *stmt)
921*38fd1498Szrj {
922*38fd1498Szrj   tree lhs, ref;
923*38fd1498Szrj   machine_mode mode;
924*38fd1498Szrj   basic_block bb = gimple_bb (stmt);
925*38fd1498Szrj   bool is_load;
926*38fd1498Szrj 
927*38fd1498Szrj   if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
928*38fd1498Szrj       || bb->loop_father->dont_vectorize
929*38fd1498Szrj       || !gimple_assign_single_p (stmt)
930*38fd1498Szrj       || gimple_has_volatile_ops (stmt))
931*38fd1498Szrj     return false;
932*38fd1498Szrj 
933*38fd1498Szrj   /* Check whether this is a load or store.  */
934*38fd1498Szrj   lhs = gimple_assign_lhs (stmt);
935*38fd1498Szrj   if (gimple_store_p (stmt))
936*38fd1498Szrj     {
937*38fd1498Szrj       if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
938*38fd1498Szrj 	return false;
939*38fd1498Szrj       is_load = false;
940*38fd1498Szrj       ref = lhs;
941*38fd1498Szrj     }
942*38fd1498Szrj   else if (gimple_assign_load_p (stmt))
943*38fd1498Szrj     {
944*38fd1498Szrj       is_load = true;
945*38fd1498Szrj       ref = gimple_assign_rhs1 (stmt);
946*38fd1498Szrj     }
947*38fd1498Szrj   else
948*38fd1498Szrj     return false;
949*38fd1498Szrj 
950*38fd1498Szrj   if (may_be_nonaddressable_p (ref))
951*38fd1498Szrj     return false;
952*38fd1498Szrj 
953*38fd1498Szrj   /* Mask should be integer mode of the same size as the load/store
954*38fd1498Szrj      mode.  */
955*38fd1498Szrj   mode = TYPE_MODE (TREE_TYPE (lhs));
956*38fd1498Szrj   if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
957*38fd1498Szrj     return false;
958*38fd1498Szrj 
959*38fd1498Szrj   if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
960*38fd1498Szrj     return true;
961*38fd1498Szrj 
962*38fd1498Szrj   return false;
963*38fd1498Szrj }
964*38fd1498Szrj 
965*38fd1498Szrj /* Return true when STMT is if-convertible.
966*38fd1498Szrj 
967*38fd1498Szrj    GIMPLE_ASSIGN statement is not if-convertible if,
968*38fd1498Szrj    - it is not movable,
969*38fd1498Szrj    - it could trap,
970*38fd1498Szrj    - LHS is not var decl.  */
971*38fd1498Szrj 
972*38fd1498Szrj static bool
if_convertible_gimple_assign_stmt_p(gimple * stmt,vec<data_reference_p> refs)973*38fd1498Szrj if_convertible_gimple_assign_stmt_p (gimple *stmt,
974*38fd1498Szrj 				     vec<data_reference_p> refs)
975*38fd1498Szrj {
976*38fd1498Szrj   tree lhs = gimple_assign_lhs (stmt);
977*38fd1498Szrj 
978*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
979*38fd1498Szrj     {
980*38fd1498Szrj       fprintf (dump_file, "-------------------------\n");
981*38fd1498Szrj       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
982*38fd1498Szrj     }
983*38fd1498Szrj 
984*38fd1498Szrj   if (!is_gimple_reg_type (TREE_TYPE (lhs)))
985*38fd1498Szrj     return false;
986*38fd1498Szrj 
987*38fd1498Szrj   /* Some of these constrains might be too conservative.  */
988*38fd1498Szrj   if (stmt_ends_bb_p (stmt)
989*38fd1498Szrj       || gimple_has_volatile_ops (stmt)
990*38fd1498Szrj       || (TREE_CODE (lhs) == SSA_NAME
991*38fd1498Szrj           && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
992*38fd1498Szrj       || gimple_has_side_effects (stmt))
993*38fd1498Szrj     {
994*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
995*38fd1498Szrj         fprintf (dump_file, "stmt not suitable for ifcvt\n");
996*38fd1498Szrj       return false;
997*38fd1498Szrj     }
998*38fd1498Szrj 
999*38fd1498Szrj   /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1000*38fd1498Szrj      in between if_convertible_loop_p and combine_blocks
1001*38fd1498Szrj      we can perform loop versioning.  */
1002*38fd1498Szrj   gimple_set_plf (stmt, GF_PLF_2, false);
1003*38fd1498Szrj 
1004*38fd1498Szrj   if ((! gimple_vuse (stmt)
1005*38fd1498Szrj        || gimple_could_trap_p_1 (stmt, false, false)
1006*38fd1498Szrj        || ! ifcvt_memrefs_wont_trap (stmt, refs))
1007*38fd1498Szrj       && gimple_could_trap_p (stmt))
1008*38fd1498Szrj     {
1009*38fd1498Szrj       if (ifcvt_can_use_mask_load_store (stmt))
1010*38fd1498Szrj 	{
1011*38fd1498Szrj 	  gimple_set_plf (stmt, GF_PLF_2, true);
1012*38fd1498Szrj 	  any_pred_load_store = true;
1013*38fd1498Szrj 	  return true;
1014*38fd1498Szrj 	}
1015*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1016*38fd1498Szrj 	fprintf (dump_file, "tree could trap...\n");
1017*38fd1498Szrj       return false;
1018*38fd1498Szrj     }
1019*38fd1498Szrj 
1020*38fd1498Szrj   /* When if-converting stores force versioning, likewise if we
1021*38fd1498Szrj      ended up generating store data races.  */
1022*38fd1498Szrj   if (gimple_vdef (stmt))
1023*38fd1498Szrj     any_pred_load_store = true;
1024*38fd1498Szrj 
1025*38fd1498Szrj   return true;
1026*38fd1498Szrj }
1027*38fd1498Szrj 
1028*38fd1498Szrj /* Return true when STMT is if-convertible.
1029*38fd1498Szrj 
1030*38fd1498Szrj    A statement is if-convertible if:
1031*38fd1498Szrj    - it is an if-convertible GIMPLE_ASSIGN,
1032*38fd1498Szrj    - it is a GIMPLE_LABEL or a GIMPLE_COND,
1033*38fd1498Szrj    - it is builtins call.  */
1034*38fd1498Szrj 
1035*38fd1498Szrj static bool
if_convertible_stmt_p(gimple * stmt,vec<data_reference_p> refs)1036*38fd1498Szrj if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1037*38fd1498Szrj {
1038*38fd1498Szrj   switch (gimple_code (stmt))
1039*38fd1498Szrj     {
1040*38fd1498Szrj     case GIMPLE_LABEL:
1041*38fd1498Szrj     case GIMPLE_DEBUG:
1042*38fd1498Szrj     case GIMPLE_COND:
1043*38fd1498Szrj       return true;
1044*38fd1498Szrj 
1045*38fd1498Szrj     case GIMPLE_ASSIGN:
1046*38fd1498Szrj       return if_convertible_gimple_assign_stmt_p (stmt, refs);
1047*38fd1498Szrj 
1048*38fd1498Szrj     case GIMPLE_CALL:
1049*38fd1498Szrj       {
1050*38fd1498Szrj 	tree fndecl = gimple_call_fndecl (stmt);
1051*38fd1498Szrj 	if (fndecl)
1052*38fd1498Szrj 	  {
1053*38fd1498Szrj 	    int flags = gimple_call_flags (stmt);
1054*38fd1498Szrj 	    if ((flags & ECF_CONST)
1055*38fd1498Szrj 		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
1056*38fd1498Szrj 		/* We can only vectorize some builtins at the moment,
1057*38fd1498Szrj 		   so restrict if-conversion to those.  */
1058*38fd1498Szrj 		&& DECL_BUILT_IN (fndecl))
1059*38fd1498Szrj 	      return true;
1060*38fd1498Szrj 	  }
1061*38fd1498Szrj 	return false;
1062*38fd1498Szrj       }
1063*38fd1498Szrj 
1064*38fd1498Szrj     default:
1065*38fd1498Szrj       /* Don't know what to do with 'em so don't do anything.  */
1066*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1067*38fd1498Szrj 	{
1068*38fd1498Szrj 	  fprintf (dump_file, "don't know what to do\n");
1069*38fd1498Szrj 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1070*38fd1498Szrj 	}
1071*38fd1498Szrj       return false;
1072*38fd1498Szrj     }
1073*38fd1498Szrj 
1074*38fd1498Szrj   return true;
1075*38fd1498Szrj }
1076*38fd1498Szrj 
1077*38fd1498Szrj /* Assumes that BB has more than 1 predecessors.
1078*38fd1498Szrj    Returns false if at least one successor is not on critical edge
1079*38fd1498Szrj    and true otherwise.  */
1080*38fd1498Szrj 
1081*38fd1498Szrj static inline bool
all_preds_critical_p(basic_block bb)1082*38fd1498Szrj all_preds_critical_p (basic_block bb)
1083*38fd1498Szrj {
1084*38fd1498Szrj   edge e;
1085*38fd1498Szrj   edge_iterator ei;
1086*38fd1498Szrj 
1087*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->preds)
1088*38fd1498Szrj     if (EDGE_COUNT (e->src->succs) == 1)
1089*38fd1498Szrj       return false;
1090*38fd1498Szrj   return true;
1091*38fd1498Szrj }
1092*38fd1498Szrj 
1093*38fd1498Szrj /* Returns true if at least one successor in on critical edge.  */
1094*38fd1498Szrj static inline bool
has_pred_critical_p(basic_block bb)1095*38fd1498Szrj has_pred_critical_p (basic_block bb)
1096*38fd1498Szrj {
1097*38fd1498Szrj   edge e;
1098*38fd1498Szrj   edge_iterator ei;
1099*38fd1498Szrj 
1100*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->preds)
1101*38fd1498Szrj     if (EDGE_COUNT (e->src->succs) > 1)
1102*38fd1498Szrj       return true;
1103*38fd1498Szrj   return false;
1104*38fd1498Szrj }
1105*38fd1498Szrj 
1106*38fd1498Szrj /* Return true when BB is if-convertible.  This routine does not check
1107*38fd1498Szrj    basic block's statements and phis.
1108*38fd1498Szrj 
1109*38fd1498Szrj    A basic block is not if-convertible if:
1110*38fd1498Szrj    - it is non-empty and it is after the exit block (in BFS order),
1111*38fd1498Szrj    - it is after the exit block but before the latch,
1112*38fd1498Szrj    - its edges are not normal.
1113*38fd1498Szrj 
1114*38fd1498Szrj    EXIT_BB is the basic block containing the exit of the LOOP.  BB is
1115*38fd1498Szrj    inside LOOP.  */
1116*38fd1498Szrj 
1117*38fd1498Szrj static bool
if_convertible_bb_p(struct loop * loop,basic_block bb,basic_block exit_bb)1118*38fd1498Szrj if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
1119*38fd1498Szrj {
1120*38fd1498Szrj   edge e;
1121*38fd1498Szrj   edge_iterator ei;
1122*38fd1498Szrj 
1123*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
1124*38fd1498Szrj     fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1125*38fd1498Szrj 
1126*38fd1498Szrj   if (EDGE_COUNT (bb->succs) > 2)
1127*38fd1498Szrj     return false;
1128*38fd1498Szrj 
1129*38fd1498Szrj   if (exit_bb)
1130*38fd1498Szrj     {
1131*38fd1498Szrj       if (bb != loop->latch)
1132*38fd1498Szrj 	{
1133*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
1134*38fd1498Szrj 	    fprintf (dump_file, "basic block after exit bb but before latch\n");
1135*38fd1498Szrj 	  return false;
1136*38fd1498Szrj 	}
1137*38fd1498Szrj       else if (!empty_block_p (bb))
1138*38fd1498Szrj 	{
1139*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
1140*38fd1498Szrj 	    fprintf (dump_file, "non empty basic block after exit bb\n");
1141*38fd1498Szrj 	  return false;
1142*38fd1498Szrj 	}
1143*38fd1498Szrj       else if (bb == loop->latch
1144*38fd1498Szrj 	       && bb != exit_bb
1145*38fd1498Szrj 	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1146*38fd1498Szrj 	  {
1147*38fd1498Szrj 	    if (dump_file && (dump_flags & TDF_DETAILS))
1148*38fd1498Szrj 	      fprintf (dump_file, "latch is not dominated by exit_block\n");
1149*38fd1498Szrj 	    return false;
1150*38fd1498Szrj 	  }
1151*38fd1498Szrj     }
1152*38fd1498Szrj 
1153*38fd1498Szrj   /* Be less adventurous and handle only normal edges.  */
1154*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
1155*38fd1498Szrj     if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1156*38fd1498Szrj       {
1157*38fd1498Szrj 	if (dump_file && (dump_flags & TDF_DETAILS))
1158*38fd1498Szrj 	  fprintf (dump_file, "Difficult to handle edges\n");
1159*38fd1498Szrj 	return false;
1160*38fd1498Szrj       }
1161*38fd1498Szrj 
1162*38fd1498Szrj   return true;
1163*38fd1498Szrj }
1164*38fd1498Szrj 
1165*38fd1498Szrj /* Return true when all predecessor blocks of BB are visited.  The
1166*38fd1498Szrj    VISITED bitmap keeps track of the visited blocks.  */
1167*38fd1498Szrj 
1168*38fd1498Szrj static bool
pred_blocks_visited_p(basic_block bb,bitmap * visited)1169*38fd1498Szrj pred_blocks_visited_p (basic_block bb, bitmap *visited)
1170*38fd1498Szrj {
1171*38fd1498Szrj   edge e;
1172*38fd1498Szrj   edge_iterator ei;
1173*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->preds)
1174*38fd1498Szrj     if (!bitmap_bit_p (*visited, e->src->index))
1175*38fd1498Szrj       return false;
1176*38fd1498Szrj 
1177*38fd1498Szrj   return true;
1178*38fd1498Szrj }
1179*38fd1498Szrj 
1180*38fd1498Szrj /* Get body of a LOOP in suitable order for if-conversion.  It is
1181*38fd1498Szrj    caller's responsibility to deallocate basic block list.
1182*38fd1498Szrj    If-conversion suitable order is, breadth first sort (BFS) order
1183*38fd1498Szrj    with an additional constraint: select a block only if all its
1184*38fd1498Szrj    predecessors are already selected.  */
1185*38fd1498Szrj 
1186*38fd1498Szrj static basic_block *
get_loop_body_in_if_conv_order(const struct loop * loop)1187*38fd1498Szrj get_loop_body_in_if_conv_order (const struct loop *loop)
1188*38fd1498Szrj {
1189*38fd1498Szrj   basic_block *blocks, *blocks_in_bfs_order;
1190*38fd1498Szrj   basic_block bb;
1191*38fd1498Szrj   bitmap visited;
1192*38fd1498Szrj   unsigned int index = 0;
1193*38fd1498Szrj   unsigned int visited_count = 0;
1194*38fd1498Szrj 
1195*38fd1498Szrj   gcc_assert (loop->num_nodes);
1196*38fd1498Szrj   gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1197*38fd1498Szrj 
1198*38fd1498Szrj   blocks = XCNEWVEC (basic_block, loop->num_nodes);
1199*38fd1498Szrj   visited = BITMAP_ALLOC (NULL);
1200*38fd1498Szrj 
1201*38fd1498Szrj   blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1202*38fd1498Szrj 
1203*38fd1498Szrj   index = 0;
1204*38fd1498Szrj   while (index < loop->num_nodes)
1205*38fd1498Szrj     {
1206*38fd1498Szrj       bb = blocks_in_bfs_order [index];
1207*38fd1498Szrj 
1208*38fd1498Szrj       if (bb->flags & BB_IRREDUCIBLE_LOOP)
1209*38fd1498Szrj 	{
1210*38fd1498Szrj 	  free (blocks_in_bfs_order);
1211*38fd1498Szrj 	  BITMAP_FREE (visited);
1212*38fd1498Szrj 	  free (blocks);
1213*38fd1498Szrj 	  return NULL;
1214*38fd1498Szrj 	}
1215*38fd1498Szrj 
1216*38fd1498Szrj       if (!bitmap_bit_p (visited, bb->index))
1217*38fd1498Szrj 	{
1218*38fd1498Szrj 	  if (pred_blocks_visited_p (bb, &visited)
1219*38fd1498Szrj 	      || bb == loop->header)
1220*38fd1498Szrj 	    {
1221*38fd1498Szrj 	      /* This block is now visited.  */
1222*38fd1498Szrj 	      bitmap_set_bit (visited, bb->index);
1223*38fd1498Szrj 	      blocks[visited_count++] = bb;
1224*38fd1498Szrj 	    }
1225*38fd1498Szrj 	}
1226*38fd1498Szrj 
1227*38fd1498Szrj       index++;
1228*38fd1498Szrj 
1229*38fd1498Szrj       if (index == loop->num_nodes
1230*38fd1498Szrj 	  && visited_count != loop->num_nodes)
1231*38fd1498Szrj 	/* Not done yet.  */
1232*38fd1498Szrj 	index = 0;
1233*38fd1498Szrj     }
1234*38fd1498Szrj   free (blocks_in_bfs_order);
1235*38fd1498Szrj   BITMAP_FREE (visited);
1236*38fd1498Szrj   return blocks;
1237*38fd1498Szrj }
1238*38fd1498Szrj 
1239*38fd1498Szrj /* Returns true when the analysis of the predicates for all the basic
1240*38fd1498Szrj    blocks in LOOP succeeded.
1241*38fd1498Szrj 
1242*38fd1498Szrj    predicate_bbs first allocates the predicates of the basic blocks.
1243*38fd1498Szrj    These fields are then initialized with the tree expressions
1244*38fd1498Szrj    representing the predicates under which a basic block is executed
1245*38fd1498Szrj    in the LOOP.  As the loop->header is executed at each iteration, it
1246*38fd1498Szrj    has the "true" predicate.  Other statements executed under a
1247*38fd1498Szrj    condition are predicated with that condition, for example
1248*38fd1498Szrj 
1249*38fd1498Szrj    | if (x)
1250*38fd1498Szrj    |   S1;
1251*38fd1498Szrj    | else
1252*38fd1498Szrj    |   S2;
1253*38fd1498Szrj 
1254*38fd1498Szrj    S1 will be predicated with "x", and
1255*38fd1498Szrj    S2 will be predicated with "!x".  */
1256*38fd1498Szrj 
1257*38fd1498Szrj static void
predicate_bbs(loop_p loop)1258*38fd1498Szrj predicate_bbs (loop_p loop)
1259*38fd1498Szrj {
1260*38fd1498Szrj   unsigned int i;
1261*38fd1498Szrj 
1262*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1263*38fd1498Szrj     init_bb_predicate (ifc_bbs[i]);
1264*38fd1498Szrj 
1265*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1266*38fd1498Szrj     {
1267*38fd1498Szrj       basic_block bb = ifc_bbs[i];
1268*38fd1498Szrj       tree cond;
1269*38fd1498Szrj       gimple *stmt;
1270*38fd1498Szrj 
1271*38fd1498Szrj       /* The loop latch and loop exit block are always executed and
1272*38fd1498Szrj 	 have no extra conditions to be processed: skip them.  */
1273*38fd1498Szrj       if (bb == loop->latch
1274*38fd1498Szrj 	  || bb_with_exit_edge_p (loop, bb))
1275*38fd1498Szrj 	{
1276*38fd1498Szrj 	  reset_bb_predicate (bb);
1277*38fd1498Szrj 	  continue;
1278*38fd1498Szrj 	}
1279*38fd1498Szrj 
1280*38fd1498Szrj       cond = bb_predicate (bb);
1281*38fd1498Szrj       stmt = last_stmt (bb);
1282*38fd1498Szrj       if (stmt && gimple_code (stmt) == GIMPLE_COND)
1283*38fd1498Szrj 	{
1284*38fd1498Szrj 	  tree c2;
1285*38fd1498Szrj 	  edge true_edge, false_edge;
1286*38fd1498Szrj 	  location_t loc = gimple_location (stmt);
1287*38fd1498Szrj 	  tree c = build2_loc (loc, gimple_cond_code (stmt),
1288*38fd1498Szrj 				    boolean_type_node,
1289*38fd1498Szrj 				    gimple_cond_lhs (stmt),
1290*38fd1498Szrj 				    gimple_cond_rhs (stmt));
1291*38fd1498Szrj 
1292*38fd1498Szrj 	  /* Add new condition into destination's predicate list.  */
1293*38fd1498Szrj 	  extract_true_false_edges_from_block (gimple_bb (stmt),
1294*38fd1498Szrj 					       &true_edge, &false_edge);
1295*38fd1498Szrj 
1296*38fd1498Szrj 	  /* If C is true, then TRUE_EDGE is taken.  */
1297*38fd1498Szrj 	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1298*38fd1498Szrj 				     unshare_expr (c));
1299*38fd1498Szrj 
1300*38fd1498Szrj 	  /* If C is false, then FALSE_EDGE is taken.  */
1301*38fd1498Szrj 	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1302*38fd1498Szrj 			   unshare_expr (c));
1303*38fd1498Szrj 	  add_to_dst_predicate_list (loop, false_edge,
1304*38fd1498Szrj 				     unshare_expr (cond), c2);
1305*38fd1498Szrj 
1306*38fd1498Szrj 	  cond = NULL_TREE;
1307*38fd1498Szrj 	}
1308*38fd1498Szrj 
1309*38fd1498Szrj       /* If current bb has only one successor, then consider it as an
1310*38fd1498Szrj 	 unconditional goto.  */
1311*38fd1498Szrj       if (single_succ_p (bb))
1312*38fd1498Szrj 	{
1313*38fd1498Szrj 	  basic_block bb_n = single_succ (bb);
1314*38fd1498Szrj 
1315*38fd1498Szrj 	  /* The successor bb inherits the predicate of its
1316*38fd1498Szrj 	     predecessor.  If there is no predicate in the predecessor
1317*38fd1498Szrj 	     bb, then consider the successor bb as always executed.  */
1318*38fd1498Szrj 	  if (cond == NULL_TREE)
1319*38fd1498Szrj 	    cond = boolean_true_node;
1320*38fd1498Szrj 
1321*38fd1498Szrj 	  add_to_predicate_list (loop, bb_n, cond);
1322*38fd1498Szrj 	}
1323*38fd1498Szrj     }
1324*38fd1498Szrj 
1325*38fd1498Szrj   /* The loop header is always executed.  */
1326*38fd1498Szrj   reset_bb_predicate (loop->header);
1327*38fd1498Szrj   gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1328*38fd1498Szrj 	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1329*38fd1498Szrj }
1330*38fd1498Szrj 
1331*38fd1498Szrj /* Build region by adding loop pre-header and post-header blocks.  */
1332*38fd1498Szrj 
1333*38fd1498Szrj static vec<basic_block>
build_region(struct loop * loop)1334*38fd1498Szrj build_region (struct loop *loop)
1335*38fd1498Szrj {
1336*38fd1498Szrj   vec<basic_block> region = vNULL;
1337*38fd1498Szrj   basic_block exit_bb = NULL;
1338*38fd1498Szrj 
1339*38fd1498Szrj   gcc_assert (ifc_bbs);
1340*38fd1498Szrj   /* The first element is loop pre-header.  */
1341*38fd1498Szrj   region.safe_push (loop_preheader_edge (loop)->src);
1342*38fd1498Szrj 
1343*38fd1498Szrj   for (unsigned int i = 0; i < loop->num_nodes; i++)
1344*38fd1498Szrj     {
1345*38fd1498Szrj       basic_block bb = ifc_bbs[i];
1346*38fd1498Szrj       region.safe_push (bb);
1347*38fd1498Szrj       /* Find loop postheader.  */
1348*38fd1498Szrj       edge e;
1349*38fd1498Szrj       edge_iterator ei;
1350*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
1351*38fd1498Szrj 	if (loop_exit_edge_p (loop, e))
1352*38fd1498Szrj 	  {
1353*38fd1498Szrj 	      exit_bb = e->dest;
1354*38fd1498Szrj 	      break;
1355*38fd1498Szrj 	  }
1356*38fd1498Szrj     }
1357*38fd1498Szrj   /* The last element is loop post-header.  */
1358*38fd1498Szrj   gcc_assert (exit_bb);
1359*38fd1498Szrj   region.safe_push (exit_bb);
1360*38fd1498Szrj   return region;
1361*38fd1498Szrj }
1362*38fd1498Szrj 
1363*38fd1498Szrj /* Return true when LOOP is if-convertible.  This is a helper function
1364*38fd1498Szrj    for if_convertible_loop_p.  REFS and DDRS are initialized and freed
1365*38fd1498Szrj    in if_convertible_loop_p.  */
1366*38fd1498Szrj 
1367*38fd1498Szrj static bool
if_convertible_loop_p_1(struct loop * loop,vec<data_reference_p> * refs)1368*38fd1498Szrj if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
1369*38fd1498Szrj {
1370*38fd1498Szrj   unsigned int i;
1371*38fd1498Szrj   basic_block exit_bb = NULL;
1372*38fd1498Szrj   vec<basic_block> region;
1373*38fd1498Szrj 
1374*38fd1498Szrj   if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1375*38fd1498Szrj     return false;
1376*38fd1498Szrj 
1377*38fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
1378*38fd1498Szrj 
1379*38fd1498Szrj   /* Allow statements that can be handled during if-conversion.  */
1380*38fd1498Szrj   ifc_bbs = get_loop_body_in_if_conv_order (loop);
1381*38fd1498Szrj   if (!ifc_bbs)
1382*38fd1498Szrj     {
1383*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1384*38fd1498Szrj 	fprintf (dump_file, "Irreducible loop\n");
1385*38fd1498Szrj       return false;
1386*38fd1498Szrj     }
1387*38fd1498Szrj 
1388*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1389*38fd1498Szrj     {
1390*38fd1498Szrj       basic_block bb = ifc_bbs[i];
1391*38fd1498Szrj 
1392*38fd1498Szrj       if (!if_convertible_bb_p (loop, bb, exit_bb))
1393*38fd1498Szrj 	return false;
1394*38fd1498Szrj 
1395*38fd1498Szrj       if (bb_with_exit_edge_p (loop, bb))
1396*38fd1498Szrj 	exit_bb = bb;
1397*38fd1498Szrj     }
1398*38fd1498Szrj 
1399*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1400*38fd1498Szrj     {
1401*38fd1498Szrj       basic_block bb = ifc_bbs[i];
1402*38fd1498Szrj       gimple_stmt_iterator gsi;
1403*38fd1498Szrj 
1404*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1405*38fd1498Szrj 	switch (gimple_code (gsi_stmt (gsi)))
1406*38fd1498Szrj 	  {
1407*38fd1498Szrj 	  case GIMPLE_LABEL:
1408*38fd1498Szrj 	  case GIMPLE_ASSIGN:
1409*38fd1498Szrj 	  case GIMPLE_CALL:
1410*38fd1498Szrj 	  case GIMPLE_DEBUG:
1411*38fd1498Szrj 	  case GIMPLE_COND:
1412*38fd1498Szrj 	    gimple_set_uid (gsi_stmt (gsi), 0);
1413*38fd1498Szrj 	    break;
1414*38fd1498Szrj 	  default:
1415*38fd1498Szrj 	    return false;
1416*38fd1498Szrj 	  }
1417*38fd1498Szrj     }
1418*38fd1498Szrj 
1419*38fd1498Szrj   data_reference_p dr;
1420*38fd1498Szrj 
1421*38fd1498Szrj   innermost_DR_map
1422*38fd1498Szrj 	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1423*38fd1498Szrj   baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1424*38fd1498Szrj 
1425*38fd1498Szrj   /* Compute post-dominator tree locally.  */
1426*38fd1498Szrj   region = build_region (loop);
1427*38fd1498Szrj   calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1428*38fd1498Szrj 
1429*38fd1498Szrj   predicate_bbs (loop);
1430*38fd1498Szrj 
1431*38fd1498Szrj   /* Free post-dominator tree since it is not used after predication.  */
1432*38fd1498Szrj   free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1433*38fd1498Szrj   region.release ();
1434*38fd1498Szrj 
1435*38fd1498Szrj   for (i = 0; refs->iterate (i, &dr); i++)
1436*38fd1498Szrj     {
1437*38fd1498Szrj       tree ref = DR_REF (dr);
1438*38fd1498Szrj 
1439*38fd1498Szrj       dr->aux = XNEW (struct ifc_dr);
1440*38fd1498Szrj       DR_BASE_W_UNCONDITIONALLY (dr) = false;
1441*38fd1498Szrj       DR_RW_UNCONDITIONALLY (dr) = false;
1442*38fd1498Szrj       DR_W_UNCONDITIONALLY (dr) = false;
1443*38fd1498Szrj       IFC_DR (dr)->rw_predicate = boolean_false_node;
1444*38fd1498Szrj       IFC_DR (dr)->w_predicate = boolean_false_node;
1445*38fd1498Szrj       IFC_DR (dr)->base_w_predicate = boolean_false_node;
1446*38fd1498Szrj       if (gimple_uid (DR_STMT (dr)) == 0)
1447*38fd1498Szrj 	gimple_set_uid (DR_STMT (dr), i + 1);
1448*38fd1498Szrj 
1449*38fd1498Szrj       /* If DR doesn't have innermost loop behavior or it's a compound
1450*38fd1498Szrj          memory reference, we synthesize its innermost loop behavior
1451*38fd1498Szrj          for hashing.  */
1452*38fd1498Szrj       if (TREE_CODE (ref) == COMPONENT_REF
1453*38fd1498Szrj           || TREE_CODE (ref) == IMAGPART_EXPR
1454*38fd1498Szrj           || TREE_CODE (ref) == REALPART_EXPR
1455*38fd1498Szrj           || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1456*38fd1498Szrj 	       || DR_INIT (dr) || DR_STEP (dr)))
1457*38fd1498Szrj         {
1458*38fd1498Szrj           while (TREE_CODE (ref) == COMPONENT_REF
1459*38fd1498Szrj 	         || TREE_CODE (ref) == IMAGPART_EXPR
1460*38fd1498Szrj 	         || TREE_CODE (ref) == REALPART_EXPR)
1461*38fd1498Szrj 	    ref = TREE_OPERAND (ref, 0);
1462*38fd1498Szrj 
1463*38fd1498Szrj 	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1464*38fd1498Szrj 	  DR_BASE_ADDRESS (dr) = ref;
1465*38fd1498Szrj         }
1466*38fd1498Szrj       hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1467*38fd1498Szrj     }
1468*38fd1498Szrj 
1469*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1470*38fd1498Szrj     {
1471*38fd1498Szrj       basic_block bb = ifc_bbs[i];
1472*38fd1498Szrj       gimple_stmt_iterator itr;
1473*38fd1498Szrj 
1474*38fd1498Szrj       /* Check the if-convertibility of statements in predicated BBs.  */
1475*38fd1498Szrj       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1476*38fd1498Szrj 	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1477*38fd1498Szrj 	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1478*38fd1498Szrj 	    return false;
1479*38fd1498Szrj     }
1480*38fd1498Szrj 
1481*38fd1498Szrj   /* Checking PHIs needs to be done after stmts, as the fact whether there
1482*38fd1498Szrj      are any masked loads or stores affects the tests.  */
1483*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1484*38fd1498Szrj     {
1485*38fd1498Szrj       basic_block bb = ifc_bbs[i];
1486*38fd1498Szrj       gphi_iterator itr;
1487*38fd1498Szrj 
1488*38fd1498Szrj       for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1489*38fd1498Szrj 	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1490*38fd1498Szrj 	  return false;
1491*38fd1498Szrj     }
1492*38fd1498Szrj 
1493*38fd1498Szrj   if (dump_file)
1494*38fd1498Szrj     fprintf (dump_file, "Applying if-conversion\n");
1495*38fd1498Szrj 
1496*38fd1498Szrj   return true;
1497*38fd1498Szrj }
1498*38fd1498Szrj 
1499*38fd1498Szrj /* Return true when LOOP is if-convertible.
1500*38fd1498Szrj    LOOP is if-convertible if:
1501*38fd1498Szrj    - it is innermost,
1502*38fd1498Szrj    - it has two or more basic blocks,
1503*38fd1498Szrj    - it has only one exit,
1504*38fd1498Szrj    - loop header is not the exit edge,
1505*38fd1498Szrj    - if its basic blocks and phi nodes are if convertible.  */
1506*38fd1498Szrj 
1507*38fd1498Szrj static bool
if_convertible_loop_p(struct loop * loop)1508*38fd1498Szrj if_convertible_loop_p (struct loop *loop)
1509*38fd1498Szrj {
1510*38fd1498Szrj   edge e;
1511*38fd1498Szrj   edge_iterator ei;
1512*38fd1498Szrj   bool res = false;
1513*38fd1498Szrj   vec<data_reference_p> refs;
1514*38fd1498Szrj 
1515*38fd1498Szrj   /* Handle only innermost loop.  */
1516*38fd1498Szrj   if (!loop || loop->inner)
1517*38fd1498Szrj     {
1518*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1519*38fd1498Szrj 	fprintf (dump_file, "not innermost loop\n");
1520*38fd1498Szrj       return false;
1521*38fd1498Szrj     }
1522*38fd1498Szrj 
1523*38fd1498Szrj   /* If only one block, no need for if-conversion.  */
1524*38fd1498Szrj   if (loop->num_nodes <= 2)
1525*38fd1498Szrj     {
1526*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1527*38fd1498Szrj 	fprintf (dump_file, "less than 2 basic blocks\n");
1528*38fd1498Szrj       return false;
1529*38fd1498Szrj     }
1530*38fd1498Szrj 
1531*38fd1498Szrj   /* More than one loop exit is too much to handle.  */
1532*38fd1498Szrj   if (!single_exit (loop))
1533*38fd1498Szrj     {
1534*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1535*38fd1498Szrj 	fprintf (dump_file, "multiple exits\n");
1536*38fd1498Szrj       return false;
1537*38fd1498Szrj     }
1538*38fd1498Szrj 
1539*38fd1498Szrj   /* If one of the loop header's edge is an exit edge then do not
1540*38fd1498Szrj      apply if-conversion.  */
1541*38fd1498Szrj   FOR_EACH_EDGE (e, ei, loop->header->succs)
1542*38fd1498Szrj     if (loop_exit_edge_p (loop, e))
1543*38fd1498Szrj       return false;
1544*38fd1498Szrj 
1545*38fd1498Szrj   refs.create (5);
1546*38fd1498Szrj   res = if_convertible_loop_p_1 (loop, &refs);
1547*38fd1498Szrj 
1548*38fd1498Szrj   data_reference_p dr;
1549*38fd1498Szrj   unsigned int i;
1550*38fd1498Szrj   for (i = 0; refs.iterate (i, &dr); i++)
1551*38fd1498Szrj     free (dr->aux);
1552*38fd1498Szrj 
1553*38fd1498Szrj   free_data_refs (refs);
1554*38fd1498Szrj 
1555*38fd1498Szrj   delete innermost_DR_map;
1556*38fd1498Szrj   innermost_DR_map = NULL;
1557*38fd1498Szrj 
1558*38fd1498Szrj   delete baseref_DR_map;
1559*38fd1498Szrj   baseref_DR_map = NULL;
1560*38fd1498Szrj 
1561*38fd1498Szrj   return res;
1562*38fd1498Szrj }
1563*38fd1498Szrj 
1564*38fd1498Szrj /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1565*38fd1498Szrj    which is in predicated basic block.
1566*38fd1498Szrj    In fact, the following PHI pattern is searching:
1567*38fd1498Szrj       loop-header:
1568*38fd1498Szrj 	reduc_1 = PHI <..., reduc_2>
1569*38fd1498Szrj       ...
1570*38fd1498Szrj 	if (...)
1571*38fd1498Szrj 	  reduc_3 = ...
1572*38fd1498Szrj 	reduc_2 = PHI <reduc_1, reduc_3>
1573*38fd1498Szrj 
1574*38fd1498Szrj    ARG_0 and ARG_1 are correspondent PHI arguments.
1575*38fd1498Szrj    REDUC, OP0 and OP1 contain reduction stmt and its operands.
1576*38fd1498Szrj    EXTENDED is true if PHI has > 2 arguments.  */
1577*38fd1498Szrj 
1578*38fd1498Szrj static bool
is_cond_scalar_reduction(gimple * phi,gimple ** reduc,tree arg_0,tree arg_1,tree * op0,tree * op1,bool extended)1579*38fd1498Szrj is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1580*38fd1498Szrj 			  tree *op0, tree *op1, bool extended)
1581*38fd1498Szrj {
1582*38fd1498Szrj   tree lhs, r_op1, r_op2;
1583*38fd1498Szrj   gimple *stmt;
1584*38fd1498Szrj   gimple *header_phi = NULL;
1585*38fd1498Szrj   enum tree_code reduction_op;
1586*38fd1498Szrj   basic_block bb = gimple_bb (phi);
1587*38fd1498Szrj   struct loop *loop = bb->loop_father;
1588*38fd1498Szrj   edge latch_e = loop_latch_edge (loop);
1589*38fd1498Szrj   imm_use_iterator imm_iter;
1590*38fd1498Szrj   use_operand_p use_p;
1591*38fd1498Szrj   edge e;
1592*38fd1498Szrj   edge_iterator ei;
1593*38fd1498Szrj   bool result = false;
1594*38fd1498Szrj   if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1595*38fd1498Szrj     return false;
1596*38fd1498Szrj 
1597*38fd1498Szrj   if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1598*38fd1498Szrj     {
1599*38fd1498Szrj       lhs = arg_1;
1600*38fd1498Szrj       header_phi = SSA_NAME_DEF_STMT (arg_0);
1601*38fd1498Szrj       stmt = SSA_NAME_DEF_STMT (arg_1);
1602*38fd1498Szrj     }
1603*38fd1498Szrj   else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1604*38fd1498Szrj     {
1605*38fd1498Szrj       lhs = arg_0;
1606*38fd1498Szrj       header_phi = SSA_NAME_DEF_STMT (arg_1);
1607*38fd1498Szrj       stmt = SSA_NAME_DEF_STMT (arg_0);
1608*38fd1498Szrj     }
1609*38fd1498Szrj   else
1610*38fd1498Szrj     return false;
1611*38fd1498Szrj   if (gimple_bb (header_phi) != loop->header)
1612*38fd1498Szrj     return false;
1613*38fd1498Szrj 
1614*38fd1498Szrj   if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1615*38fd1498Szrj     return false;
1616*38fd1498Szrj 
1617*38fd1498Szrj   if (gimple_code (stmt) != GIMPLE_ASSIGN
1618*38fd1498Szrj       || gimple_has_volatile_ops (stmt))
1619*38fd1498Szrj     return false;
1620*38fd1498Szrj 
1621*38fd1498Szrj   if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1622*38fd1498Szrj     return false;
1623*38fd1498Szrj 
1624*38fd1498Szrj   if (!is_predicated (gimple_bb (stmt)))
1625*38fd1498Szrj     return false;
1626*38fd1498Szrj 
1627*38fd1498Szrj   /* Check that stmt-block is predecessor of phi-block.  */
1628*38fd1498Szrj   FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1629*38fd1498Szrj     if (e->dest == bb)
1630*38fd1498Szrj       {
1631*38fd1498Szrj 	result = true;
1632*38fd1498Szrj 	break;
1633*38fd1498Szrj       }
1634*38fd1498Szrj   if (!result)
1635*38fd1498Szrj     return false;
1636*38fd1498Szrj 
1637*38fd1498Szrj   if (!has_single_use (lhs))
1638*38fd1498Szrj     return false;
1639*38fd1498Szrj 
1640*38fd1498Szrj   reduction_op = gimple_assign_rhs_code (stmt);
1641*38fd1498Szrj   if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1642*38fd1498Szrj     return false;
1643*38fd1498Szrj   r_op1 = gimple_assign_rhs1 (stmt);
1644*38fd1498Szrj   r_op2 = gimple_assign_rhs2 (stmt);
1645*38fd1498Szrj 
1646*38fd1498Szrj   /* Make R_OP1 to hold reduction variable.  */
1647*38fd1498Szrj   if (r_op2 == PHI_RESULT (header_phi)
1648*38fd1498Szrj       && reduction_op == PLUS_EXPR)
1649*38fd1498Szrj     std::swap (r_op1, r_op2);
1650*38fd1498Szrj   else if (r_op1 != PHI_RESULT (header_phi))
1651*38fd1498Szrj     return false;
1652*38fd1498Szrj 
1653*38fd1498Szrj   /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
1654*38fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1655*38fd1498Szrj     {
1656*38fd1498Szrj       gimple *use_stmt = USE_STMT (use_p);
1657*38fd1498Szrj       if (is_gimple_debug (use_stmt))
1658*38fd1498Szrj 	continue;
1659*38fd1498Szrj       if (use_stmt == stmt)
1660*38fd1498Szrj 	continue;
1661*38fd1498Szrj       if (gimple_code (use_stmt) != GIMPLE_PHI)
1662*38fd1498Szrj 	return false;
1663*38fd1498Szrj     }
1664*38fd1498Szrj 
1665*38fd1498Szrj   *op0 = r_op1; *op1 = r_op2;
1666*38fd1498Szrj   *reduc = stmt;
1667*38fd1498Szrj   return true;
1668*38fd1498Szrj }
1669*38fd1498Szrj 
1670*38fd1498Szrj /* Converts conditional scalar reduction into unconditional form, e.g.
1671*38fd1498Szrj      bb_4
1672*38fd1498Szrj        if (_5 != 0) goto bb_5 else goto bb_6
1673*38fd1498Szrj      end_bb_4
1674*38fd1498Szrj      bb_5
1675*38fd1498Szrj        res_6 = res_13 + 1;
1676*38fd1498Szrj      end_bb_5
1677*38fd1498Szrj      bb_6
1678*38fd1498Szrj        # res_2 = PHI <res_13(4), res_6(5)>
1679*38fd1498Szrj      end_bb_6
1680*38fd1498Szrj 
1681*38fd1498Szrj    will be converted into sequence
1682*38fd1498Szrj     _ifc__1 = _5 != 0 ? 1 : 0;
1683*38fd1498Szrj     res_2 = res_13 + _ifc__1;
1684*38fd1498Szrj   Argument SWAP tells that arguments of conditional expression should be
1685*38fd1498Szrj   swapped.
1686*38fd1498Szrj   Returns rhs of resulting PHI assignment.  */
1687*38fd1498Szrj 
1688*38fd1498Szrj static tree
convert_scalar_cond_reduction(gimple * reduc,gimple_stmt_iterator * gsi,tree cond,tree op0,tree op1,bool swap)1689*38fd1498Szrj convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1690*38fd1498Szrj 			       tree cond, tree op0, tree op1, bool swap)
1691*38fd1498Szrj {
1692*38fd1498Szrj   gimple_stmt_iterator stmt_it;
1693*38fd1498Szrj   gimple *new_assign;
1694*38fd1498Szrj   tree rhs;
1695*38fd1498Szrj   tree rhs1 = gimple_assign_rhs1 (reduc);
1696*38fd1498Szrj   tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1697*38fd1498Szrj   tree c;
1698*38fd1498Szrj   tree zero = build_zero_cst (TREE_TYPE (rhs1));
1699*38fd1498Szrj 
1700*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
1701*38fd1498Szrj     {
1702*38fd1498Szrj       fprintf (dump_file, "Found cond scalar reduction.\n");
1703*38fd1498Szrj       print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1704*38fd1498Szrj     }
1705*38fd1498Szrj 
1706*38fd1498Szrj   /* Build cond expression using COND and constant operand
1707*38fd1498Szrj      of reduction rhs.  */
1708*38fd1498Szrj   c = fold_build_cond_expr (TREE_TYPE (rhs1),
1709*38fd1498Szrj 			    unshare_expr (cond),
1710*38fd1498Szrj 			    swap ? zero : op1,
1711*38fd1498Szrj 			    swap ? op1 : zero);
1712*38fd1498Szrj 
1713*38fd1498Szrj   /* Create assignment stmt and insert it at GSI.  */
1714*38fd1498Szrj   new_assign = gimple_build_assign (tmp, c);
1715*38fd1498Szrj   gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1716*38fd1498Szrj   /* Build rhs for unconditional increment/decrement.  */
1717*38fd1498Szrj   rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1718*38fd1498Szrj 		     TREE_TYPE (rhs1), op0, tmp);
1719*38fd1498Szrj 
1720*38fd1498Szrj   /* Delete original reduction stmt.  */
1721*38fd1498Szrj   stmt_it = gsi_for_stmt (reduc);
1722*38fd1498Szrj   gsi_remove (&stmt_it, true);
1723*38fd1498Szrj   release_defs (reduc);
1724*38fd1498Szrj   return rhs;
1725*38fd1498Szrj }
1726*38fd1498Szrj 
1727*38fd1498Szrj /* Produce condition for all occurrences of ARG in PHI node.  */
1728*38fd1498Szrj 
1729*38fd1498Szrj static tree
gen_phi_arg_condition(gphi * phi,vec<int> * occur,gimple_stmt_iterator * gsi)1730*38fd1498Szrj gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1731*38fd1498Szrj 		       gimple_stmt_iterator *gsi)
1732*38fd1498Szrj {
1733*38fd1498Szrj   int len;
1734*38fd1498Szrj   int i;
1735*38fd1498Szrj   tree cond = NULL_TREE;
1736*38fd1498Szrj   tree c;
1737*38fd1498Szrj   edge e;
1738*38fd1498Szrj 
1739*38fd1498Szrj   len = occur->length ();
1740*38fd1498Szrj   gcc_assert (len > 0);
1741*38fd1498Szrj   for (i = 0; i < len; i++)
1742*38fd1498Szrj     {
1743*38fd1498Szrj       e = gimple_phi_arg_edge (phi, (*occur)[i]);
1744*38fd1498Szrj       c = bb_predicate (e->src);
1745*38fd1498Szrj       if (is_true_predicate (c))
1746*38fd1498Szrj 	{
1747*38fd1498Szrj 	  cond = c;
1748*38fd1498Szrj 	  break;
1749*38fd1498Szrj 	}
1750*38fd1498Szrj       c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1751*38fd1498Szrj 				      is_gimple_condexpr, NULL_TREE,
1752*38fd1498Szrj 				      true, GSI_SAME_STMT);
1753*38fd1498Szrj       if (cond != NULL_TREE)
1754*38fd1498Szrj 	{
1755*38fd1498Szrj 	  /* Must build OR expression.  */
1756*38fd1498Szrj 	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1757*38fd1498Szrj 	  cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1758*38fd1498Szrj 					     is_gimple_condexpr, NULL_TREE,
1759*38fd1498Szrj 					     true, GSI_SAME_STMT);
1760*38fd1498Szrj 	}
1761*38fd1498Szrj       else
1762*38fd1498Szrj 	cond = c;
1763*38fd1498Szrj     }
1764*38fd1498Szrj   gcc_assert (cond != NULL_TREE);
1765*38fd1498Szrj   return cond;
1766*38fd1498Szrj }
1767*38fd1498Szrj 
1768*38fd1498Szrj /* Local valueization callback that follows all-use SSA edges.  */
1769*38fd1498Szrj 
1770*38fd1498Szrj static tree
ifcvt_follow_ssa_use_edges(tree val)1771*38fd1498Szrj ifcvt_follow_ssa_use_edges (tree val)
1772*38fd1498Szrj {
1773*38fd1498Szrj   return val;
1774*38fd1498Szrj }
1775*38fd1498Szrj 
1776*38fd1498Szrj /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1777*38fd1498Szrj    This routine can handle PHI nodes with more than two arguments.
1778*38fd1498Szrj 
1779*38fd1498Szrj    For example,
1780*38fd1498Szrj      S1: A = PHI <x1(1), x2(5)>
1781*38fd1498Szrj    is converted into,
1782*38fd1498Szrj      S2: A = cond ? x1 : x2;
1783*38fd1498Szrj 
1784*38fd1498Szrj    The generated code is inserted at GSI that points to the top of
1785*38fd1498Szrj    basic block's statement list.
1786*38fd1498Szrj    If PHI node has more than two arguments a chain of conditional
1787*38fd1498Szrj    expression is produced.  */
1788*38fd1498Szrj 
1789*38fd1498Szrj 
1790*38fd1498Szrj static void
predicate_scalar_phi(gphi * phi,gimple_stmt_iterator * gsi)1791*38fd1498Szrj predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1792*38fd1498Szrj {
1793*38fd1498Szrj   gimple *new_stmt = NULL, *reduc;
1794*38fd1498Szrj   tree rhs, res, arg0, arg1, op0, op1, scev;
1795*38fd1498Szrj   tree cond;
1796*38fd1498Szrj   unsigned int index0;
1797*38fd1498Szrj   unsigned int max, args_len;
1798*38fd1498Szrj   edge e;
1799*38fd1498Szrj   basic_block bb;
1800*38fd1498Szrj   unsigned int i;
1801*38fd1498Szrj 
1802*38fd1498Szrj   res = gimple_phi_result (phi);
1803*38fd1498Szrj   if (virtual_operand_p (res))
1804*38fd1498Szrj     return;
1805*38fd1498Szrj 
1806*38fd1498Szrj   if ((rhs = degenerate_phi_result (phi))
1807*38fd1498Szrj       || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1808*38fd1498Szrj 					    res))
1809*38fd1498Szrj 	  && !chrec_contains_undetermined (scev)
1810*38fd1498Szrj 	  && scev != res
1811*38fd1498Szrj 	  && (rhs = gimple_phi_arg_def (phi, 0))))
1812*38fd1498Szrj     {
1813*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1814*38fd1498Szrj 	{
1815*38fd1498Szrj 	  fprintf (dump_file, "Degenerate phi!\n");
1816*38fd1498Szrj 	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1817*38fd1498Szrj 	}
1818*38fd1498Szrj       new_stmt = gimple_build_assign (res, rhs);
1819*38fd1498Szrj       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1820*38fd1498Szrj       update_stmt (new_stmt);
1821*38fd1498Szrj       return;
1822*38fd1498Szrj     }
1823*38fd1498Szrj 
1824*38fd1498Szrj   bb = gimple_bb (phi);
1825*38fd1498Szrj   if (EDGE_COUNT (bb->preds) == 2)
1826*38fd1498Szrj     {
1827*38fd1498Szrj       /* Predicate ordinary PHI node with 2 arguments.  */
1828*38fd1498Szrj       edge first_edge, second_edge;
1829*38fd1498Szrj       basic_block true_bb;
1830*38fd1498Szrj       first_edge = EDGE_PRED (bb, 0);
1831*38fd1498Szrj       second_edge = EDGE_PRED (bb, 1);
1832*38fd1498Szrj       cond = bb_predicate (first_edge->src);
1833*38fd1498Szrj       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1834*38fd1498Szrj 	std::swap (first_edge, second_edge);
1835*38fd1498Szrj       if (EDGE_COUNT (first_edge->src->succs) > 1)
1836*38fd1498Szrj 	{
1837*38fd1498Szrj 	  cond = bb_predicate (second_edge->src);
1838*38fd1498Szrj 	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1839*38fd1498Szrj 	    cond = TREE_OPERAND (cond, 0);
1840*38fd1498Szrj 	  else
1841*38fd1498Szrj 	    first_edge = second_edge;
1842*38fd1498Szrj 	}
1843*38fd1498Szrj       else
1844*38fd1498Szrj 	cond = bb_predicate (first_edge->src);
1845*38fd1498Szrj       /* Gimplify the condition to a valid cond-expr conditonal operand.  */
1846*38fd1498Szrj       cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1847*38fd1498Szrj 					 is_gimple_condexpr, NULL_TREE,
1848*38fd1498Szrj 					 true, GSI_SAME_STMT);
1849*38fd1498Szrj       true_bb = first_edge->src;
1850*38fd1498Szrj       if (EDGE_PRED (bb, 1)->src == true_bb)
1851*38fd1498Szrj 	{
1852*38fd1498Szrj 	  arg0 = gimple_phi_arg_def (phi, 1);
1853*38fd1498Szrj 	  arg1 = gimple_phi_arg_def (phi, 0);
1854*38fd1498Szrj 	}
1855*38fd1498Szrj       else
1856*38fd1498Szrj 	{
1857*38fd1498Szrj 	  arg0 = gimple_phi_arg_def (phi, 0);
1858*38fd1498Szrj 	  arg1 = gimple_phi_arg_def (phi, 1);
1859*38fd1498Szrj 	}
1860*38fd1498Szrj       if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1861*38fd1498Szrj 				    &op0, &op1, false))
1862*38fd1498Szrj 	/* Convert reduction stmt into vectorizable form.  */
1863*38fd1498Szrj 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1864*38fd1498Szrj 					     true_bb != gimple_bb (reduc));
1865*38fd1498Szrj       else
1866*38fd1498Szrj 	/* Build new RHS using selected condition and arguments.  */
1867*38fd1498Szrj 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1868*38fd1498Szrj 				    arg0, arg1);
1869*38fd1498Szrj       new_stmt = gimple_build_assign (res, rhs);
1870*38fd1498Szrj       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1871*38fd1498Szrj       gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
1872*38fd1498Szrj       if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
1873*38fd1498Szrj 	{
1874*38fd1498Szrj 	  new_stmt = gsi_stmt (new_gsi);
1875*38fd1498Szrj 	  update_stmt (new_stmt);
1876*38fd1498Szrj 	}
1877*38fd1498Szrj 
1878*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1879*38fd1498Szrj 	{
1880*38fd1498Szrj 	  fprintf (dump_file, "new phi replacement stmt\n");
1881*38fd1498Szrj 	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1882*38fd1498Szrj 	}
1883*38fd1498Szrj       return;
1884*38fd1498Szrj     }
1885*38fd1498Szrj 
1886*38fd1498Szrj   /* Create hashmap for PHI node which contain vector of argument indexes
1887*38fd1498Szrj      having the same value.  */
1888*38fd1498Szrj   bool swap = false;
1889*38fd1498Szrj   hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1890*38fd1498Szrj   unsigned int num_args = gimple_phi_num_args (phi);
1891*38fd1498Szrj   int max_ind = -1;
1892*38fd1498Szrj   /* Vector of different PHI argument values.  */
1893*38fd1498Szrj   auto_vec<tree> args (num_args);
1894*38fd1498Szrj 
1895*38fd1498Szrj   /* Compute phi_arg_map.  */
1896*38fd1498Szrj   for (i = 0; i < num_args; i++)
1897*38fd1498Szrj     {
1898*38fd1498Szrj       tree arg;
1899*38fd1498Szrj 
1900*38fd1498Szrj       arg = gimple_phi_arg_def (phi, i);
1901*38fd1498Szrj       if (!phi_arg_map.get (arg))
1902*38fd1498Szrj 	args.quick_push (arg);
1903*38fd1498Szrj       phi_arg_map.get_or_insert (arg).safe_push (i);
1904*38fd1498Szrj     }
1905*38fd1498Szrj 
1906*38fd1498Szrj   /* Determine element with max number of occurrences.  */
1907*38fd1498Szrj   max_ind = -1;
1908*38fd1498Szrj   max = 1;
1909*38fd1498Szrj   args_len = args.length ();
1910*38fd1498Szrj   for (i = 0; i < args_len; i++)
1911*38fd1498Szrj     {
1912*38fd1498Szrj       unsigned int len;
1913*38fd1498Szrj       if ((len = phi_arg_map.get (args[i])->length ()) > max)
1914*38fd1498Szrj 	{
1915*38fd1498Szrj 	  max_ind = (int) i;
1916*38fd1498Szrj 	  max = len;
1917*38fd1498Szrj 	}
1918*38fd1498Szrj     }
1919*38fd1498Szrj 
1920*38fd1498Szrj   /* Put element with max number of occurences to the end of ARGS.  */
1921*38fd1498Szrj   if (max_ind != -1 && max_ind +1 != (int) args_len)
1922*38fd1498Szrj     std::swap (args[args_len - 1], args[max_ind]);
1923*38fd1498Szrj 
1924*38fd1498Szrj   /* Handle one special case when number of arguments with different values
1925*38fd1498Szrj      is equal 2 and one argument has the only occurrence.  Such PHI can be
1926*38fd1498Szrj      handled as if would have only 2 arguments.  */
1927*38fd1498Szrj   if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1928*38fd1498Szrj     {
1929*38fd1498Szrj       vec<int> *indexes;
1930*38fd1498Szrj       indexes = phi_arg_map.get (args[0]);
1931*38fd1498Szrj       index0 = (*indexes)[0];
1932*38fd1498Szrj       arg0 = args[0];
1933*38fd1498Szrj       arg1 = args[1];
1934*38fd1498Szrj       e = gimple_phi_arg_edge (phi, index0);
1935*38fd1498Szrj       cond = bb_predicate (e->src);
1936*38fd1498Szrj       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1937*38fd1498Szrj 	{
1938*38fd1498Szrj 	  swap = true;
1939*38fd1498Szrj 	  cond = TREE_OPERAND (cond, 0);
1940*38fd1498Szrj 	}
1941*38fd1498Szrj       /* Gimplify the condition to a valid cond-expr conditonal operand.  */
1942*38fd1498Szrj       cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1943*38fd1498Szrj 					 is_gimple_condexpr, NULL_TREE,
1944*38fd1498Szrj 					 true, GSI_SAME_STMT);
1945*38fd1498Szrj       if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1946*38fd1498Szrj 				      &op0, &op1, true)))
1947*38fd1498Szrj 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1948*38fd1498Szrj 				    swap? arg1 : arg0,
1949*38fd1498Szrj 				    swap? arg0 : arg1);
1950*38fd1498Szrj       else
1951*38fd1498Szrj 	/* Convert reduction stmt into vectorizable form.  */
1952*38fd1498Szrj 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1953*38fd1498Szrj 					     swap);
1954*38fd1498Szrj       new_stmt = gimple_build_assign (res, rhs);
1955*38fd1498Szrj       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1956*38fd1498Szrj       update_stmt (new_stmt);
1957*38fd1498Szrj     }
1958*38fd1498Szrj   else
1959*38fd1498Szrj     {
1960*38fd1498Szrj       /* Common case.  */
1961*38fd1498Szrj       vec<int> *indexes;
1962*38fd1498Szrj       tree type = TREE_TYPE (gimple_phi_result (phi));
1963*38fd1498Szrj       tree lhs;
1964*38fd1498Szrj       arg1 = args[1];
1965*38fd1498Szrj       for (i = 0; i < args_len; i++)
1966*38fd1498Szrj 	{
1967*38fd1498Szrj 	  arg0 = args[i];
1968*38fd1498Szrj 	  indexes = phi_arg_map.get (args[i]);
1969*38fd1498Szrj 	  if (i != args_len - 1)
1970*38fd1498Szrj 	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1971*38fd1498Szrj 	  else
1972*38fd1498Szrj 	    lhs = res;
1973*38fd1498Szrj 	  cond = gen_phi_arg_condition (phi, indexes, gsi);
1974*38fd1498Szrj 	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
1975*38fd1498Szrj 				      arg0, arg1);
1976*38fd1498Szrj 	  new_stmt = gimple_build_assign (lhs, rhs);
1977*38fd1498Szrj 	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1978*38fd1498Szrj 	  update_stmt (new_stmt);
1979*38fd1498Szrj 	  arg1 = lhs;
1980*38fd1498Szrj 	}
1981*38fd1498Szrj     }
1982*38fd1498Szrj 
1983*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
1984*38fd1498Szrj     {
1985*38fd1498Szrj       fprintf (dump_file, "new extended phi replacement stmt\n");
1986*38fd1498Szrj       print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1987*38fd1498Szrj     }
1988*38fd1498Szrj }
1989*38fd1498Szrj 
1990*38fd1498Szrj /* Replaces in LOOP all the scalar phi nodes other than those in the
1991*38fd1498Szrj    LOOP->header block with conditional modify expressions.  */
1992*38fd1498Szrj 
1993*38fd1498Szrj static void
predicate_all_scalar_phis(struct loop * loop)1994*38fd1498Szrj predicate_all_scalar_phis (struct loop *loop)
1995*38fd1498Szrj {
1996*38fd1498Szrj   basic_block bb;
1997*38fd1498Szrj   unsigned int orig_loop_num_nodes = loop->num_nodes;
1998*38fd1498Szrj   unsigned int i;
1999*38fd1498Szrj 
2000*38fd1498Szrj   for (i = 1; i < orig_loop_num_nodes; i++)
2001*38fd1498Szrj     {
2002*38fd1498Szrj       gphi *phi;
2003*38fd1498Szrj       gimple_stmt_iterator gsi;
2004*38fd1498Szrj       gphi_iterator phi_gsi;
2005*38fd1498Szrj       bb = ifc_bbs[i];
2006*38fd1498Szrj 
2007*38fd1498Szrj       if (bb == loop->header)
2008*38fd1498Szrj 	continue;
2009*38fd1498Szrj 
2010*38fd1498Szrj       phi_gsi = gsi_start_phis (bb);
2011*38fd1498Szrj       if (gsi_end_p (phi_gsi))
2012*38fd1498Szrj 	continue;
2013*38fd1498Szrj 
2014*38fd1498Szrj       gsi = gsi_after_labels (bb);
2015*38fd1498Szrj       while (!gsi_end_p (phi_gsi))
2016*38fd1498Szrj 	{
2017*38fd1498Szrj 	  phi = phi_gsi.phi ();
2018*38fd1498Szrj 	  if (virtual_operand_p (gimple_phi_result (phi)))
2019*38fd1498Szrj 	    gsi_next (&phi_gsi);
2020*38fd1498Szrj 	  else
2021*38fd1498Szrj 	    {
2022*38fd1498Szrj 	      predicate_scalar_phi (phi, &gsi);
2023*38fd1498Szrj 	      remove_phi_node (&phi_gsi, false);
2024*38fd1498Szrj 	    }
2025*38fd1498Szrj 	}
2026*38fd1498Szrj     }
2027*38fd1498Szrj }
2028*38fd1498Szrj 
2029*38fd1498Szrj /* Insert in each basic block of LOOP the statements produced by the
2030*38fd1498Szrj    gimplification of the predicates.  */
2031*38fd1498Szrj 
2032*38fd1498Szrj static void
insert_gimplified_predicates(loop_p loop)2033*38fd1498Szrj insert_gimplified_predicates (loop_p loop)
2034*38fd1498Szrj {
2035*38fd1498Szrj   unsigned int i;
2036*38fd1498Szrj 
2037*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
2038*38fd1498Szrj     {
2039*38fd1498Szrj       basic_block bb = ifc_bbs[i];
2040*38fd1498Szrj       gimple_seq stmts;
2041*38fd1498Szrj       if (!is_predicated (bb))
2042*38fd1498Szrj 	gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2043*38fd1498Szrj       if (!is_predicated (bb))
2044*38fd1498Szrj 	{
2045*38fd1498Szrj 	  /* Do not insert statements for a basic block that is not
2046*38fd1498Szrj 	     predicated.  Also make sure that the predicate of the
2047*38fd1498Szrj 	     basic block is set to true.  */
2048*38fd1498Szrj 	  reset_bb_predicate (bb);
2049*38fd1498Szrj 	  continue;
2050*38fd1498Szrj 	}
2051*38fd1498Szrj 
2052*38fd1498Szrj       stmts = bb_predicate_gimplified_stmts (bb);
2053*38fd1498Szrj       if (stmts)
2054*38fd1498Szrj 	{
2055*38fd1498Szrj 	  if (any_pred_load_store)
2056*38fd1498Szrj 	    {
2057*38fd1498Szrj 	      /* Insert the predicate of the BB just after the label,
2058*38fd1498Szrj 		 as the if-conversion of memory writes will use this
2059*38fd1498Szrj 		 predicate.  */
2060*38fd1498Szrj 	      gimple_stmt_iterator gsi = gsi_after_labels (bb);
2061*38fd1498Szrj 	      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2062*38fd1498Szrj 	    }
2063*38fd1498Szrj 	  else
2064*38fd1498Szrj 	    {
2065*38fd1498Szrj 	      /* Insert the predicate of the BB at the end of the BB
2066*38fd1498Szrj 		 as this would reduce the register pressure: the only
2067*38fd1498Szrj 		 use of this predicate will be in successor BBs.  */
2068*38fd1498Szrj 	      gimple_stmt_iterator gsi = gsi_last_bb (bb);
2069*38fd1498Szrj 
2070*38fd1498Szrj 	      if (gsi_end_p (gsi)
2071*38fd1498Szrj 		  || stmt_ends_bb_p (gsi_stmt (gsi)))
2072*38fd1498Szrj 		gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2073*38fd1498Szrj 	      else
2074*38fd1498Szrj 		gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2075*38fd1498Szrj 	    }
2076*38fd1498Szrj 
2077*38fd1498Szrj 	  /* Once the sequence is code generated, set it to NULL.  */
2078*38fd1498Szrj 	  set_bb_predicate_gimplified_stmts (bb, NULL);
2079*38fd1498Szrj 	}
2080*38fd1498Szrj     }
2081*38fd1498Szrj }
2082*38fd1498Szrj 
2083*38fd1498Szrj /* Helper function for predicate_mem_writes. Returns index of existent
2084*38fd1498Szrj    mask if it was created for given SIZE and -1 otherwise.  */
2085*38fd1498Szrj 
2086*38fd1498Szrj static int
mask_exists(int size,vec<int> vec)2087*38fd1498Szrj mask_exists (int size, vec<int> vec)
2088*38fd1498Szrj {
2089*38fd1498Szrj   unsigned int ix;
2090*38fd1498Szrj   int v;
2091*38fd1498Szrj   FOR_EACH_VEC_ELT (vec, ix, v)
2092*38fd1498Szrj     if (v == size)
2093*38fd1498Szrj       return (int) ix;
2094*38fd1498Szrj   return -1;
2095*38fd1498Szrj }
2096*38fd1498Szrj 
2097*38fd1498Szrj /* Predicate each write to memory in LOOP.
2098*38fd1498Szrj 
2099*38fd1498Szrj    This function transforms control flow constructs containing memory
2100*38fd1498Szrj    writes of the form:
2101*38fd1498Szrj 
2102*38fd1498Szrj    | for (i = 0; i < N; i++)
2103*38fd1498Szrj    |   if (cond)
2104*38fd1498Szrj    |     A[i] = expr;
2105*38fd1498Szrj 
2106*38fd1498Szrj    into the following form that does not contain control flow:
2107*38fd1498Szrj 
2108*38fd1498Szrj    | for (i = 0; i < N; i++)
2109*38fd1498Szrj    |   A[i] = cond ? expr : A[i];
2110*38fd1498Szrj 
2111*38fd1498Szrj    The original CFG looks like this:
2112*38fd1498Szrj 
2113*38fd1498Szrj    | bb_0
2114*38fd1498Szrj    |   i = 0
2115*38fd1498Szrj    | end_bb_0
2116*38fd1498Szrj    |
2117*38fd1498Szrj    | bb_1
2118*38fd1498Szrj    |   if (i < N) goto bb_5 else goto bb_2
2119*38fd1498Szrj    | end_bb_1
2120*38fd1498Szrj    |
2121*38fd1498Szrj    | bb_2
2122*38fd1498Szrj    |   cond = some_computation;
2123*38fd1498Szrj    |   if (cond) goto bb_3 else goto bb_4
2124*38fd1498Szrj    | end_bb_2
2125*38fd1498Szrj    |
2126*38fd1498Szrj    | bb_3
2127*38fd1498Szrj    |   A[i] = expr;
2128*38fd1498Szrj    |   goto bb_4
2129*38fd1498Szrj    | end_bb_3
2130*38fd1498Szrj    |
2131*38fd1498Szrj    | bb_4
2132*38fd1498Szrj    |   goto bb_1
2133*38fd1498Szrj    | end_bb_4
2134*38fd1498Szrj 
2135*38fd1498Szrj    insert_gimplified_predicates inserts the computation of the COND
2136*38fd1498Szrj    expression at the beginning of the destination basic block:
2137*38fd1498Szrj 
2138*38fd1498Szrj    | bb_0
2139*38fd1498Szrj    |   i = 0
2140*38fd1498Szrj    | end_bb_0
2141*38fd1498Szrj    |
2142*38fd1498Szrj    | bb_1
2143*38fd1498Szrj    |   if (i < N) goto bb_5 else goto bb_2
2144*38fd1498Szrj    | end_bb_1
2145*38fd1498Szrj    |
2146*38fd1498Szrj    | bb_2
2147*38fd1498Szrj    |   cond = some_computation;
2148*38fd1498Szrj    |   if (cond) goto bb_3 else goto bb_4
2149*38fd1498Szrj    | end_bb_2
2150*38fd1498Szrj    |
2151*38fd1498Szrj    | bb_3
2152*38fd1498Szrj    |   cond = some_computation;
2153*38fd1498Szrj    |   A[i] = expr;
2154*38fd1498Szrj    |   goto bb_4
2155*38fd1498Szrj    | end_bb_3
2156*38fd1498Szrj    |
2157*38fd1498Szrj    | bb_4
2158*38fd1498Szrj    |   goto bb_1
2159*38fd1498Szrj    | end_bb_4
2160*38fd1498Szrj 
2161*38fd1498Szrj    predicate_mem_writes is then predicating the memory write as follows:
2162*38fd1498Szrj 
2163*38fd1498Szrj    | bb_0
2164*38fd1498Szrj    |   i = 0
2165*38fd1498Szrj    | end_bb_0
2166*38fd1498Szrj    |
2167*38fd1498Szrj    | bb_1
2168*38fd1498Szrj    |   if (i < N) goto bb_5 else goto bb_2
2169*38fd1498Szrj    | end_bb_1
2170*38fd1498Szrj    |
2171*38fd1498Szrj    | bb_2
2172*38fd1498Szrj    |   if (cond) goto bb_3 else goto bb_4
2173*38fd1498Szrj    | end_bb_2
2174*38fd1498Szrj    |
2175*38fd1498Szrj    | bb_3
2176*38fd1498Szrj    |   cond = some_computation;
2177*38fd1498Szrj    |   A[i] = cond ? expr : A[i];
2178*38fd1498Szrj    |   goto bb_4
2179*38fd1498Szrj    | end_bb_3
2180*38fd1498Szrj    |
2181*38fd1498Szrj    | bb_4
2182*38fd1498Szrj    |   goto bb_1
2183*38fd1498Szrj    | end_bb_4
2184*38fd1498Szrj 
2185*38fd1498Szrj    and finally combine_blocks removes the basic block boundaries making
2186*38fd1498Szrj    the loop vectorizable:
2187*38fd1498Szrj 
2188*38fd1498Szrj    | bb_0
2189*38fd1498Szrj    |   i = 0
2190*38fd1498Szrj    |   if (i < N) goto bb_5 else goto bb_1
2191*38fd1498Szrj    | end_bb_0
2192*38fd1498Szrj    |
2193*38fd1498Szrj    | bb_1
2194*38fd1498Szrj    |   cond = some_computation;
2195*38fd1498Szrj    |   A[i] = cond ? expr : A[i];
2196*38fd1498Szrj    |   if (i < N) goto bb_5 else goto bb_4
2197*38fd1498Szrj    | end_bb_1
2198*38fd1498Szrj    |
2199*38fd1498Szrj    | bb_4
2200*38fd1498Szrj    |   goto bb_1
2201*38fd1498Szrj    | end_bb_4
2202*38fd1498Szrj */
2203*38fd1498Szrj 
2204*38fd1498Szrj static void
predicate_mem_writes(loop_p loop)2205*38fd1498Szrj predicate_mem_writes (loop_p loop)
2206*38fd1498Szrj {
2207*38fd1498Szrj   unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2208*38fd1498Szrj   auto_vec<int, 1> vect_sizes;
2209*38fd1498Szrj   auto_vec<tree, 1> vect_masks;
2210*38fd1498Szrj 
2211*38fd1498Szrj   for (i = 1; i < orig_loop_num_nodes; i++)
2212*38fd1498Szrj     {
2213*38fd1498Szrj       gimple_stmt_iterator gsi;
2214*38fd1498Szrj       basic_block bb = ifc_bbs[i];
2215*38fd1498Szrj       tree cond = bb_predicate (bb);
2216*38fd1498Szrj       bool swap;
2217*38fd1498Szrj       gimple *stmt;
2218*38fd1498Szrj       int index;
2219*38fd1498Szrj 
2220*38fd1498Szrj       if (is_true_predicate (cond))
2221*38fd1498Szrj 	continue;
2222*38fd1498Szrj 
2223*38fd1498Szrj       swap = false;
2224*38fd1498Szrj       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2225*38fd1498Szrj 	{
2226*38fd1498Szrj 	  swap = true;
2227*38fd1498Szrj 	  cond = TREE_OPERAND (cond, 0);
2228*38fd1498Szrj 	}
2229*38fd1498Szrj 
2230*38fd1498Szrj       vect_sizes.truncate (0);
2231*38fd1498Szrj       vect_masks.truncate (0);
2232*38fd1498Szrj 
2233*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2234*38fd1498Szrj 	{
2235*38fd1498Szrj 	  if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2236*38fd1498Szrj 	    ;
2237*38fd1498Szrj 	  else if (is_false_predicate (cond)
2238*38fd1498Szrj 		   && gimple_vdef (stmt))
2239*38fd1498Szrj 	    {
2240*38fd1498Szrj 	      unlink_stmt_vdef (stmt);
2241*38fd1498Szrj 	      gsi_remove (&gsi, true);
2242*38fd1498Szrj 	      release_defs (stmt);
2243*38fd1498Szrj 	      continue;
2244*38fd1498Szrj 	    }
2245*38fd1498Szrj 	  else if (gimple_plf (stmt, GF_PLF_2))
2246*38fd1498Szrj 	    {
2247*38fd1498Szrj 	      tree lhs = gimple_assign_lhs (stmt);
2248*38fd1498Szrj 	      tree rhs = gimple_assign_rhs1 (stmt);
2249*38fd1498Szrj 	      tree ref, addr, ptr, mask;
2250*38fd1498Szrj 	      gcall *new_stmt;
2251*38fd1498Szrj 	      gimple_seq stmts = NULL;
2252*38fd1498Szrj 	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2253*38fd1498Szrj 	      /* We checked before setting GF_PLF_2 that an equivalent
2254*38fd1498Szrj 		 integer mode exists.  */
2255*38fd1498Szrj 	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2256*38fd1498Szrj 	      ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2257*38fd1498Szrj 	      mark_addressable (ref);
2258*38fd1498Szrj 	      addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2259*38fd1498Szrj 					       true, NULL_TREE, true,
2260*38fd1498Szrj 					       GSI_SAME_STMT);
2261*38fd1498Szrj 	      if (!vect_sizes.is_empty ()
2262*38fd1498Szrj 		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
2263*38fd1498Szrj 		/* Use created mask.  */
2264*38fd1498Szrj 		mask = vect_masks[index];
2265*38fd1498Szrj 	      else
2266*38fd1498Szrj 		{
2267*38fd1498Szrj 		  if (COMPARISON_CLASS_P (cond))
2268*38fd1498Szrj 		    mask = gimple_build (&stmts, TREE_CODE (cond),
2269*38fd1498Szrj 					 boolean_type_node,
2270*38fd1498Szrj 					 TREE_OPERAND (cond, 0),
2271*38fd1498Szrj 					 TREE_OPERAND (cond, 1));
2272*38fd1498Szrj 		  else
2273*38fd1498Szrj 		    mask = cond;
2274*38fd1498Szrj 
2275*38fd1498Szrj 		  if (swap)
2276*38fd1498Szrj 		    {
2277*38fd1498Szrj 		      tree true_val
2278*38fd1498Szrj 			= constant_boolean_node (true, TREE_TYPE (mask));
2279*38fd1498Szrj 		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
2280*38fd1498Szrj 					   TREE_TYPE (mask), mask, true_val);
2281*38fd1498Szrj 		    }
2282*38fd1498Szrj 		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2283*38fd1498Szrj 
2284*38fd1498Szrj 		  /* Save mask and its size for further use.  */
2285*38fd1498Szrj 		  vect_sizes.safe_push (bitsize);
2286*38fd1498Szrj 		  vect_masks.safe_push (mask);
2287*38fd1498Szrj 		}
2288*38fd1498Szrj 	      ptr = build_int_cst (reference_alias_ptr_type (ref),
2289*38fd1498Szrj 				   get_object_alignment (ref));
2290*38fd1498Szrj 	      /* Copy points-to info if possible.  */
2291*38fd1498Szrj 	      if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2292*38fd1498Szrj 		copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2293*38fd1498Szrj 			       ref);
2294*38fd1498Szrj 	      if (TREE_CODE (lhs) == SSA_NAME)
2295*38fd1498Szrj 		{
2296*38fd1498Szrj 		  new_stmt
2297*38fd1498Szrj 		    = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2298*38fd1498Szrj 						  ptr, mask);
2299*38fd1498Szrj 		  gimple_call_set_lhs (new_stmt, lhs);
2300*38fd1498Szrj 		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2301*38fd1498Szrj 		}
2302*38fd1498Szrj 	      else
2303*38fd1498Szrj 		{
2304*38fd1498Szrj 		  new_stmt
2305*38fd1498Szrj 		    = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2306*38fd1498Szrj 						  mask, rhs);
2307*38fd1498Szrj 		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2308*38fd1498Szrj 		  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
2309*38fd1498Szrj 		  SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
2310*38fd1498Szrj 		}
2311*38fd1498Szrj 	      gimple_call_set_nothrow (new_stmt, true);
2312*38fd1498Szrj 
2313*38fd1498Szrj 	      gsi_replace (&gsi, new_stmt, true);
2314*38fd1498Szrj 	    }
2315*38fd1498Szrj 	  else if (gimple_vdef (stmt))
2316*38fd1498Szrj 	    {
2317*38fd1498Szrj 	      tree lhs = gimple_assign_lhs (stmt);
2318*38fd1498Szrj 	      tree rhs = gimple_assign_rhs1 (stmt);
2319*38fd1498Szrj 	      tree type = TREE_TYPE (lhs);
2320*38fd1498Szrj 
2321*38fd1498Szrj 	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2322*38fd1498Szrj 	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2323*38fd1498Szrj 	      if (swap)
2324*38fd1498Szrj 		std::swap (lhs, rhs);
2325*38fd1498Szrj 	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2326*38fd1498Szrj 						 is_gimple_condexpr, NULL_TREE,
2327*38fd1498Szrj 						 true, GSI_SAME_STMT);
2328*38fd1498Szrj 	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2329*38fd1498Szrj 	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2330*38fd1498Szrj 	      update_stmt (stmt);
2331*38fd1498Szrj 	    }
2332*38fd1498Szrj 	  gsi_next (&gsi);
2333*38fd1498Szrj 	}
2334*38fd1498Szrj     }
2335*38fd1498Szrj }
2336*38fd1498Szrj 
2337*38fd1498Szrj /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2338*38fd1498Szrj    other than the exit and latch of the LOOP.  Also resets the
2339*38fd1498Szrj    GIMPLE_DEBUG information.  */
2340*38fd1498Szrj 
2341*38fd1498Szrj static void
remove_conditions_and_labels(loop_p loop)2342*38fd1498Szrj remove_conditions_and_labels (loop_p loop)
2343*38fd1498Szrj {
2344*38fd1498Szrj   gimple_stmt_iterator gsi;
2345*38fd1498Szrj   unsigned int i;
2346*38fd1498Szrj 
2347*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
2348*38fd1498Szrj     {
2349*38fd1498Szrj       basic_block bb = ifc_bbs[i];
2350*38fd1498Szrj 
2351*38fd1498Szrj       if (bb_with_exit_edge_p (loop, bb)
2352*38fd1498Szrj         || bb == loop->latch)
2353*38fd1498Szrj       continue;
2354*38fd1498Szrj 
2355*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2356*38fd1498Szrj 	switch (gimple_code (gsi_stmt (gsi)))
2357*38fd1498Szrj 	  {
2358*38fd1498Szrj 	  case GIMPLE_COND:
2359*38fd1498Szrj 	  case GIMPLE_LABEL:
2360*38fd1498Szrj 	    gsi_remove (&gsi, true);
2361*38fd1498Szrj 	    break;
2362*38fd1498Szrj 
2363*38fd1498Szrj 	  case GIMPLE_DEBUG:
2364*38fd1498Szrj 	    /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
2365*38fd1498Szrj 	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
2366*38fd1498Szrj 	      {
2367*38fd1498Szrj 		gimple_debug_bind_reset_value (gsi_stmt (gsi));
2368*38fd1498Szrj 		update_stmt (gsi_stmt (gsi));
2369*38fd1498Szrj 	      }
2370*38fd1498Szrj 	    gsi_next (&gsi);
2371*38fd1498Szrj 	    break;
2372*38fd1498Szrj 
2373*38fd1498Szrj 	  default:
2374*38fd1498Szrj 	    gsi_next (&gsi);
2375*38fd1498Szrj 	  }
2376*38fd1498Szrj     }
2377*38fd1498Szrj }
2378*38fd1498Szrj 
2379*38fd1498Szrj /* Combine all the basic blocks from LOOP into one or two super basic
2380*38fd1498Szrj    blocks.  Replace PHI nodes with conditional modify expressions.  */
2381*38fd1498Szrj 
2382*38fd1498Szrj static void
combine_blocks(struct loop * loop)2383*38fd1498Szrj combine_blocks (struct loop *loop)
2384*38fd1498Szrj {
2385*38fd1498Szrj   basic_block bb, exit_bb, merge_target_bb;
2386*38fd1498Szrj   unsigned int orig_loop_num_nodes = loop->num_nodes;
2387*38fd1498Szrj   unsigned int i;
2388*38fd1498Szrj   edge e;
2389*38fd1498Szrj   edge_iterator ei;
2390*38fd1498Szrj 
2391*38fd1498Szrj   remove_conditions_and_labels (loop);
2392*38fd1498Szrj   insert_gimplified_predicates (loop);
2393*38fd1498Szrj   predicate_all_scalar_phis (loop);
2394*38fd1498Szrj 
2395*38fd1498Szrj   if (any_pred_load_store)
2396*38fd1498Szrj     predicate_mem_writes (loop);
2397*38fd1498Szrj 
2398*38fd1498Szrj   /* Merge basic blocks: first remove all the edges in the loop,
2399*38fd1498Szrj      except for those from the exit block.  */
2400*38fd1498Szrj   exit_bb = NULL;
2401*38fd1498Szrj   bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2402*38fd1498Szrj   for (i = 0; i < orig_loop_num_nodes; i++)
2403*38fd1498Szrj     {
2404*38fd1498Szrj       bb = ifc_bbs[i];
2405*38fd1498Szrj       predicated[i] = !is_true_predicate (bb_predicate (bb));
2406*38fd1498Szrj       free_bb_predicate (bb);
2407*38fd1498Szrj       if (bb_with_exit_edge_p (loop, bb))
2408*38fd1498Szrj 	{
2409*38fd1498Szrj 	  gcc_assert (exit_bb == NULL);
2410*38fd1498Szrj 	  exit_bb = bb;
2411*38fd1498Szrj 	}
2412*38fd1498Szrj     }
2413*38fd1498Szrj   gcc_assert (exit_bb != loop->latch);
2414*38fd1498Szrj 
2415*38fd1498Szrj   for (i = 1; i < orig_loop_num_nodes; i++)
2416*38fd1498Szrj     {
2417*38fd1498Szrj       bb = ifc_bbs[i];
2418*38fd1498Szrj 
2419*38fd1498Szrj       for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2420*38fd1498Szrj 	{
2421*38fd1498Szrj 	  if (e->src == exit_bb)
2422*38fd1498Szrj 	    ei_next (&ei);
2423*38fd1498Szrj 	  else
2424*38fd1498Szrj 	    remove_edge (e);
2425*38fd1498Szrj 	}
2426*38fd1498Szrj     }
2427*38fd1498Szrj 
2428*38fd1498Szrj   if (exit_bb != NULL)
2429*38fd1498Szrj     {
2430*38fd1498Szrj       if (exit_bb != loop->header)
2431*38fd1498Szrj 	{
2432*38fd1498Szrj 	  /* Connect this node to loop header.  */
2433*38fd1498Szrj 	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2434*38fd1498Szrj 	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2435*38fd1498Szrj 	}
2436*38fd1498Szrj 
2437*38fd1498Szrj       /* Redirect non-exit edges to loop->latch.  */
2438*38fd1498Szrj       FOR_EACH_EDGE (e, ei, exit_bb->succs)
2439*38fd1498Szrj 	{
2440*38fd1498Szrj 	  if (!loop_exit_edge_p (loop, e))
2441*38fd1498Szrj 	    redirect_edge_and_branch (e, loop->latch);
2442*38fd1498Szrj 	}
2443*38fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2444*38fd1498Szrj     }
2445*38fd1498Szrj   else
2446*38fd1498Szrj     {
2447*38fd1498Szrj       /* If the loop does not have an exit, reconnect header and latch.  */
2448*38fd1498Szrj       make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2449*38fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2450*38fd1498Szrj     }
2451*38fd1498Szrj 
2452*38fd1498Szrj   merge_target_bb = loop->header;
2453*38fd1498Szrj 
2454*38fd1498Szrj   /* Get at the virtual def valid for uses starting at the first block
2455*38fd1498Szrj      we merge into the header.  Without a virtual PHI the loop has the
2456*38fd1498Szrj      same virtual use on all stmts.  */
2457*38fd1498Szrj   gphi *vphi = get_virtual_phi (loop->header);
2458*38fd1498Szrj   tree last_vdef = NULL_TREE;
2459*38fd1498Szrj   if (vphi)
2460*38fd1498Szrj     {
2461*38fd1498Szrj       last_vdef = gimple_phi_result (vphi);
2462*38fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2463*38fd1498Szrj 	   ! gsi_end_p (gsi); gsi_next (&gsi))
2464*38fd1498Szrj 	if (gimple_vdef (gsi_stmt (gsi)))
2465*38fd1498Szrj 	  last_vdef = gimple_vdef (gsi_stmt (gsi));
2466*38fd1498Szrj     }
2467*38fd1498Szrj   for (i = 1; i < orig_loop_num_nodes; i++)
2468*38fd1498Szrj     {
2469*38fd1498Szrj       gimple_stmt_iterator gsi;
2470*38fd1498Szrj       gimple_stmt_iterator last;
2471*38fd1498Szrj 
2472*38fd1498Szrj       bb = ifc_bbs[i];
2473*38fd1498Szrj 
2474*38fd1498Szrj       if (bb == exit_bb || bb == loop->latch)
2475*38fd1498Szrj 	continue;
2476*38fd1498Szrj 
2477*38fd1498Szrj       /* We release virtual PHIs late because we have to propagate them
2478*38fd1498Szrj          out using the current VUSE.  The def might be the one used
2479*38fd1498Szrj 	 after the loop.  */
2480*38fd1498Szrj       vphi = get_virtual_phi (bb);
2481*38fd1498Szrj       if (vphi)
2482*38fd1498Szrj 	{
2483*38fd1498Szrj 	  imm_use_iterator iter;
2484*38fd1498Szrj 	  use_operand_p use_p;
2485*38fd1498Szrj 	  gimple *use_stmt;
2486*38fd1498Szrj 	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2487*38fd1498Szrj 	    {
2488*38fd1498Szrj 	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2489*38fd1498Szrj 		SET_USE (use_p, last_vdef);
2490*38fd1498Szrj 	    }
2491*38fd1498Szrj 	  gsi = gsi_for_stmt (vphi);
2492*38fd1498Szrj 	  remove_phi_node (&gsi, true);
2493*38fd1498Szrj 	}
2494*38fd1498Szrj 
2495*38fd1498Szrj       /* Make stmts member of loop->header and clear range info from all stmts
2496*38fd1498Szrj 	 in BB which is now no longer executed conditional on a predicate we
2497*38fd1498Szrj 	 could have derived it from.  */
2498*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2499*38fd1498Szrj 	{
2500*38fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
2501*38fd1498Szrj 	  gimple_set_bb (stmt, merge_target_bb);
2502*38fd1498Szrj 	  /* Update virtual operands.  */
2503*38fd1498Szrj 	  if (last_vdef)
2504*38fd1498Szrj 	    {
2505*38fd1498Szrj 	      use_operand_p use_p = ssa_vuse_operand (stmt);
2506*38fd1498Szrj 	      if (use_p
2507*38fd1498Szrj 		  && USE_FROM_PTR (use_p) != last_vdef)
2508*38fd1498Szrj 		SET_USE (use_p, last_vdef);
2509*38fd1498Szrj 	      if (gimple_vdef (stmt))
2510*38fd1498Szrj 		last_vdef = gimple_vdef (stmt);
2511*38fd1498Szrj 	    }
2512*38fd1498Szrj 	  if (predicated[i])
2513*38fd1498Szrj 	    {
2514*38fd1498Szrj 	      ssa_op_iter i;
2515*38fd1498Szrj 	      tree op;
2516*38fd1498Szrj 	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2517*38fd1498Szrj 		reset_flow_sensitive_info (op);
2518*38fd1498Szrj 	    }
2519*38fd1498Szrj 	}
2520*38fd1498Szrj 
2521*38fd1498Szrj       /* Update stmt list.  */
2522*38fd1498Szrj       last = gsi_last_bb (merge_target_bb);
2523*38fd1498Szrj       gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2524*38fd1498Szrj       set_bb_seq (bb, NULL);
2525*38fd1498Szrj 
2526*38fd1498Szrj       delete_basic_block (bb);
2527*38fd1498Szrj     }
2528*38fd1498Szrj 
2529*38fd1498Szrj   /* If possible, merge loop header to the block with the exit edge.
2530*38fd1498Szrj      This reduces the number of basic blocks to two, to please the
2531*38fd1498Szrj      vectorizer that handles only loops with two nodes.  */
2532*38fd1498Szrj   if (exit_bb
2533*38fd1498Szrj       && exit_bb != loop->header)
2534*38fd1498Szrj     {
2535*38fd1498Szrj       /* We release virtual PHIs late because we have to propagate them
2536*38fd1498Szrj          out using the current VUSE.  The def might be the one used
2537*38fd1498Szrj 	 after the loop.  */
2538*38fd1498Szrj       vphi = get_virtual_phi (exit_bb);
2539*38fd1498Szrj       if (vphi)
2540*38fd1498Szrj 	{
2541*38fd1498Szrj 	  imm_use_iterator iter;
2542*38fd1498Szrj 	  use_operand_p use_p;
2543*38fd1498Szrj 	  gimple *use_stmt;
2544*38fd1498Szrj 	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2545*38fd1498Szrj 	    {
2546*38fd1498Szrj 	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2547*38fd1498Szrj 		SET_USE (use_p, last_vdef);
2548*38fd1498Szrj 	    }
2549*38fd1498Szrj 	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2550*38fd1498Szrj 	  remove_phi_node (&gsi, true);
2551*38fd1498Szrj 	}
2552*38fd1498Szrj 
2553*38fd1498Szrj       if (can_merge_blocks_p (loop->header, exit_bb))
2554*38fd1498Szrj 	merge_blocks (loop->header, exit_bb);
2555*38fd1498Szrj     }
2556*38fd1498Szrj 
2557*38fd1498Szrj   free (ifc_bbs);
2558*38fd1498Szrj   ifc_bbs = NULL;
2559*38fd1498Szrj   free (predicated);
2560*38fd1498Szrj }
2561*38fd1498Szrj 
2562*38fd1498Szrj /* Version LOOP before if-converting it; the original loop
2563*38fd1498Szrj    will be if-converted, the new copy of the loop will not,
2564*38fd1498Szrj    and the LOOP_VECTORIZED internal call will be guarding which
2565*38fd1498Szrj    loop to execute.  The vectorizer pass will fold this
2566*38fd1498Szrj    internal call into either true or false.
2567*38fd1498Szrj 
2568*38fd1498Szrj    Note that this function intentionally invalidates profile.  Both edges
2569*38fd1498Szrj    out of LOOP_VECTORIZED must have 100% probability so the profile remains
2570*38fd1498Szrj    consistent after the condition is folded in the vectorizer.  */
2571*38fd1498Szrj 
2572*38fd1498Szrj static struct loop *
version_loop_for_if_conversion(struct loop * loop)2573*38fd1498Szrj version_loop_for_if_conversion (struct loop *loop)
2574*38fd1498Szrj {
2575*38fd1498Szrj   basic_block cond_bb;
2576*38fd1498Szrj   tree cond = make_ssa_name (boolean_type_node);
2577*38fd1498Szrj   struct loop *new_loop;
2578*38fd1498Szrj   gimple *g;
2579*38fd1498Szrj   gimple_stmt_iterator gsi;
2580*38fd1498Szrj   unsigned int save_length;
2581*38fd1498Szrj 
2582*38fd1498Szrj   g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2583*38fd1498Szrj 				  build_int_cst (integer_type_node, loop->num),
2584*38fd1498Szrj 				  integer_zero_node);
2585*38fd1498Szrj   gimple_call_set_lhs (g, cond);
2586*38fd1498Szrj 
2587*38fd1498Szrj   /* Save BB->aux around loop_version as that uses the same field.  */
2588*38fd1498Szrj   save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2589*38fd1498Szrj   void **saved_preds = XALLOCAVEC (void *, save_length);
2590*38fd1498Szrj   for (unsigned i = 0; i < save_length; i++)
2591*38fd1498Szrj     saved_preds[i] = ifc_bbs[i]->aux;
2592*38fd1498Szrj 
2593*38fd1498Szrj   initialize_original_copy_tables ();
2594*38fd1498Szrj   /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2595*38fd1498Szrj      is re-merged in the vectorizer.  */
2596*38fd1498Szrj   new_loop = loop_version (loop, cond, &cond_bb,
2597*38fd1498Szrj 			   profile_probability::always (),
2598*38fd1498Szrj 			   profile_probability::always (),
2599*38fd1498Szrj 			   profile_probability::always (),
2600*38fd1498Szrj 			   profile_probability::always (), true);
2601*38fd1498Szrj   free_original_copy_tables ();
2602*38fd1498Szrj 
2603*38fd1498Szrj   for (unsigned i = 0; i < save_length; i++)
2604*38fd1498Szrj     ifc_bbs[i]->aux = saved_preds[i];
2605*38fd1498Szrj 
2606*38fd1498Szrj   if (new_loop == NULL)
2607*38fd1498Szrj     return NULL;
2608*38fd1498Szrj 
2609*38fd1498Szrj   new_loop->dont_vectorize = true;
2610*38fd1498Szrj   new_loop->force_vectorize = false;
2611*38fd1498Szrj   gsi = gsi_last_bb (cond_bb);
2612*38fd1498Szrj   gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2613*38fd1498Szrj   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2614*38fd1498Szrj   update_ssa (TODO_update_ssa);
2615*38fd1498Szrj   return new_loop;
2616*38fd1498Szrj }
2617*38fd1498Szrj 
2618*38fd1498Szrj /* Return true when LOOP satisfies the follow conditions that will
2619*38fd1498Szrj    allow it to be recognized by the vectorizer for outer-loop
2620*38fd1498Szrj    vectorization:
2621*38fd1498Szrj     - The loop is not the root node of the loop tree.
2622*38fd1498Szrj     - The loop has exactly one inner loop.
2623*38fd1498Szrj     - The loop has a single exit.
2624*38fd1498Szrj     - The loop header has a single successor, which is the inner
2625*38fd1498Szrj       loop header.
2626*38fd1498Szrj     - Each of the inner and outer loop latches have a single
2627*38fd1498Szrj       predecessor.
2628*38fd1498Szrj     - The loop exit block has a single predecessor, which is the
2629*38fd1498Szrj       inner loop's exit block.  */
2630*38fd1498Szrj 
2631*38fd1498Szrj static bool
versionable_outer_loop_p(struct loop * loop)2632*38fd1498Szrj versionable_outer_loop_p (struct loop *loop)
2633*38fd1498Szrj {
2634*38fd1498Szrj   if (!loop_outer (loop)
2635*38fd1498Szrj       || loop->dont_vectorize
2636*38fd1498Szrj       || !loop->inner
2637*38fd1498Szrj       || loop->inner->next
2638*38fd1498Szrj       || !single_exit (loop)
2639*38fd1498Szrj       || !single_succ_p (loop->header)
2640*38fd1498Szrj       || single_succ (loop->header) != loop->inner->header
2641*38fd1498Szrj       || !single_pred_p (loop->latch)
2642*38fd1498Szrj       || !single_pred_p (loop->inner->latch))
2643*38fd1498Szrj     return false;
2644*38fd1498Szrj 
2645*38fd1498Szrj   basic_block outer_exit = single_pred (loop->latch);
2646*38fd1498Szrj   basic_block inner_exit = single_pred (loop->inner->latch);
2647*38fd1498Szrj 
2648*38fd1498Szrj   if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2649*38fd1498Szrj     return false;
2650*38fd1498Szrj 
2651*38fd1498Szrj   if (dump_file)
2652*38fd1498Szrj     fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2653*38fd1498Szrj 
2654*38fd1498Szrj   return true;
2655*38fd1498Szrj }
2656*38fd1498Szrj 
2657*38fd1498Szrj /* Performs splitting of critical edges.  Skip splitting and return false
2658*38fd1498Szrj    if LOOP will not be converted because:
2659*38fd1498Szrj 
2660*38fd1498Szrj      - LOOP is not well formed.
2661*38fd1498Szrj      - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2662*38fd1498Szrj 
2663*38fd1498Szrj    Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */
2664*38fd1498Szrj 
2665*38fd1498Szrj static bool
ifcvt_split_critical_edges(struct loop * loop,bool aggressive_if_conv)2666*38fd1498Szrj ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
2667*38fd1498Szrj {
2668*38fd1498Szrj   basic_block *body;
2669*38fd1498Szrj   basic_block bb;
2670*38fd1498Szrj   unsigned int num = loop->num_nodes;
2671*38fd1498Szrj   unsigned int i;
2672*38fd1498Szrj   gimple *stmt;
2673*38fd1498Szrj   edge e;
2674*38fd1498Szrj   edge_iterator ei;
2675*38fd1498Szrj   auto_vec<edge> critical_edges;
2676*38fd1498Szrj 
2677*38fd1498Szrj   /* Loop is not well formed.  */
2678*38fd1498Szrj   if (num <= 2 || loop->inner || !single_exit (loop))
2679*38fd1498Szrj     return false;
2680*38fd1498Szrj 
2681*38fd1498Szrj   body = get_loop_body (loop);
2682*38fd1498Szrj   for (i = 0; i < num; i++)
2683*38fd1498Szrj     {
2684*38fd1498Szrj       bb = body[i];
2685*38fd1498Szrj       if (!aggressive_if_conv
2686*38fd1498Szrj 	  && phi_nodes (bb)
2687*38fd1498Szrj 	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
2688*38fd1498Szrj 	{
2689*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
2690*38fd1498Szrj 	    fprintf (dump_file,
2691*38fd1498Szrj 		     "BB %d has complicated PHI with more than %u args.\n",
2692*38fd1498Szrj 		     bb->index, MAX_PHI_ARG_NUM);
2693*38fd1498Szrj 
2694*38fd1498Szrj 	  free (body);
2695*38fd1498Szrj 	  return false;
2696*38fd1498Szrj 	}
2697*38fd1498Szrj       if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
2698*38fd1498Szrj 	continue;
2699*38fd1498Szrj 
2700*38fd1498Szrj       stmt = last_stmt (bb);
2701*38fd1498Szrj       /* Skip basic blocks not ending with conditional branch.  */
2702*38fd1498Szrj       if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2703*38fd1498Szrj 	continue;
2704*38fd1498Szrj 
2705*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
2706*38fd1498Szrj 	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2707*38fd1498Szrj 	  critical_edges.safe_push (e);
2708*38fd1498Szrj     }
2709*38fd1498Szrj   free (body);
2710*38fd1498Szrj 
2711*38fd1498Szrj   while (critical_edges.length () > 0)
2712*38fd1498Szrj     {
2713*38fd1498Szrj       e = critical_edges.pop ();
2714*38fd1498Szrj       /* Don't split if bb can be predicated along non-critical edge.  */
2715*38fd1498Szrj       if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
2716*38fd1498Szrj 	split_edge (e);
2717*38fd1498Szrj     }
2718*38fd1498Szrj 
2719*38fd1498Szrj   return true;
2720*38fd1498Szrj }
2721*38fd1498Szrj 
2722*38fd1498Szrj /* Delete redundant statements produced by predication which prevents
2723*38fd1498Szrj    loop vectorization.  */
2724*38fd1498Szrj 
2725*38fd1498Szrj static void
ifcvt_local_dce(basic_block bb)2726*38fd1498Szrj ifcvt_local_dce (basic_block bb)
2727*38fd1498Szrj {
2728*38fd1498Szrj   gimple *stmt;
2729*38fd1498Szrj   gimple *stmt1;
2730*38fd1498Szrj   gimple *phi;
2731*38fd1498Szrj   gimple_stmt_iterator gsi;
2732*38fd1498Szrj   auto_vec<gimple *> worklist;
2733*38fd1498Szrj   enum gimple_code code;
2734*38fd1498Szrj   use_operand_p use_p;
2735*38fd1498Szrj   imm_use_iterator imm_iter;
2736*38fd1498Szrj 
2737*38fd1498Szrj   worklist.create (64);
2738*38fd1498Szrj   /* Consider all phi as live statements.  */
2739*38fd1498Szrj   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2740*38fd1498Szrj     {
2741*38fd1498Szrj       phi = gsi_stmt (gsi);
2742*38fd1498Szrj       gimple_set_plf (phi, GF_PLF_2, true);
2743*38fd1498Szrj       worklist.safe_push (phi);
2744*38fd1498Szrj     }
2745*38fd1498Szrj   /* Consider load/store statements, CALL and COND as live.  */
2746*38fd1498Szrj   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2747*38fd1498Szrj     {
2748*38fd1498Szrj       stmt = gsi_stmt (gsi);
2749*38fd1498Szrj       if (gimple_store_p (stmt)
2750*38fd1498Szrj 	  || gimple_assign_load_p (stmt)
2751*38fd1498Szrj 	  || is_gimple_debug (stmt))
2752*38fd1498Szrj 	{
2753*38fd1498Szrj 	  gimple_set_plf (stmt, GF_PLF_2, true);
2754*38fd1498Szrj 	  worklist.safe_push (stmt);
2755*38fd1498Szrj 	  continue;
2756*38fd1498Szrj 	}
2757*38fd1498Szrj       code = gimple_code (stmt);
2758*38fd1498Szrj       if (code == GIMPLE_COND || code == GIMPLE_CALL)
2759*38fd1498Szrj 	{
2760*38fd1498Szrj 	  gimple_set_plf (stmt, GF_PLF_2, true);
2761*38fd1498Szrj 	  worklist.safe_push (stmt);
2762*38fd1498Szrj 	  continue;
2763*38fd1498Szrj 	}
2764*38fd1498Szrj       gimple_set_plf (stmt, GF_PLF_2, false);
2765*38fd1498Szrj 
2766*38fd1498Szrj       if (code == GIMPLE_ASSIGN)
2767*38fd1498Szrj 	{
2768*38fd1498Szrj 	  tree lhs = gimple_assign_lhs (stmt);
2769*38fd1498Szrj 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2770*38fd1498Szrj 	    {
2771*38fd1498Szrj 	      stmt1 = USE_STMT (use_p);
2772*38fd1498Szrj 	      if (gimple_bb (stmt1) != bb)
2773*38fd1498Szrj 		{
2774*38fd1498Szrj 		  gimple_set_plf (stmt, GF_PLF_2, true);
2775*38fd1498Szrj 		  worklist.safe_push (stmt);
2776*38fd1498Szrj 		  break;
2777*38fd1498Szrj 		}
2778*38fd1498Szrj 	    }
2779*38fd1498Szrj 	}
2780*38fd1498Szrj     }
2781*38fd1498Szrj   /* Propagate liveness through arguments of live stmt.  */
2782*38fd1498Szrj   while (worklist.length () > 0)
2783*38fd1498Szrj     {
2784*38fd1498Szrj       ssa_op_iter iter;
2785*38fd1498Szrj       use_operand_p use_p;
2786*38fd1498Szrj       tree use;
2787*38fd1498Szrj 
2788*38fd1498Szrj       stmt = worklist.pop ();
2789*38fd1498Szrj       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2790*38fd1498Szrj 	{
2791*38fd1498Szrj 	  use = USE_FROM_PTR (use_p);
2792*38fd1498Szrj 	  if (TREE_CODE (use) != SSA_NAME)
2793*38fd1498Szrj 	    continue;
2794*38fd1498Szrj 	  stmt1 = SSA_NAME_DEF_STMT (use);
2795*38fd1498Szrj 	  if (gimple_bb (stmt1) != bb
2796*38fd1498Szrj 	      || gimple_plf (stmt1, GF_PLF_2))
2797*38fd1498Szrj 	    continue;
2798*38fd1498Szrj 	  gimple_set_plf (stmt1, GF_PLF_2, true);
2799*38fd1498Szrj 	  worklist.safe_push (stmt1);
2800*38fd1498Szrj 	}
2801*38fd1498Szrj     }
2802*38fd1498Szrj   /* Delete dead statements.  */
2803*38fd1498Szrj   gsi = gsi_start_bb (bb);
2804*38fd1498Szrj   while (!gsi_end_p (gsi))
2805*38fd1498Szrj     {
2806*38fd1498Szrj       stmt = gsi_stmt (gsi);
2807*38fd1498Szrj       if (gimple_plf (stmt, GF_PLF_2))
2808*38fd1498Szrj 	{
2809*38fd1498Szrj 	  gsi_next (&gsi);
2810*38fd1498Szrj 	  continue;
2811*38fd1498Szrj 	}
2812*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
2813*38fd1498Szrj 	{
2814*38fd1498Szrj 	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2815*38fd1498Szrj 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2816*38fd1498Szrj 	}
2817*38fd1498Szrj       gsi_remove (&gsi, true);
2818*38fd1498Szrj       release_defs (stmt);
2819*38fd1498Szrj     }
2820*38fd1498Szrj }
2821*38fd1498Szrj 
2822*38fd1498Szrj /* If-convert LOOP when it is legal.  For the moment this pass has no
2823*38fd1498Szrj    profitability analysis.  Returns non-zero todo flags when something
2824*38fd1498Szrj    changed.  */
2825*38fd1498Szrj 
2826*38fd1498Szrj unsigned int
tree_if_conversion(struct loop * loop)2827*38fd1498Szrj tree_if_conversion (struct loop *loop)
2828*38fd1498Szrj {
2829*38fd1498Szrj   unsigned int todo = 0;
2830*38fd1498Szrj   bool aggressive_if_conv;
2831*38fd1498Szrj   struct loop *rloop;
2832*38fd1498Szrj 
2833*38fd1498Szrj  again:
2834*38fd1498Szrj   rloop = NULL;
2835*38fd1498Szrj   ifc_bbs = NULL;
2836*38fd1498Szrj   any_pred_load_store = false;
2837*38fd1498Szrj   any_complicated_phi = false;
2838*38fd1498Szrj 
2839*38fd1498Szrj   /* Apply more aggressive if-conversion when loop or its outer loop were
2840*38fd1498Szrj      marked with simd pragma.  When that's the case, we try to if-convert
2841*38fd1498Szrj      loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
2842*38fd1498Szrj   aggressive_if_conv = loop->force_vectorize;
2843*38fd1498Szrj   if (!aggressive_if_conv)
2844*38fd1498Szrj     {
2845*38fd1498Szrj       struct loop *outer_loop = loop_outer (loop);
2846*38fd1498Szrj       if (outer_loop && outer_loop->force_vectorize)
2847*38fd1498Szrj 	aggressive_if_conv = true;
2848*38fd1498Szrj     }
2849*38fd1498Szrj 
2850*38fd1498Szrj   if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
2851*38fd1498Szrj     goto cleanup;
2852*38fd1498Szrj 
2853*38fd1498Szrj   if (!if_convertible_loop_p (loop)
2854*38fd1498Szrj       || !dbg_cnt (if_conversion_tree))
2855*38fd1498Szrj     goto cleanup;
2856*38fd1498Szrj 
2857*38fd1498Szrj   if ((any_pred_load_store || any_complicated_phi)
2858*38fd1498Szrj       && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2859*38fd1498Szrj 	  || loop->dont_vectorize))
2860*38fd1498Szrj     goto cleanup;
2861*38fd1498Szrj 
2862*38fd1498Szrj   /* Since we have no cost model, always version loops unless the user
2863*38fd1498Szrj      specified -ftree-loop-if-convert or unless versioning is required.
2864*38fd1498Szrj      Either version this loop, or if the pattern is right for outer-loop
2865*38fd1498Szrj      vectorization, version the outer loop.  In the latter case we will
2866*38fd1498Szrj      still if-convert the original inner loop.  */
2867*38fd1498Szrj   if (any_pred_load_store
2868*38fd1498Szrj       || any_complicated_phi
2869*38fd1498Szrj       || flag_tree_loop_if_convert != 1)
2870*38fd1498Szrj     {
2871*38fd1498Szrj       struct loop *vloop
2872*38fd1498Szrj 	= (versionable_outer_loop_p (loop_outer (loop))
2873*38fd1498Szrj 	   ? loop_outer (loop) : loop);
2874*38fd1498Szrj       struct loop *nloop = version_loop_for_if_conversion (vloop);
2875*38fd1498Szrj       if (nloop == NULL)
2876*38fd1498Szrj 	goto cleanup;
2877*38fd1498Szrj       if (vloop != loop)
2878*38fd1498Szrj 	{
2879*38fd1498Szrj 	  /* If versionable_outer_loop_p decided to version the
2880*38fd1498Szrj 	     outer loop, version also the inner loop of the non-vectorized
2881*38fd1498Szrj 	     loop copy.  So we transform:
2882*38fd1498Szrj 	      loop1
2883*38fd1498Szrj 		loop2
2884*38fd1498Szrj 	     into:
2885*38fd1498Szrj 	      if (LOOP_VECTORIZED (1, 3))
2886*38fd1498Szrj 		{
2887*38fd1498Szrj 		  loop1
2888*38fd1498Szrj 		    loop2
2889*38fd1498Szrj 		}
2890*38fd1498Szrj 	      else
2891*38fd1498Szrj 		loop3 (copy of loop1)
2892*38fd1498Szrj 		  if (LOOP_VECTORIZED (4, 5))
2893*38fd1498Szrj 		    loop4 (copy of loop2)
2894*38fd1498Szrj 		  else
2895*38fd1498Szrj 		    loop5 (copy of loop4)  */
2896*38fd1498Szrj 	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
2897*38fd1498Szrj 	  rloop = nloop->inner;
2898*38fd1498Szrj 	}
2899*38fd1498Szrj     }
2900*38fd1498Szrj 
2901*38fd1498Szrj   /* Now all statements are if-convertible.  Combine all the basic
2902*38fd1498Szrj      blocks into one huge basic block doing the if-conversion
2903*38fd1498Szrj      on-the-fly.  */
2904*38fd1498Szrj   combine_blocks (loop);
2905*38fd1498Szrj 
2906*38fd1498Szrj   /* Delete dead predicate computations.  */
2907*38fd1498Szrj   ifcvt_local_dce (loop->header);
2908*38fd1498Szrj 
2909*38fd1498Szrj   todo |= TODO_cleanup_cfg;
2910*38fd1498Szrj 
2911*38fd1498Szrj  cleanup:
2912*38fd1498Szrj   if (ifc_bbs)
2913*38fd1498Szrj     {
2914*38fd1498Szrj       unsigned int i;
2915*38fd1498Szrj 
2916*38fd1498Szrj       for (i = 0; i < loop->num_nodes; i++)
2917*38fd1498Szrj 	free_bb_predicate (ifc_bbs[i]);
2918*38fd1498Szrj 
2919*38fd1498Szrj       free (ifc_bbs);
2920*38fd1498Szrj       ifc_bbs = NULL;
2921*38fd1498Szrj     }
2922*38fd1498Szrj   if (rloop != NULL)
2923*38fd1498Szrj     {
2924*38fd1498Szrj       loop = rloop;
2925*38fd1498Szrj       goto again;
2926*38fd1498Szrj     }
2927*38fd1498Szrj 
2928*38fd1498Szrj   return todo;
2929*38fd1498Szrj }
2930*38fd1498Szrj 
2931*38fd1498Szrj /* Tree if-conversion pass management.  */
2932*38fd1498Szrj 
2933*38fd1498Szrj namespace {
2934*38fd1498Szrj 
2935*38fd1498Szrj const pass_data pass_data_if_conversion =
2936*38fd1498Szrj {
2937*38fd1498Szrj   GIMPLE_PASS, /* type */
2938*38fd1498Szrj   "ifcvt", /* name */
2939*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
2940*38fd1498Szrj   TV_TREE_LOOP_IFCVT, /* tv_id */
2941*38fd1498Szrj   ( PROP_cfg | PROP_ssa ), /* properties_required */
2942*38fd1498Szrj   0, /* properties_provided */
2943*38fd1498Szrj   0, /* properties_destroyed */
2944*38fd1498Szrj   0, /* todo_flags_start */
2945*38fd1498Szrj   0, /* todo_flags_finish */
2946*38fd1498Szrj };
2947*38fd1498Szrj 
2948*38fd1498Szrj class pass_if_conversion : public gimple_opt_pass
2949*38fd1498Szrj {
2950*38fd1498Szrj public:
pass_if_conversion(gcc::context * ctxt)2951*38fd1498Szrj   pass_if_conversion (gcc::context *ctxt)
2952*38fd1498Szrj     : gimple_opt_pass (pass_data_if_conversion, ctxt)
2953*38fd1498Szrj   {}
2954*38fd1498Szrj 
2955*38fd1498Szrj   /* opt_pass methods: */
2956*38fd1498Szrj   virtual bool gate (function *);
2957*38fd1498Szrj   virtual unsigned int execute (function *);
2958*38fd1498Szrj 
2959*38fd1498Szrj }; // class pass_if_conversion
2960*38fd1498Szrj 
2961*38fd1498Szrj bool
gate(function * fun)2962*38fd1498Szrj pass_if_conversion::gate (function *fun)
2963*38fd1498Szrj {
2964*38fd1498Szrj   return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2965*38fd1498Szrj 	   && flag_tree_loop_if_convert != 0)
2966*38fd1498Szrj 	  || flag_tree_loop_if_convert == 1);
2967*38fd1498Szrj }
2968*38fd1498Szrj 
2969*38fd1498Szrj unsigned int
execute(function * fun)2970*38fd1498Szrj pass_if_conversion::execute (function *fun)
2971*38fd1498Szrj {
2972*38fd1498Szrj   struct loop *loop;
2973*38fd1498Szrj   unsigned todo = 0;
2974*38fd1498Szrj 
2975*38fd1498Szrj   if (number_of_loops (fun) <= 1)
2976*38fd1498Szrj     return 0;
2977*38fd1498Szrj 
2978*38fd1498Szrj   FOR_EACH_LOOP (loop, 0)
2979*38fd1498Szrj     if (flag_tree_loop_if_convert == 1
2980*38fd1498Szrj 	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
2981*38fd1498Szrj 	    && !loop->dont_vectorize))
2982*38fd1498Szrj       todo |= tree_if_conversion (loop);
2983*38fd1498Szrj 
2984*38fd1498Szrj   if (todo)
2985*38fd1498Szrj     {
2986*38fd1498Szrj       free_numbers_of_iterations_estimates (fun);
2987*38fd1498Szrj       scev_reset ();
2988*38fd1498Szrj     }
2989*38fd1498Szrj 
2990*38fd1498Szrj   if (flag_checking)
2991*38fd1498Szrj     {
2992*38fd1498Szrj       basic_block bb;
2993*38fd1498Szrj       FOR_EACH_BB_FN (bb, fun)
2994*38fd1498Szrj 	gcc_assert (!bb->aux);
2995*38fd1498Szrj     }
2996*38fd1498Szrj 
2997*38fd1498Szrj   return todo;
2998*38fd1498Szrj }
2999*38fd1498Szrj 
3000*38fd1498Szrj } // anon namespace
3001*38fd1498Szrj 
3002*38fd1498Szrj gimple_opt_pass *
make_pass_if_conversion(gcc::context * ctxt)3003*38fd1498Szrj make_pass_if_conversion (gcc::context *ctxt)
3004*38fd1498Szrj {
3005*38fd1498Szrj   return new pass_if_conversion (ctxt);
3006*38fd1498Szrj }
3007