1*38fd1498Szrj /* If-conversion for vectorizer.
2*38fd1498Szrj Copyright (C) 2004-2018 Free Software Foundation, Inc.
3*38fd1498Szrj Contributed by Devang Patel <dpatel@apple.com>
4*38fd1498Szrj
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3. If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>. */
20*38fd1498Szrj
21*38fd1498Szrj /* This pass implements a tree level if-conversion of loops. Its
22*38fd1498Szrj initial goal is to help the vectorizer to vectorize loops with
23*38fd1498Szrj conditions.
24*38fd1498Szrj
25*38fd1498Szrj A short description of if-conversion:
26*38fd1498Szrj
27*38fd1498Szrj o Decide if a loop is if-convertible or not.
28*38fd1498Szrj o Walk all loop basic blocks in breadth first order (BFS order).
29*38fd1498Szrj o Remove conditional statements (at the end of basic block)
30*38fd1498Szrj and propagate condition into destination basic blocks'
31*38fd1498Szrj predicate list.
32*38fd1498Szrj o Replace modify expression with conditional modify expression
33*38fd1498Szrj using current basic block's condition.
34*38fd1498Szrj o Merge all basic blocks
35*38fd1498Szrj o Replace phi nodes with conditional modify expr
36*38fd1498Szrj o Merge all basic blocks into header
37*38fd1498Szrj
38*38fd1498Szrj Sample transformation:
39*38fd1498Szrj
40*38fd1498Szrj INPUT
41*38fd1498Szrj -----
42*38fd1498Szrj
43*38fd1498Szrj # i_23 = PHI <0(0), i_18(10)>;
44*38fd1498Szrj <L0>:;
45*38fd1498Szrj j_15 = A[i_23];
46*38fd1498Szrj if (j_15 > 41) goto <L1>; else goto <L17>;
47*38fd1498Szrj
48*38fd1498Szrj <L17>:;
49*38fd1498Szrj goto <bb 3> (<L3>);
50*38fd1498Szrj
51*38fd1498Szrj <L1>:;
52*38fd1498Szrj
53*38fd1498Szrj # iftmp.2_4 = PHI <0(8), 42(2)>;
54*38fd1498Szrj <L3>:;
55*38fd1498Szrj A[i_23] = iftmp.2_4;
56*38fd1498Szrj i_18 = i_23 + 1;
57*38fd1498Szrj if (i_18 <= 15) goto <L19>; else goto <L18>;
58*38fd1498Szrj
59*38fd1498Szrj <L19>:;
60*38fd1498Szrj goto <bb 1> (<L0>);
61*38fd1498Szrj
62*38fd1498Szrj <L18>:;
63*38fd1498Szrj
64*38fd1498Szrj OUTPUT
65*38fd1498Szrj ------
66*38fd1498Szrj
67*38fd1498Szrj # i_23 = PHI <0(0), i_18(10)>;
68*38fd1498Szrj <L0>:;
69*38fd1498Szrj j_15 = A[i_23];
70*38fd1498Szrj
71*38fd1498Szrj <L3>:;
72*38fd1498Szrj iftmp.2_4 = j_15 > 41 ? 42 : 0;
73*38fd1498Szrj A[i_23] = iftmp.2_4;
74*38fd1498Szrj i_18 = i_23 + 1;
75*38fd1498Szrj if (i_18 <= 15) goto <L19>; else goto <L18>;
76*38fd1498Szrj
77*38fd1498Szrj <L19>:;
78*38fd1498Szrj goto <bb 1> (<L0>);
79*38fd1498Szrj
80*38fd1498Szrj <L18>:;
81*38fd1498Szrj */
82*38fd1498Szrj
83*38fd1498Szrj #include "config.h"
84*38fd1498Szrj #include "system.h"
85*38fd1498Szrj #include "coretypes.h"
86*38fd1498Szrj #include "backend.h"
87*38fd1498Szrj #include "rtl.h"
88*38fd1498Szrj #include "tree.h"
89*38fd1498Szrj #include "gimple.h"
90*38fd1498Szrj #include "cfghooks.h"
91*38fd1498Szrj #include "tree-pass.h"
92*38fd1498Szrj #include "ssa.h"
93*38fd1498Szrj #include "expmed.h"
94*38fd1498Szrj #include "optabs-query.h"
95*38fd1498Szrj #include "gimple-pretty-print.h"
96*38fd1498Szrj #include "alias.h"
97*38fd1498Szrj #include "fold-const.h"
98*38fd1498Szrj #include "stor-layout.h"
99*38fd1498Szrj #include "gimple-fold.h"
100*38fd1498Szrj #include "gimplify.h"
101*38fd1498Szrj #include "gimple-iterator.h"
102*38fd1498Szrj #include "gimplify-me.h"
103*38fd1498Szrj #include "tree-cfg.h"
104*38fd1498Szrj #include "tree-into-ssa.h"
105*38fd1498Szrj #include "tree-ssa.h"
106*38fd1498Szrj #include "cfgloop.h"
107*38fd1498Szrj #include "tree-data-ref.h"
108*38fd1498Szrj #include "tree-scalar-evolution.h"
109*38fd1498Szrj #include "tree-ssa-loop.h"
110*38fd1498Szrj #include "tree-ssa-loop-niter.h"
111*38fd1498Szrj #include "tree-ssa-loop-ivopts.h"
112*38fd1498Szrj #include "tree-ssa-address.h"
113*38fd1498Szrj #include "dbgcnt.h"
114*38fd1498Szrj #include "tree-hash-traits.h"
115*38fd1498Szrj #include "varasm.h"
116*38fd1498Szrj #include "builtins.h"
117*38fd1498Szrj #include "params.h"
118*38fd1498Szrj #include "cfganal.h"
119*38fd1498Szrj
/* Largest number of arguments a PHI may have and still be handled,
   unless a simd pragma asks for more.  The limit is read from
   PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS.  */
#define MAX_PHI_ARG_NUM \
  (static_cast<unsigned> (PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS)))
124*38fd1498Szrj
/* Set when if-conversion introduces a new load or store that has to be
   predicated.  */
static bool any_pred_load_store = false;

/* Set when some PHI is "complicated": it has more than two arguments
   and cannot be degenerated to a two-argument PHI (see the comment
   before phi_convertible_by_degenerating_args).  */
static bool any_complicated_phi = false;
134*38fd1498Szrj
135*38fd1498Szrj /* Hash for struct innermost_loop_behavior. It depends on the user to
136*38fd1498Szrj free the memory. */
137*38fd1498Szrj
138*38fd1498Szrj struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
139*38fd1498Szrj {
140*38fd1498Szrj static inline hashval_t hash (const value_type &);
141*38fd1498Szrj static inline bool equal (const value_type &,
142*38fd1498Szrj const compare_type &);
143*38fd1498Szrj };
144*38fd1498Szrj
145*38fd1498Szrj inline hashval_t
hash(const value_type & e)146*38fd1498Szrj innermost_loop_behavior_hash::hash (const value_type &e)
147*38fd1498Szrj {
148*38fd1498Szrj hashval_t hash;
149*38fd1498Szrj
150*38fd1498Szrj hash = iterative_hash_expr (e->base_address, 0);
151*38fd1498Szrj hash = iterative_hash_expr (e->offset, hash);
152*38fd1498Szrj hash = iterative_hash_expr (e->init, hash);
153*38fd1498Szrj return iterative_hash_expr (e->step, hash);
154*38fd1498Szrj }
155*38fd1498Szrj
156*38fd1498Szrj inline bool
equal(const value_type & e1,const compare_type & e2)157*38fd1498Szrj innermost_loop_behavior_hash::equal (const value_type &e1,
158*38fd1498Szrj const compare_type &e2)
159*38fd1498Szrj {
160*38fd1498Szrj if ((e1->base_address && !e2->base_address)
161*38fd1498Szrj || (!e1->base_address && e2->base_address)
162*38fd1498Szrj || (!e1->offset && e2->offset)
163*38fd1498Szrj || (e1->offset && !e2->offset)
164*38fd1498Szrj || (!e1->init && e2->init)
165*38fd1498Szrj || (e1->init && !e2->init)
166*38fd1498Szrj || (!e1->step && e2->step)
167*38fd1498Szrj || (e1->step && !e2->step))
168*38fd1498Szrj return false;
169*38fd1498Szrj
170*38fd1498Szrj if (e1->base_address && e2->base_address
171*38fd1498Szrj && !operand_equal_p (e1->base_address, e2->base_address, 0))
172*38fd1498Szrj return false;
173*38fd1498Szrj if (e1->offset && e2->offset
174*38fd1498Szrj && !operand_equal_p (e1->offset, e2->offset, 0))
175*38fd1498Szrj return false;
176*38fd1498Szrj if (e1->init && e2->init
177*38fd1498Szrj && !operand_equal_p (e1->init, e2->init, 0))
178*38fd1498Szrj return false;
179*38fd1498Szrj if (e1->step && e2->step
180*38fd1498Szrj && !operand_equal_p (e1->step, e2->step, 0))
181*38fd1498Szrj return false;
182*38fd1498Szrj
183*38fd1498Szrj return true;
184*38fd1498Szrj }
185*38fd1498Szrj
186*38fd1498Szrj /* List of basic blocks in if-conversion-suitable order. */
187*38fd1498Szrj static basic_block *ifc_bbs;
188*38fd1498Szrj
189*38fd1498Szrj /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
190*38fd1498Szrj static hash_map<innermost_loop_behavior_hash,
191*38fd1498Szrj data_reference_p> *innermost_DR_map;
192*38fd1498Szrj
193*38fd1498Szrj /* Hash table to store <base reference, DR> pairs. */
194*38fd1498Szrj static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
195*38fd1498Szrj
196*38fd1498Szrj /* Structure used to predicate basic blocks. This is attached to the
197*38fd1498Szrj ->aux field of the BBs in the loop to be if-converted. */
198*38fd1498Szrj struct bb_predicate {
199*38fd1498Szrj
200*38fd1498Szrj /* The condition under which this basic block is executed. */
201*38fd1498Szrj tree predicate;
202*38fd1498Szrj
203*38fd1498Szrj /* PREDICATE is gimplified, and the sequence of statements is
204*38fd1498Szrj recorded here, in order to avoid the duplication of computations
205*38fd1498Szrj that occur in previous conditions. See PR44483. */
206*38fd1498Szrj gimple_seq predicate_gimplified_stmts;
207*38fd1498Szrj };
208*38fd1498Szrj
209*38fd1498Szrj /* Returns true when the basic block BB has a predicate. */
210*38fd1498Szrj
211*38fd1498Szrj static inline bool
bb_has_predicate(basic_block bb)212*38fd1498Szrj bb_has_predicate (basic_block bb)
213*38fd1498Szrj {
214*38fd1498Szrj return bb->aux != NULL;
215*38fd1498Szrj }
216*38fd1498Szrj
217*38fd1498Szrj /* Returns the gimplified predicate for basic block BB. */
218*38fd1498Szrj
219*38fd1498Szrj static inline tree
bb_predicate(basic_block bb)220*38fd1498Szrj bb_predicate (basic_block bb)
221*38fd1498Szrj {
222*38fd1498Szrj return ((struct bb_predicate *) bb->aux)->predicate;
223*38fd1498Szrj }
224*38fd1498Szrj
225*38fd1498Szrj /* Sets the gimplified predicate COND for basic block BB. */
226*38fd1498Szrj
227*38fd1498Szrj static inline void
set_bb_predicate(basic_block bb,tree cond)228*38fd1498Szrj set_bb_predicate (basic_block bb, tree cond)
229*38fd1498Szrj {
230*38fd1498Szrj gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
231*38fd1498Szrj && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
232*38fd1498Szrj || is_gimple_condexpr (cond));
233*38fd1498Szrj ((struct bb_predicate *) bb->aux)->predicate = cond;
234*38fd1498Szrj }
235*38fd1498Szrj
236*38fd1498Szrj /* Returns the sequence of statements of the gimplification of the
237*38fd1498Szrj predicate for basic block BB. */
238*38fd1498Szrj
239*38fd1498Szrj static inline gimple_seq
bb_predicate_gimplified_stmts(basic_block bb)240*38fd1498Szrj bb_predicate_gimplified_stmts (basic_block bb)
241*38fd1498Szrj {
242*38fd1498Szrj return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
243*38fd1498Szrj }
244*38fd1498Szrj
245*38fd1498Szrj /* Sets the sequence of statements STMTS of the gimplification of the
246*38fd1498Szrj predicate for basic block BB. */
247*38fd1498Szrj
248*38fd1498Szrj static inline void
set_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)249*38fd1498Szrj set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
250*38fd1498Szrj {
251*38fd1498Szrj ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
252*38fd1498Szrj }
253*38fd1498Szrj
254*38fd1498Szrj /* Adds the sequence of statements STMTS to the sequence of statements
255*38fd1498Szrj of the predicate for basic block BB. */
256*38fd1498Szrj
257*38fd1498Szrj static inline void
add_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)258*38fd1498Szrj add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
259*38fd1498Szrj {
260*38fd1498Szrj /* We might have updated some stmts in STMTS via force_gimple_operand
261*38fd1498Szrj calling fold_stmt and that producing multiple stmts. Delink immediate
262*38fd1498Szrj uses so update_ssa after loop versioning doesn't get confused for
263*38fd1498Szrj the not yet inserted predicates.
264*38fd1498Szrj ??? This should go away once we reliably avoid updating stmts
265*38fd1498Szrj not in any BB. */
266*38fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start (stmts);
267*38fd1498Szrj !gsi_end_p (gsi); gsi_next (&gsi))
268*38fd1498Szrj {
269*38fd1498Szrj gimple *stmt = gsi_stmt (gsi);
270*38fd1498Szrj delink_stmt_imm_use (stmt);
271*38fd1498Szrj gimple_set_modified (stmt, true);
272*38fd1498Szrj }
273*38fd1498Szrj gimple_seq_add_seq_without_update
274*38fd1498Szrj (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
275*38fd1498Szrj }
276*38fd1498Szrj
277*38fd1498Szrj /* Initializes to TRUE the predicate of basic block BB. */
278*38fd1498Szrj
279*38fd1498Szrj static inline void
init_bb_predicate(basic_block bb)280*38fd1498Szrj init_bb_predicate (basic_block bb)
281*38fd1498Szrj {
282*38fd1498Szrj bb->aux = XNEW (struct bb_predicate);
283*38fd1498Szrj set_bb_predicate_gimplified_stmts (bb, NULL);
284*38fd1498Szrj set_bb_predicate (bb, boolean_true_node);
285*38fd1498Szrj }
286*38fd1498Szrj
287*38fd1498Szrj /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
288*38fd1498Szrj
289*38fd1498Szrj static inline void
release_bb_predicate(basic_block bb)290*38fd1498Szrj release_bb_predicate (basic_block bb)
291*38fd1498Szrj {
292*38fd1498Szrj gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
293*38fd1498Szrj if (stmts)
294*38fd1498Szrj {
295*38fd1498Szrj /* Ensure that these stmts haven't yet been added to a bb. */
296*38fd1498Szrj if (flag_checking)
297*38fd1498Szrj for (gimple_stmt_iterator i = gsi_start (stmts);
298*38fd1498Szrj !gsi_end_p (i); gsi_next (&i))
299*38fd1498Szrj gcc_assert (! gimple_bb (gsi_stmt (i)));
300*38fd1498Szrj
301*38fd1498Szrj /* Discard them. */
302*38fd1498Szrj gimple_seq_discard (stmts);
303*38fd1498Szrj set_bb_predicate_gimplified_stmts (bb, NULL);
304*38fd1498Szrj }
305*38fd1498Szrj }
306*38fd1498Szrj
307*38fd1498Szrj /* Free the predicate of basic block BB. */
308*38fd1498Szrj
309*38fd1498Szrj static inline void
free_bb_predicate(basic_block bb)310*38fd1498Szrj free_bb_predicate (basic_block bb)
311*38fd1498Szrj {
312*38fd1498Szrj if (!bb_has_predicate (bb))
313*38fd1498Szrj return;
314*38fd1498Szrj
315*38fd1498Szrj release_bb_predicate (bb);
316*38fd1498Szrj free (bb->aux);
317*38fd1498Szrj bb->aux = NULL;
318*38fd1498Szrj }
319*38fd1498Szrj
320*38fd1498Szrj /* Reinitialize predicate of BB with the true predicate. */
321*38fd1498Szrj
322*38fd1498Szrj static inline void
reset_bb_predicate(basic_block bb)323*38fd1498Szrj reset_bb_predicate (basic_block bb)
324*38fd1498Szrj {
325*38fd1498Szrj if (!bb_has_predicate (bb))
326*38fd1498Szrj init_bb_predicate (bb);
327*38fd1498Szrj else
328*38fd1498Szrj {
329*38fd1498Szrj release_bb_predicate (bb);
330*38fd1498Szrj set_bb_predicate (bb, boolean_true_node);
331*38fd1498Szrj }
332*38fd1498Szrj }
333*38fd1498Szrj
334*38fd1498Szrj /* Returns a new SSA_NAME of type TYPE that is assigned the value of
335*38fd1498Szrj the expression EXPR. Inserts the statement created for this
336*38fd1498Szrj computation before GSI and leaves the iterator GSI at the same
337*38fd1498Szrj statement. */
338*38fd1498Szrj
339*38fd1498Szrj static tree
ifc_temp_var(tree type,tree expr,gimple_stmt_iterator * gsi)340*38fd1498Szrj ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
341*38fd1498Szrj {
342*38fd1498Szrj tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
343*38fd1498Szrj gimple *stmt = gimple_build_assign (new_name, expr);
344*38fd1498Szrj gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
345*38fd1498Szrj gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
346*38fd1498Szrj return new_name;
347*38fd1498Szrj }
348*38fd1498Szrj
349*38fd1498Szrj /* Return true when COND is a false predicate. */
350*38fd1498Szrj
351*38fd1498Szrj static inline bool
is_false_predicate(tree cond)352*38fd1498Szrj is_false_predicate (tree cond)
353*38fd1498Szrj {
354*38fd1498Szrj return (cond != NULL_TREE
355*38fd1498Szrj && (cond == boolean_false_node
356*38fd1498Szrj || integer_zerop (cond)));
357*38fd1498Szrj }
358*38fd1498Szrj
359*38fd1498Szrj /* Return true when COND is a true predicate. */
360*38fd1498Szrj
361*38fd1498Szrj static inline bool
is_true_predicate(tree cond)362*38fd1498Szrj is_true_predicate (tree cond)
363*38fd1498Szrj {
364*38fd1498Szrj return (cond == NULL_TREE
365*38fd1498Szrj || cond == boolean_true_node
366*38fd1498Szrj || integer_onep (cond));
367*38fd1498Szrj }
368*38fd1498Szrj
369*38fd1498Szrj /* Returns true when BB has a predicate that is not trivial: true or
370*38fd1498Szrj NULL_TREE. */
371*38fd1498Szrj
372*38fd1498Szrj static inline bool
is_predicated(basic_block bb)373*38fd1498Szrj is_predicated (basic_block bb)
374*38fd1498Szrj {
375*38fd1498Szrj return !is_true_predicate (bb_predicate (bb));
376*38fd1498Szrj }
377*38fd1498Szrj
378*38fd1498Szrj /* Parses the predicate COND and returns its comparison code and
379*38fd1498Szrj operands OP0 and OP1. */
380*38fd1498Szrj
381*38fd1498Szrj static enum tree_code
parse_predicate(tree cond,tree * op0,tree * op1)382*38fd1498Szrj parse_predicate (tree cond, tree *op0, tree *op1)
383*38fd1498Szrj {
384*38fd1498Szrj gimple *s;
385*38fd1498Szrj
386*38fd1498Szrj if (TREE_CODE (cond) == SSA_NAME
387*38fd1498Szrj && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
388*38fd1498Szrj {
389*38fd1498Szrj if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
390*38fd1498Szrj {
391*38fd1498Szrj *op0 = gimple_assign_rhs1 (s);
392*38fd1498Szrj *op1 = gimple_assign_rhs2 (s);
393*38fd1498Szrj return gimple_assign_rhs_code (s);
394*38fd1498Szrj }
395*38fd1498Szrj
396*38fd1498Szrj else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
397*38fd1498Szrj {
398*38fd1498Szrj tree op = gimple_assign_rhs1 (s);
399*38fd1498Szrj tree type = TREE_TYPE (op);
400*38fd1498Szrj enum tree_code code = parse_predicate (op, op0, op1);
401*38fd1498Szrj
402*38fd1498Szrj return code == ERROR_MARK ? ERROR_MARK
403*38fd1498Szrj : invert_tree_comparison (code, HONOR_NANS (type));
404*38fd1498Szrj }
405*38fd1498Szrj
406*38fd1498Szrj return ERROR_MARK;
407*38fd1498Szrj }
408*38fd1498Szrj
409*38fd1498Szrj if (COMPARISON_CLASS_P (cond))
410*38fd1498Szrj {
411*38fd1498Szrj *op0 = TREE_OPERAND (cond, 0);
412*38fd1498Szrj *op1 = TREE_OPERAND (cond, 1);
413*38fd1498Szrj return TREE_CODE (cond);
414*38fd1498Szrj }
415*38fd1498Szrj
416*38fd1498Szrj return ERROR_MARK;
417*38fd1498Szrj }
418*38fd1498Szrj
419*38fd1498Szrj /* Returns the fold of predicate C1 OR C2 at location LOC. */
420*38fd1498Szrj
421*38fd1498Szrj static tree
fold_or_predicates(location_t loc,tree c1,tree c2)422*38fd1498Szrj fold_or_predicates (location_t loc, tree c1, tree c2)
423*38fd1498Szrj {
424*38fd1498Szrj tree op1a, op1b, op2a, op2b;
425*38fd1498Szrj enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
426*38fd1498Szrj enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
427*38fd1498Szrj
428*38fd1498Szrj if (code1 != ERROR_MARK && code2 != ERROR_MARK)
429*38fd1498Szrj {
430*38fd1498Szrj tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
431*38fd1498Szrj code2, op2a, op2b);
432*38fd1498Szrj if (t)
433*38fd1498Szrj return t;
434*38fd1498Szrj }
435*38fd1498Szrj
436*38fd1498Szrj return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
437*38fd1498Szrj }
438*38fd1498Szrj
439*38fd1498Szrj /* Returns either a COND_EXPR or the folded expression if the folded
440*38fd1498Szrj expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
441*38fd1498Szrj a constant or a SSA_NAME. */
442*38fd1498Szrj
443*38fd1498Szrj static tree
fold_build_cond_expr(tree type,tree cond,tree rhs,tree lhs)444*38fd1498Szrj fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
445*38fd1498Szrj {
446*38fd1498Szrj tree rhs1, lhs1, cond_expr;
447*38fd1498Szrj
448*38fd1498Szrj /* If COND is comparison r != 0 and r has boolean type, convert COND
449*38fd1498Szrj to SSA_NAME to accept by vect bool pattern. */
450*38fd1498Szrj if (TREE_CODE (cond) == NE_EXPR)
451*38fd1498Szrj {
452*38fd1498Szrj tree op0 = TREE_OPERAND (cond, 0);
453*38fd1498Szrj tree op1 = TREE_OPERAND (cond, 1);
454*38fd1498Szrj if (TREE_CODE (op0) == SSA_NAME
455*38fd1498Szrj && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
456*38fd1498Szrj && (integer_zerop (op1)))
457*38fd1498Szrj cond = op0;
458*38fd1498Szrj }
459*38fd1498Szrj cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
460*38fd1498Szrj
461*38fd1498Szrj if (cond_expr == NULL_TREE)
462*38fd1498Szrj return build3 (COND_EXPR, type, cond, rhs, lhs);
463*38fd1498Szrj
464*38fd1498Szrj STRIP_USELESS_TYPE_CONVERSION (cond_expr);
465*38fd1498Szrj
466*38fd1498Szrj if (is_gimple_val (cond_expr))
467*38fd1498Szrj return cond_expr;
468*38fd1498Szrj
469*38fd1498Szrj if (TREE_CODE (cond_expr) == ABS_EXPR)
470*38fd1498Szrj {
471*38fd1498Szrj rhs1 = TREE_OPERAND (cond_expr, 1);
472*38fd1498Szrj STRIP_USELESS_TYPE_CONVERSION (rhs1);
473*38fd1498Szrj if (is_gimple_val (rhs1))
474*38fd1498Szrj return build1 (ABS_EXPR, type, rhs1);
475*38fd1498Szrj }
476*38fd1498Szrj
477*38fd1498Szrj if (TREE_CODE (cond_expr) == MIN_EXPR
478*38fd1498Szrj || TREE_CODE (cond_expr) == MAX_EXPR)
479*38fd1498Szrj {
480*38fd1498Szrj lhs1 = TREE_OPERAND (cond_expr, 0);
481*38fd1498Szrj STRIP_USELESS_TYPE_CONVERSION (lhs1);
482*38fd1498Szrj rhs1 = TREE_OPERAND (cond_expr, 1);
483*38fd1498Szrj STRIP_USELESS_TYPE_CONVERSION (rhs1);
484*38fd1498Szrj if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
485*38fd1498Szrj return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
486*38fd1498Szrj }
487*38fd1498Szrj return build3 (COND_EXPR, type, cond, rhs, lhs);
488*38fd1498Szrj }
489*38fd1498Szrj
490*38fd1498Szrj /* Add condition NC to the predicate list of basic block BB. LOOP is
491*38fd1498Szrj the loop to be if-converted. Use predicate of cd-equivalent block
492*38fd1498Szrj for join bb if it exists: we call basic blocks bb1 and bb2
493*38fd1498Szrj cd-equivalent if they are executed under the same condition. */
494*38fd1498Szrj
495*38fd1498Szrj static inline void
add_to_predicate_list(struct loop * loop,basic_block bb,tree nc)496*38fd1498Szrj add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
497*38fd1498Szrj {
498*38fd1498Szrj tree bc, *tp;
499*38fd1498Szrj basic_block dom_bb;
500*38fd1498Szrj
501*38fd1498Szrj if (is_true_predicate (nc))
502*38fd1498Szrj return;
503*38fd1498Szrj
504*38fd1498Szrj /* If dominance tells us this basic block is always executed,
505*38fd1498Szrj don't record any predicates for it. */
506*38fd1498Szrj if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
507*38fd1498Szrj return;
508*38fd1498Szrj
509*38fd1498Szrj dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
510*38fd1498Szrj /* We use notion of cd equivalence to get simpler predicate for
511*38fd1498Szrj join block, e.g. if join block has 2 predecessors with predicates
512*38fd1498Szrj p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
513*38fd1498Szrj p1 & p2 | p1 & !p2. */
514*38fd1498Szrj if (dom_bb != loop->header
515*38fd1498Szrj && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
516*38fd1498Szrj {
517*38fd1498Szrj gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
518*38fd1498Szrj bc = bb_predicate (dom_bb);
519*38fd1498Szrj if (!is_true_predicate (bc))
520*38fd1498Szrj set_bb_predicate (bb, bc);
521*38fd1498Szrj else
522*38fd1498Szrj gcc_assert (is_true_predicate (bb_predicate (bb)));
523*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
524*38fd1498Szrj fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
525*38fd1498Szrj dom_bb->index, bb->index);
526*38fd1498Szrj return;
527*38fd1498Szrj }
528*38fd1498Szrj
529*38fd1498Szrj if (!is_predicated (bb))
530*38fd1498Szrj bc = nc;
531*38fd1498Szrj else
532*38fd1498Szrj {
533*38fd1498Szrj bc = bb_predicate (bb);
534*38fd1498Szrj bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
535*38fd1498Szrj if (is_true_predicate (bc))
536*38fd1498Szrj {
537*38fd1498Szrj reset_bb_predicate (bb);
538*38fd1498Szrj return;
539*38fd1498Szrj }
540*38fd1498Szrj }
541*38fd1498Szrj
542*38fd1498Szrj /* Allow a TRUTH_NOT_EXPR around the main predicate. */
543*38fd1498Szrj if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
544*38fd1498Szrj tp = &TREE_OPERAND (bc, 0);
545*38fd1498Szrj else
546*38fd1498Szrj tp = &bc;
547*38fd1498Szrj if (!is_gimple_condexpr (*tp))
548*38fd1498Szrj {
549*38fd1498Szrj gimple_seq stmts;
550*38fd1498Szrj *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
551*38fd1498Szrj add_bb_predicate_gimplified_stmts (bb, stmts);
552*38fd1498Szrj }
553*38fd1498Szrj set_bb_predicate (bb, bc);
554*38fd1498Szrj }
555*38fd1498Szrj
556*38fd1498Szrj /* Add the condition COND to the previous condition PREV_COND, and add
557*38fd1498Szrj this to the predicate list of the destination of edge E. LOOP is
558*38fd1498Szrj the loop to be if-converted. */
559*38fd1498Szrj
560*38fd1498Szrj static void
add_to_dst_predicate_list(struct loop * loop,edge e,tree prev_cond,tree cond)561*38fd1498Szrj add_to_dst_predicate_list (struct loop *loop, edge e,
562*38fd1498Szrj tree prev_cond, tree cond)
563*38fd1498Szrj {
564*38fd1498Szrj if (!flow_bb_inside_loop_p (loop, e->dest))
565*38fd1498Szrj return;
566*38fd1498Szrj
567*38fd1498Szrj if (!is_true_predicate (prev_cond))
568*38fd1498Szrj cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
569*38fd1498Szrj prev_cond, cond);
570*38fd1498Szrj
571*38fd1498Szrj if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
572*38fd1498Szrj add_to_predicate_list (loop, e->dest, cond);
573*38fd1498Szrj }
574*38fd1498Szrj
575*38fd1498Szrj /* Return true if one of the successor edges of BB exits LOOP. */
576*38fd1498Szrj
577*38fd1498Szrj static bool
bb_with_exit_edge_p(struct loop * loop,basic_block bb)578*38fd1498Szrj bb_with_exit_edge_p (struct loop *loop, basic_block bb)
579*38fd1498Szrj {
580*38fd1498Szrj edge e;
581*38fd1498Szrj edge_iterator ei;
582*38fd1498Szrj
583*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
584*38fd1498Szrj if (loop_exit_edge_p (loop, e))
585*38fd1498Szrj return true;
586*38fd1498Szrj
587*38fd1498Szrj return false;
588*38fd1498Szrj }
589*38fd1498Szrj
590*38fd1498Szrj /* Given PHI which has more than two arguments, this function checks if
591*38fd1498Szrj it's if-convertible by degenerating its arguments. Specifically, if
592*38fd1498Szrj below two conditions are satisfied:
593*38fd1498Szrj
594*38fd1498Szrj 1) Number of PHI arguments with different values equals to 2 and one
595*38fd1498Szrj argument has the only occurrence.
596*38fd1498Szrj 2) The edge corresponding to the unique argument isn't critical edge.
597*38fd1498Szrj
598*38fd1498Szrj Such PHI can be handled as PHIs have only two arguments. For example,
599*38fd1498Szrj below PHI:
600*38fd1498Szrj
601*38fd1498Szrj res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
602*38fd1498Szrj
603*38fd1498Szrj can be transformed into:
604*38fd1498Szrj
605*38fd1498Szrj res = (predicate of e3) ? A_2 : A_1;
606*38fd1498Szrj
607*38fd1498Szrj Return TRUE if it is the case, FALSE otherwise. */
608*38fd1498Szrj
609*38fd1498Szrj static bool
phi_convertible_by_degenerating_args(gphi * phi)610*38fd1498Szrj phi_convertible_by_degenerating_args (gphi *phi)
611*38fd1498Szrj {
612*38fd1498Szrj edge e;
613*38fd1498Szrj tree arg, t1 = NULL, t2 = NULL;
614*38fd1498Szrj unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
615*38fd1498Szrj unsigned int num_args = gimple_phi_num_args (phi);
616*38fd1498Szrj
617*38fd1498Szrj gcc_assert (num_args > 2);
618*38fd1498Szrj
619*38fd1498Szrj for (i = 0; i < num_args; i++)
620*38fd1498Szrj {
621*38fd1498Szrj arg = gimple_phi_arg_def (phi, i);
622*38fd1498Szrj if (t1 == NULL || operand_equal_p (t1, arg, 0))
623*38fd1498Szrj {
624*38fd1498Szrj n1++;
625*38fd1498Szrj i1 = i;
626*38fd1498Szrj t1 = arg;
627*38fd1498Szrj }
628*38fd1498Szrj else if (t2 == NULL || operand_equal_p (t2, arg, 0))
629*38fd1498Szrj {
630*38fd1498Szrj n2++;
631*38fd1498Szrj i2 = i;
632*38fd1498Szrj t2 = arg;
633*38fd1498Szrj }
634*38fd1498Szrj else
635*38fd1498Szrj return false;
636*38fd1498Szrj }
637*38fd1498Szrj
638*38fd1498Szrj if (n1 != 1 && n2 != 1)
639*38fd1498Szrj return false;
640*38fd1498Szrj
641*38fd1498Szrj /* Check if the edge corresponding to the unique arg is critical. */
642*38fd1498Szrj e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
643*38fd1498Szrj if (EDGE_COUNT (e->src->succs) > 1)
644*38fd1498Szrj return false;
645*38fd1498Szrj
646*38fd1498Szrj return true;
647*38fd1498Szrj }
648*38fd1498Szrj
649*38fd1498Szrj /* Return true when PHI is if-convertible. PHI is part of loop LOOP
650*38fd1498Szrj and it belongs to basic block BB. Note at this point, it is sure
651*38fd1498Szrj that PHI is if-convertible. This function updates global variable
652*38fd1498Szrj ANY_COMPLICATED_PHI if PHI is complicated. */
653*38fd1498Szrj
654*38fd1498Szrj static bool
if_convertible_phi_p(struct loop * loop,basic_block bb,gphi * phi)655*38fd1498Szrj if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
656*38fd1498Szrj {
657*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
658*38fd1498Szrj {
659*38fd1498Szrj fprintf (dump_file, "-------------------------\n");
660*38fd1498Szrj print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
661*38fd1498Szrj }
662*38fd1498Szrj
663*38fd1498Szrj if (bb != loop->header
664*38fd1498Szrj && gimple_phi_num_args (phi) > 2
665*38fd1498Szrj && !phi_convertible_by_degenerating_args (phi))
666*38fd1498Szrj any_complicated_phi = true;
667*38fd1498Szrj
668*38fd1498Szrj return true;
669*38fd1498Szrj }
670*38fd1498Szrj
671*38fd1498Szrj /* Records the status of a data reference. This struct is attached to
672*38fd1498Szrj each DR->aux field. */
673*38fd1498Szrj
674*38fd1498Szrj struct ifc_dr {
675*38fd1498Szrj bool rw_unconditionally;
676*38fd1498Szrj bool w_unconditionally;
677*38fd1498Szrj bool written_at_least_once;
678*38fd1498Szrj
679*38fd1498Szrj tree rw_predicate;
680*38fd1498Szrj tree w_predicate;
681*38fd1498Szrj tree base_w_predicate;
682*38fd1498Szrj };
683*38fd1498Szrj
/* Accessors for the struct ifc_dr stored in DR->aux and for its three
   boolean flags.  */
#define IFC_DR(DR) (static_cast<struct ifc_dr *> ((DR)->aux))
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
688*38fd1498Szrj
689*38fd1498Szrj /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
690*38fd1498Szrj HASH tables. While storing them in HASH table, it checks if the
691*38fd1498Szrj reference is unconditionally read or written and stores that as a flag
692*38fd1498Szrj information. For base reference it checks if it is written atlest once
693*38fd1498Szrj unconditionally and stores it as flag information along with DR.
694*38fd1498Szrj In other words for every data reference A in STMT there exist other
695*38fd1498Szrj accesses to a data reference with the same base with predicates that
696*38fd1498Szrj add up (OR-up) to the true predicate: this ensures that the data
697*38fd1498Szrj reference A is touched (read or written) on every iteration of the
698*38fd1498Szrj if-converted loop. */
699*38fd1498Szrj static void
hash_memrefs_baserefs_and_store_DRs_read_written_info(data_reference_p a)700*38fd1498Szrj hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
701*38fd1498Szrj {
702*38fd1498Szrj
703*38fd1498Szrj data_reference_p *master_dr, *base_master_dr;
704*38fd1498Szrj tree base_ref = DR_BASE_OBJECT (a);
705*38fd1498Szrj innermost_loop_behavior *innermost = &DR_INNERMOST (a);
706*38fd1498Szrj tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
707*38fd1498Szrj bool exist1, exist2;
708*38fd1498Szrj
709*38fd1498Szrj master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
710*38fd1498Szrj if (!exist1)
711*38fd1498Szrj *master_dr = a;
712*38fd1498Szrj
713*38fd1498Szrj if (DR_IS_WRITE (a))
714*38fd1498Szrj {
715*38fd1498Szrj IFC_DR (*master_dr)->w_predicate
716*38fd1498Szrj = fold_or_predicates (UNKNOWN_LOCATION, ca,
717*38fd1498Szrj IFC_DR (*master_dr)->w_predicate);
718*38fd1498Szrj if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
719*38fd1498Szrj DR_W_UNCONDITIONALLY (*master_dr) = true;
720*38fd1498Szrj }
721*38fd1498Szrj IFC_DR (*master_dr)->rw_predicate
722*38fd1498Szrj = fold_or_predicates (UNKNOWN_LOCATION, ca,
723*38fd1498Szrj IFC_DR (*master_dr)->rw_predicate);
724*38fd1498Szrj if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
725*38fd1498Szrj DR_RW_UNCONDITIONALLY (*master_dr) = true;
726*38fd1498Szrj
727*38fd1498Szrj if (DR_IS_WRITE (a))
728*38fd1498Szrj {
729*38fd1498Szrj base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
730*38fd1498Szrj if (!exist2)
731*38fd1498Szrj *base_master_dr = a;
732*38fd1498Szrj IFC_DR (*base_master_dr)->base_w_predicate
733*38fd1498Szrj = fold_or_predicates (UNKNOWN_LOCATION, ca,
734*38fd1498Szrj IFC_DR (*base_master_dr)->base_w_predicate);
735*38fd1498Szrj if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
736*38fd1498Szrj DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
737*38fd1498Szrj }
738*38fd1498Szrj }
739*38fd1498Szrj
740*38fd1498Szrj /* Return TRUE if can prove the index IDX of an array reference REF is
741*38fd1498Szrj within array bound. Return false otherwise. */
742*38fd1498Szrj
743*38fd1498Szrj static bool
idx_within_array_bound(tree ref,tree * idx,void * dta)744*38fd1498Szrj idx_within_array_bound (tree ref, tree *idx, void *dta)
745*38fd1498Szrj {
746*38fd1498Szrj bool overflow;
747*38fd1498Szrj widest_int niter, valid_niter, delta, wi_step;
748*38fd1498Szrj tree ev, init, step;
749*38fd1498Szrj tree low, high;
750*38fd1498Szrj struct loop *loop = (struct loop*) dta;
751*38fd1498Szrj
752*38fd1498Szrj /* Only support within-bound access for array references. */
753*38fd1498Szrj if (TREE_CODE (ref) != ARRAY_REF)
754*38fd1498Szrj return false;
755*38fd1498Szrj
756*38fd1498Szrj /* For arrays at the end of the structure, we are not guaranteed that they
757*38fd1498Szrj do not really extend over their declared size. However, for arrays of
758*38fd1498Szrj size greater than one, this is unlikely to be intended. */
759*38fd1498Szrj if (array_at_struct_end_p (ref))
760*38fd1498Szrj return false;
761*38fd1498Szrj
762*38fd1498Szrj ev = analyze_scalar_evolution (loop, *idx);
763*38fd1498Szrj ev = instantiate_parameters (loop, ev);
764*38fd1498Szrj init = initial_condition (ev);
765*38fd1498Szrj step = evolution_part_in_loop_num (ev, loop->num);
766*38fd1498Szrj
767*38fd1498Szrj if (!init || TREE_CODE (init) != INTEGER_CST
768*38fd1498Szrj || (step && TREE_CODE (step) != INTEGER_CST))
769*38fd1498Szrj return false;
770*38fd1498Szrj
771*38fd1498Szrj low = array_ref_low_bound (ref);
772*38fd1498Szrj high = array_ref_up_bound (ref);
773*38fd1498Szrj
774*38fd1498Szrj /* The case of nonconstant bounds could be handled, but it would be
775*38fd1498Szrj complicated. */
776*38fd1498Szrj if (TREE_CODE (low) != INTEGER_CST
777*38fd1498Szrj || !high || TREE_CODE (high) != INTEGER_CST)
778*38fd1498Szrj return false;
779*38fd1498Szrj
780*38fd1498Szrj /* Check if the intial idx is within bound. */
781*38fd1498Szrj if (wi::to_widest (init) < wi::to_widest (low)
782*38fd1498Szrj || wi::to_widest (init) > wi::to_widest (high))
783*38fd1498Szrj return false;
784*38fd1498Szrj
785*38fd1498Szrj /* The idx is always within bound. */
786*38fd1498Szrj if (!step || integer_zerop (step))
787*38fd1498Szrj return true;
788*38fd1498Szrj
789*38fd1498Szrj if (!max_loop_iterations (loop, &niter))
790*38fd1498Szrj return false;
791*38fd1498Szrj
792*38fd1498Szrj if (wi::to_widest (step) < 0)
793*38fd1498Szrj {
794*38fd1498Szrj delta = wi::to_widest (init) - wi::to_widest (low);
795*38fd1498Szrj wi_step = -wi::to_widest (step);
796*38fd1498Szrj }
797*38fd1498Szrj else
798*38fd1498Szrj {
799*38fd1498Szrj delta = wi::to_widest (high) - wi::to_widest (init);
800*38fd1498Szrj wi_step = wi::to_widest (step);
801*38fd1498Szrj }
802*38fd1498Szrj
803*38fd1498Szrj valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
804*38fd1498Szrj /* The iteration space of idx is within array bound. */
805*38fd1498Szrj if (!overflow && niter <= valid_niter)
806*38fd1498Szrj return true;
807*38fd1498Szrj
808*38fd1498Szrj return false;
809*38fd1498Szrj }
810*38fd1498Szrj
811*38fd1498Szrj /* Return TRUE if ref is a within bound array reference. */
812*38fd1498Szrj
813*38fd1498Szrj static bool
ref_within_array_bound(gimple * stmt,tree ref)814*38fd1498Szrj ref_within_array_bound (gimple *stmt, tree ref)
815*38fd1498Szrj {
816*38fd1498Szrj struct loop *loop = loop_containing_stmt (stmt);
817*38fd1498Szrj
818*38fd1498Szrj gcc_assert (loop != NULL);
819*38fd1498Szrj return for_each_index (&ref, idx_within_array_bound, loop);
820*38fd1498Szrj }
821*38fd1498Szrj
822*38fd1498Szrj
823*38fd1498Szrj /* Given a memory reference expression T, return TRUE if base object
824*38fd1498Szrj it refers to is writable. The base object of a memory reference
825*38fd1498Szrj is the main object being referenced, which is returned by function
826*38fd1498Szrj get_base_address. */
827*38fd1498Szrj
828*38fd1498Szrj static bool
base_object_writable(tree ref)829*38fd1498Szrj base_object_writable (tree ref)
830*38fd1498Szrj {
831*38fd1498Szrj tree base_tree = get_base_address (ref);
832*38fd1498Szrj
833*38fd1498Szrj return (base_tree
834*38fd1498Szrj && DECL_P (base_tree)
835*38fd1498Szrj && decl_binds_to_current_def_p (base_tree)
836*38fd1498Szrj && !TREE_READONLY (base_tree));
837*38fd1498Szrj }
838*38fd1498Szrj
839*38fd1498Szrj /* Return true when the memory references of STMT won't trap in the
840*38fd1498Szrj if-converted code. There are two things that we have to check for:
841*38fd1498Szrj
842*38fd1498Szrj - writes to memory occur to writable memory: if-conversion of
843*38fd1498Szrj memory writes transforms the conditional memory writes into
844*38fd1498Szrj unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
845*38fd1498Szrj into "A[i] = cond ? foo : A[i]", and as the write to memory may not
846*38fd1498Szrj be executed at all in the original code, it may be a readonly
847*38fd1498Szrj memory. To check that A is not const-qualified, we check that
848*38fd1498Szrj there exists at least an unconditional write to A in the current
849*38fd1498Szrj function.
850*38fd1498Szrj
851*38fd1498Szrj - reads or writes to memory are valid memory accesses for every
852*38fd1498Szrj iteration. To check that the memory accesses are correctly formed
853*38fd1498Szrj and that we are allowed to read and write in these locations, we
854*38fd1498Szrj check that the memory accesses to be if-converted occur at every
855*38fd1498Szrj iteration unconditionally.
856*38fd1498Szrj
857*38fd1498Szrj Returns true for the memory reference in STMT, same memory reference
858*38fd1498Szrj is read or written unconditionally atleast once and the base memory
859*38fd1498Szrj reference is written unconditionally once. This is to check reference
860*38fd1498Szrj will not write fault. Also retuns true if the memory reference is
861*38fd1498Szrj unconditionally read once then we are conditionally writing to memory
862*38fd1498Szrj which is defined as read and write and is bound to the definition
863*38fd1498Szrj we are seeing. */
864*38fd1498Szrj static bool
ifcvt_memrefs_wont_trap(gimple * stmt,vec<data_reference_p> drs)865*38fd1498Szrj ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
866*38fd1498Szrj {
867*38fd1498Szrj /* If DR didn't see a reference here we can't use it to tell
868*38fd1498Szrj whether the ref traps or not. */
869*38fd1498Szrj if (gimple_uid (stmt) == 0)
870*38fd1498Szrj return false;
871*38fd1498Szrj
872*38fd1498Szrj data_reference_p *master_dr, *base_master_dr;
873*38fd1498Szrj data_reference_p a = drs[gimple_uid (stmt) - 1];
874*38fd1498Szrj
875*38fd1498Szrj tree base = DR_BASE_OBJECT (a);
876*38fd1498Szrj innermost_loop_behavior *innermost = &DR_INNERMOST (a);
877*38fd1498Szrj
878*38fd1498Szrj gcc_assert (DR_STMT (a) == stmt);
879*38fd1498Szrj gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
880*38fd1498Szrj || DR_INIT (a) || DR_STEP (a));
881*38fd1498Szrj
882*38fd1498Szrj master_dr = innermost_DR_map->get (innermost);
883*38fd1498Szrj gcc_assert (master_dr != NULL);
884*38fd1498Szrj
885*38fd1498Szrj base_master_dr = baseref_DR_map->get (base);
886*38fd1498Szrj
887*38fd1498Szrj /* If a is unconditionally written to it doesn't trap. */
888*38fd1498Szrj if (DR_W_UNCONDITIONALLY (*master_dr))
889*38fd1498Szrj return true;
890*38fd1498Szrj
891*38fd1498Szrj /* If a is unconditionally accessed then ...
892*38fd1498Szrj
893*38fd1498Szrj Even a is conditional access, we can treat it as an unconditional
894*38fd1498Szrj one if it's an array reference and all its index are within array
895*38fd1498Szrj bound. */
896*38fd1498Szrj if (DR_RW_UNCONDITIONALLY (*master_dr)
897*38fd1498Szrj || ref_within_array_bound (stmt, DR_REF (a)))
898*38fd1498Szrj {
899*38fd1498Szrj /* an unconditional read won't trap. */
900*38fd1498Szrj if (DR_IS_READ (a))
901*38fd1498Szrj return true;
902*38fd1498Szrj
903*38fd1498Szrj /* an unconditionaly write won't trap if the base is written
904*38fd1498Szrj to unconditionally. */
905*38fd1498Szrj if (base_master_dr
906*38fd1498Szrj && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
907*38fd1498Szrj return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
908*38fd1498Szrj /* or the base is known to be not readonly. */
909*38fd1498Szrj else if (base_object_writable (DR_REF (a)))
910*38fd1498Szrj return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
911*38fd1498Szrj }
912*38fd1498Szrj
913*38fd1498Szrj return false;
914*38fd1498Szrj }
915*38fd1498Szrj
916*38fd1498Szrj /* Return true if STMT could be converted into a masked load or store
917*38fd1498Szrj (conditional load or store based on a mask computed from bb predicate). */
918*38fd1498Szrj
919*38fd1498Szrj static bool
ifcvt_can_use_mask_load_store(gimple * stmt)920*38fd1498Szrj ifcvt_can_use_mask_load_store (gimple *stmt)
921*38fd1498Szrj {
922*38fd1498Szrj tree lhs, ref;
923*38fd1498Szrj machine_mode mode;
924*38fd1498Szrj basic_block bb = gimple_bb (stmt);
925*38fd1498Szrj bool is_load;
926*38fd1498Szrj
927*38fd1498Szrj if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
928*38fd1498Szrj || bb->loop_father->dont_vectorize
929*38fd1498Szrj || !gimple_assign_single_p (stmt)
930*38fd1498Szrj || gimple_has_volatile_ops (stmt))
931*38fd1498Szrj return false;
932*38fd1498Szrj
933*38fd1498Szrj /* Check whether this is a load or store. */
934*38fd1498Szrj lhs = gimple_assign_lhs (stmt);
935*38fd1498Szrj if (gimple_store_p (stmt))
936*38fd1498Szrj {
937*38fd1498Szrj if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
938*38fd1498Szrj return false;
939*38fd1498Szrj is_load = false;
940*38fd1498Szrj ref = lhs;
941*38fd1498Szrj }
942*38fd1498Szrj else if (gimple_assign_load_p (stmt))
943*38fd1498Szrj {
944*38fd1498Szrj is_load = true;
945*38fd1498Szrj ref = gimple_assign_rhs1 (stmt);
946*38fd1498Szrj }
947*38fd1498Szrj else
948*38fd1498Szrj return false;
949*38fd1498Szrj
950*38fd1498Szrj if (may_be_nonaddressable_p (ref))
951*38fd1498Szrj return false;
952*38fd1498Szrj
953*38fd1498Szrj /* Mask should be integer mode of the same size as the load/store
954*38fd1498Szrj mode. */
955*38fd1498Szrj mode = TYPE_MODE (TREE_TYPE (lhs));
956*38fd1498Szrj if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
957*38fd1498Szrj return false;
958*38fd1498Szrj
959*38fd1498Szrj if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
960*38fd1498Szrj return true;
961*38fd1498Szrj
962*38fd1498Szrj return false;
963*38fd1498Szrj }
964*38fd1498Szrj
965*38fd1498Szrj /* Return true when STMT is if-convertible.
966*38fd1498Szrj
967*38fd1498Szrj GIMPLE_ASSIGN statement is not if-convertible if,
968*38fd1498Szrj - it is not movable,
969*38fd1498Szrj - it could trap,
970*38fd1498Szrj - LHS is not var decl. */
971*38fd1498Szrj
972*38fd1498Szrj static bool
if_convertible_gimple_assign_stmt_p(gimple * stmt,vec<data_reference_p> refs)973*38fd1498Szrj if_convertible_gimple_assign_stmt_p (gimple *stmt,
974*38fd1498Szrj vec<data_reference_p> refs)
975*38fd1498Szrj {
976*38fd1498Szrj tree lhs = gimple_assign_lhs (stmt);
977*38fd1498Szrj
978*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
979*38fd1498Szrj {
980*38fd1498Szrj fprintf (dump_file, "-------------------------\n");
981*38fd1498Szrj print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
982*38fd1498Szrj }
983*38fd1498Szrj
984*38fd1498Szrj if (!is_gimple_reg_type (TREE_TYPE (lhs)))
985*38fd1498Szrj return false;
986*38fd1498Szrj
987*38fd1498Szrj /* Some of these constrains might be too conservative. */
988*38fd1498Szrj if (stmt_ends_bb_p (stmt)
989*38fd1498Szrj || gimple_has_volatile_ops (stmt)
990*38fd1498Szrj || (TREE_CODE (lhs) == SSA_NAME
991*38fd1498Szrj && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
992*38fd1498Szrj || gimple_has_side_effects (stmt))
993*38fd1498Szrj {
994*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
995*38fd1498Szrj fprintf (dump_file, "stmt not suitable for ifcvt\n");
996*38fd1498Szrj return false;
997*38fd1498Szrj }
998*38fd1498Szrj
999*38fd1498Szrj /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1000*38fd1498Szrj in between if_convertible_loop_p and combine_blocks
1001*38fd1498Szrj we can perform loop versioning. */
1002*38fd1498Szrj gimple_set_plf (stmt, GF_PLF_2, false);
1003*38fd1498Szrj
1004*38fd1498Szrj if ((! gimple_vuse (stmt)
1005*38fd1498Szrj || gimple_could_trap_p_1 (stmt, false, false)
1006*38fd1498Szrj || ! ifcvt_memrefs_wont_trap (stmt, refs))
1007*38fd1498Szrj && gimple_could_trap_p (stmt))
1008*38fd1498Szrj {
1009*38fd1498Szrj if (ifcvt_can_use_mask_load_store (stmt))
1010*38fd1498Szrj {
1011*38fd1498Szrj gimple_set_plf (stmt, GF_PLF_2, true);
1012*38fd1498Szrj any_pred_load_store = true;
1013*38fd1498Szrj return true;
1014*38fd1498Szrj }
1015*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1016*38fd1498Szrj fprintf (dump_file, "tree could trap...\n");
1017*38fd1498Szrj return false;
1018*38fd1498Szrj }
1019*38fd1498Szrj
1020*38fd1498Szrj /* When if-converting stores force versioning, likewise if we
1021*38fd1498Szrj ended up generating store data races. */
1022*38fd1498Szrj if (gimple_vdef (stmt))
1023*38fd1498Szrj any_pred_load_store = true;
1024*38fd1498Szrj
1025*38fd1498Szrj return true;
1026*38fd1498Szrj }
1027*38fd1498Szrj
1028*38fd1498Szrj /* Return true when STMT is if-convertible.
1029*38fd1498Szrj
1030*38fd1498Szrj A statement is if-convertible if:
1031*38fd1498Szrj - it is an if-convertible GIMPLE_ASSIGN,
1032*38fd1498Szrj - it is a GIMPLE_LABEL or a GIMPLE_COND,
1033*38fd1498Szrj - it is builtins call. */
1034*38fd1498Szrj
1035*38fd1498Szrj static bool
if_convertible_stmt_p(gimple * stmt,vec<data_reference_p> refs)1036*38fd1498Szrj if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1037*38fd1498Szrj {
1038*38fd1498Szrj switch (gimple_code (stmt))
1039*38fd1498Szrj {
1040*38fd1498Szrj case GIMPLE_LABEL:
1041*38fd1498Szrj case GIMPLE_DEBUG:
1042*38fd1498Szrj case GIMPLE_COND:
1043*38fd1498Szrj return true;
1044*38fd1498Szrj
1045*38fd1498Szrj case GIMPLE_ASSIGN:
1046*38fd1498Szrj return if_convertible_gimple_assign_stmt_p (stmt, refs);
1047*38fd1498Szrj
1048*38fd1498Szrj case GIMPLE_CALL:
1049*38fd1498Szrj {
1050*38fd1498Szrj tree fndecl = gimple_call_fndecl (stmt);
1051*38fd1498Szrj if (fndecl)
1052*38fd1498Szrj {
1053*38fd1498Szrj int flags = gimple_call_flags (stmt);
1054*38fd1498Szrj if ((flags & ECF_CONST)
1055*38fd1498Szrj && !(flags & ECF_LOOPING_CONST_OR_PURE)
1056*38fd1498Szrj /* We can only vectorize some builtins at the moment,
1057*38fd1498Szrj so restrict if-conversion to those. */
1058*38fd1498Szrj && DECL_BUILT_IN (fndecl))
1059*38fd1498Szrj return true;
1060*38fd1498Szrj }
1061*38fd1498Szrj return false;
1062*38fd1498Szrj }
1063*38fd1498Szrj
1064*38fd1498Szrj default:
1065*38fd1498Szrj /* Don't know what to do with 'em so don't do anything. */
1066*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1067*38fd1498Szrj {
1068*38fd1498Szrj fprintf (dump_file, "don't know what to do\n");
1069*38fd1498Szrj print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1070*38fd1498Szrj }
1071*38fd1498Szrj return false;
1072*38fd1498Szrj }
1073*38fd1498Szrj
1074*38fd1498Szrj return true;
1075*38fd1498Szrj }
1076*38fd1498Szrj
1077*38fd1498Szrj /* Assumes that BB has more than 1 predecessors.
1078*38fd1498Szrj Returns false if at least one successor is not on critical edge
1079*38fd1498Szrj and true otherwise. */
1080*38fd1498Szrj
1081*38fd1498Szrj static inline bool
all_preds_critical_p(basic_block bb)1082*38fd1498Szrj all_preds_critical_p (basic_block bb)
1083*38fd1498Szrj {
1084*38fd1498Szrj edge e;
1085*38fd1498Szrj edge_iterator ei;
1086*38fd1498Szrj
1087*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
1088*38fd1498Szrj if (EDGE_COUNT (e->src->succs) == 1)
1089*38fd1498Szrj return false;
1090*38fd1498Szrj return true;
1091*38fd1498Szrj }
1092*38fd1498Szrj
1093*38fd1498Szrj /* Returns true if at least one successor in on critical edge. */
1094*38fd1498Szrj static inline bool
has_pred_critical_p(basic_block bb)1095*38fd1498Szrj has_pred_critical_p (basic_block bb)
1096*38fd1498Szrj {
1097*38fd1498Szrj edge e;
1098*38fd1498Szrj edge_iterator ei;
1099*38fd1498Szrj
1100*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
1101*38fd1498Szrj if (EDGE_COUNT (e->src->succs) > 1)
1102*38fd1498Szrj return true;
1103*38fd1498Szrj return false;
1104*38fd1498Szrj }
1105*38fd1498Szrj
1106*38fd1498Szrj /* Return true when BB is if-convertible. This routine does not check
1107*38fd1498Szrj basic block's statements and phis.
1108*38fd1498Szrj
1109*38fd1498Szrj A basic block is not if-convertible if:
1110*38fd1498Szrj - it is non-empty and it is after the exit block (in BFS order),
1111*38fd1498Szrj - it is after the exit block but before the latch,
1112*38fd1498Szrj - its edges are not normal.
1113*38fd1498Szrj
1114*38fd1498Szrj EXIT_BB is the basic block containing the exit of the LOOP. BB is
1115*38fd1498Szrj inside LOOP. */
1116*38fd1498Szrj
1117*38fd1498Szrj static bool
if_convertible_bb_p(struct loop * loop,basic_block bb,basic_block exit_bb)1118*38fd1498Szrj if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
1119*38fd1498Szrj {
1120*38fd1498Szrj edge e;
1121*38fd1498Szrj edge_iterator ei;
1122*38fd1498Szrj
1123*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1124*38fd1498Szrj fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1125*38fd1498Szrj
1126*38fd1498Szrj if (EDGE_COUNT (bb->succs) > 2)
1127*38fd1498Szrj return false;
1128*38fd1498Szrj
1129*38fd1498Szrj if (exit_bb)
1130*38fd1498Szrj {
1131*38fd1498Szrj if (bb != loop->latch)
1132*38fd1498Szrj {
1133*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1134*38fd1498Szrj fprintf (dump_file, "basic block after exit bb but before latch\n");
1135*38fd1498Szrj return false;
1136*38fd1498Szrj }
1137*38fd1498Szrj else if (!empty_block_p (bb))
1138*38fd1498Szrj {
1139*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1140*38fd1498Szrj fprintf (dump_file, "non empty basic block after exit bb\n");
1141*38fd1498Szrj return false;
1142*38fd1498Szrj }
1143*38fd1498Szrj else if (bb == loop->latch
1144*38fd1498Szrj && bb != exit_bb
1145*38fd1498Szrj && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1146*38fd1498Szrj {
1147*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1148*38fd1498Szrj fprintf (dump_file, "latch is not dominated by exit_block\n");
1149*38fd1498Szrj return false;
1150*38fd1498Szrj }
1151*38fd1498Szrj }
1152*38fd1498Szrj
1153*38fd1498Szrj /* Be less adventurous and handle only normal edges. */
1154*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
1155*38fd1498Szrj if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1156*38fd1498Szrj {
1157*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1158*38fd1498Szrj fprintf (dump_file, "Difficult to handle edges\n");
1159*38fd1498Szrj return false;
1160*38fd1498Szrj }
1161*38fd1498Szrj
1162*38fd1498Szrj return true;
1163*38fd1498Szrj }
1164*38fd1498Szrj
1165*38fd1498Szrj /* Return true when all predecessor blocks of BB are visited. The
1166*38fd1498Szrj VISITED bitmap keeps track of the visited blocks. */
1167*38fd1498Szrj
1168*38fd1498Szrj static bool
pred_blocks_visited_p(basic_block bb,bitmap * visited)1169*38fd1498Szrj pred_blocks_visited_p (basic_block bb, bitmap *visited)
1170*38fd1498Szrj {
1171*38fd1498Szrj edge e;
1172*38fd1498Szrj edge_iterator ei;
1173*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
1174*38fd1498Szrj if (!bitmap_bit_p (*visited, e->src->index))
1175*38fd1498Szrj return false;
1176*38fd1498Szrj
1177*38fd1498Szrj return true;
1178*38fd1498Szrj }
1179*38fd1498Szrj
1180*38fd1498Szrj /* Get body of a LOOP in suitable order for if-conversion. It is
1181*38fd1498Szrj caller's responsibility to deallocate basic block list.
1182*38fd1498Szrj If-conversion suitable order is, breadth first sort (BFS) order
1183*38fd1498Szrj with an additional constraint: select a block only if all its
1184*38fd1498Szrj predecessors are already selected. */
1185*38fd1498Szrj
1186*38fd1498Szrj static basic_block *
get_loop_body_in_if_conv_order(const struct loop * loop)1187*38fd1498Szrj get_loop_body_in_if_conv_order (const struct loop *loop)
1188*38fd1498Szrj {
1189*38fd1498Szrj basic_block *blocks, *blocks_in_bfs_order;
1190*38fd1498Szrj basic_block bb;
1191*38fd1498Szrj bitmap visited;
1192*38fd1498Szrj unsigned int index = 0;
1193*38fd1498Szrj unsigned int visited_count = 0;
1194*38fd1498Szrj
1195*38fd1498Szrj gcc_assert (loop->num_nodes);
1196*38fd1498Szrj gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1197*38fd1498Szrj
1198*38fd1498Szrj blocks = XCNEWVEC (basic_block, loop->num_nodes);
1199*38fd1498Szrj visited = BITMAP_ALLOC (NULL);
1200*38fd1498Szrj
1201*38fd1498Szrj blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1202*38fd1498Szrj
1203*38fd1498Szrj index = 0;
1204*38fd1498Szrj while (index < loop->num_nodes)
1205*38fd1498Szrj {
1206*38fd1498Szrj bb = blocks_in_bfs_order [index];
1207*38fd1498Szrj
1208*38fd1498Szrj if (bb->flags & BB_IRREDUCIBLE_LOOP)
1209*38fd1498Szrj {
1210*38fd1498Szrj free (blocks_in_bfs_order);
1211*38fd1498Szrj BITMAP_FREE (visited);
1212*38fd1498Szrj free (blocks);
1213*38fd1498Szrj return NULL;
1214*38fd1498Szrj }
1215*38fd1498Szrj
1216*38fd1498Szrj if (!bitmap_bit_p (visited, bb->index))
1217*38fd1498Szrj {
1218*38fd1498Szrj if (pred_blocks_visited_p (bb, &visited)
1219*38fd1498Szrj || bb == loop->header)
1220*38fd1498Szrj {
1221*38fd1498Szrj /* This block is now visited. */
1222*38fd1498Szrj bitmap_set_bit (visited, bb->index);
1223*38fd1498Szrj blocks[visited_count++] = bb;
1224*38fd1498Szrj }
1225*38fd1498Szrj }
1226*38fd1498Szrj
1227*38fd1498Szrj index++;
1228*38fd1498Szrj
1229*38fd1498Szrj if (index == loop->num_nodes
1230*38fd1498Szrj && visited_count != loop->num_nodes)
1231*38fd1498Szrj /* Not done yet. */
1232*38fd1498Szrj index = 0;
1233*38fd1498Szrj }
1234*38fd1498Szrj free (blocks_in_bfs_order);
1235*38fd1498Szrj BITMAP_FREE (visited);
1236*38fd1498Szrj return blocks;
1237*38fd1498Szrj }
1238*38fd1498Szrj
1239*38fd1498Szrj /* Returns true when the analysis of the predicates for all the basic
1240*38fd1498Szrj blocks in LOOP succeeded.
1241*38fd1498Szrj
1242*38fd1498Szrj predicate_bbs first allocates the predicates of the basic blocks.
1243*38fd1498Szrj These fields are then initialized with the tree expressions
1244*38fd1498Szrj representing the predicates under which a basic block is executed
1245*38fd1498Szrj in the LOOP. As the loop->header is executed at each iteration, it
1246*38fd1498Szrj has the "true" predicate. Other statements executed under a
1247*38fd1498Szrj condition are predicated with that condition, for example
1248*38fd1498Szrj
1249*38fd1498Szrj | if (x)
1250*38fd1498Szrj | S1;
1251*38fd1498Szrj | else
1252*38fd1498Szrj | S2;
1253*38fd1498Szrj
1254*38fd1498Szrj S1 will be predicated with "x", and
1255*38fd1498Szrj S2 will be predicated with "!x". */
1256*38fd1498Szrj
1257*38fd1498Szrj static void
predicate_bbs(loop_p loop)1258*38fd1498Szrj predicate_bbs (loop_p loop)
1259*38fd1498Szrj {
1260*38fd1498Szrj unsigned int i;
1261*38fd1498Szrj
1262*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1263*38fd1498Szrj init_bb_predicate (ifc_bbs[i]);
1264*38fd1498Szrj
1265*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1266*38fd1498Szrj {
1267*38fd1498Szrj basic_block bb = ifc_bbs[i];
1268*38fd1498Szrj tree cond;
1269*38fd1498Szrj gimple *stmt;
1270*38fd1498Szrj
1271*38fd1498Szrj /* The loop latch and loop exit block are always executed and
1272*38fd1498Szrj have no extra conditions to be processed: skip them. */
1273*38fd1498Szrj if (bb == loop->latch
1274*38fd1498Szrj || bb_with_exit_edge_p (loop, bb))
1275*38fd1498Szrj {
1276*38fd1498Szrj reset_bb_predicate (bb);
1277*38fd1498Szrj continue;
1278*38fd1498Szrj }
1279*38fd1498Szrj
1280*38fd1498Szrj cond = bb_predicate (bb);
1281*38fd1498Szrj stmt = last_stmt (bb);
1282*38fd1498Szrj if (stmt && gimple_code (stmt) == GIMPLE_COND)
1283*38fd1498Szrj {
1284*38fd1498Szrj tree c2;
1285*38fd1498Szrj edge true_edge, false_edge;
1286*38fd1498Szrj location_t loc = gimple_location (stmt);
1287*38fd1498Szrj tree c = build2_loc (loc, gimple_cond_code (stmt),
1288*38fd1498Szrj boolean_type_node,
1289*38fd1498Szrj gimple_cond_lhs (stmt),
1290*38fd1498Szrj gimple_cond_rhs (stmt));
1291*38fd1498Szrj
1292*38fd1498Szrj /* Add new condition into destination's predicate list. */
1293*38fd1498Szrj extract_true_false_edges_from_block (gimple_bb (stmt),
1294*38fd1498Szrj &true_edge, &false_edge);
1295*38fd1498Szrj
1296*38fd1498Szrj /* If C is true, then TRUE_EDGE is taken. */
1297*38fd1498Szrj add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1298*38fd1498Szrj unshare_expr (c));
1299*38fd1498Szrj
1300*38fd1498Szrj /* If C is false, then FALSE_EDGE is taken. */
1301*38fd1498Szrj c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1302*38fd1498Szrj unshare_expr (c));
1303*38fd1498Szrj add_to_dst_predicate_list (loop, false_edge,
1304*38fd1498Szrj unshare_expr (cond), c2);
1305*38fd1498Szrj
1306*38fd1498Szrj cond = NULL_TREE;
1307*38fd1498Szrj }
1308*38fd1498Szrj
1309*38fd1498Szrj /* If current bb has only one successor, then consider it as an
1310*38fd1498Szrj unconditional goto. */
1311*38fd1498Szrj if (single_succ_p (bb))
1312*38fd1498Szrj {
1313*38fd1498Szrj basic_block bb_n = single_succ (bb);
1314*38fd1498Szrj
1315*38fd1498Szrj /* The successor bb inherits the predicate of its
1316*38fd1498Szrj predecessor. If there is no predicate in the predecessor
1317*38fd1498Szrj bb, then consider the successor bb as always executed. */
1318*38fd1498Szrj if (cond == NULL_TREE)
1319*38fd1498Szrj cond = boolean_true_node;
1320*38fd1498Szrj
1321*38fd1498Szrj add_to_predicate_list (loop, bb_n, cond);
1322*38fd1498Szrj }
1323*38fd1498Szrj }
1324*38fd1498Szrj
1325*38fd1498Szrj /* The loop header is always executed. */
1326*38fd1498Szrj reset_bb_predicate (loop->header);
1327*38fd1498Szrj gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1328*38fd1498Szrj && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1329*38fd1498Szrj }
1330*38fd1498Szrj
1331*38fd1498Szrj /* Build region by adding loop pre-header and post-header blocks. */
1332*38fd1498Szrj
1333*38fd1498Szrj static vec<basic_block>
build_region(struct loop * loop)1334*38fd1498Szrj build_region (struct loop *loop)
1335*38fd1498Szrj {
1336*38fd1498Szrj vec<basic_block> region = vNULL;
1337*38fd1498Szrj basic_block exit_bb = NULL;
1338*38fd1498Szrj
1339*38fd1498Szrj gcc_assert (ifc_bbs);
1340*38fd1498Szrj /* The first element is loop pre-header. */
1341*38fd1498Szrj region.safe_push (loop_preheader_edge (loop)->src);
1342*38fd1498Szrj
1343*38fd1498Szrj for (unsigned int i = 0; i < loop->num_nodes; i++)
1344*38fd1498Szrj {
1345*38fd1498Szrj basic_block bb = ifc_bbs[i];
1346*38fd1498Szrj region.safe_push (bb);
1347*38fd1498Szrj /* Find loop postheader. */
1348*38fd1498Szrj edge e;
1349*38fd1498Szrj edge_iterator ei;
1350*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
1351*38fd1498Szrj if (loop_exit_edge_p (loop, e))
1352*38fd1498Szrj {
1353*38fd1498Szrj exit_bb = e->dest;
1354*38fd1498Szrj break;
1355*38fd1498Szrj }
1356*38fd1498Szrj }
1357*38fd1498Szrj /* The last element is loop post-header. */
1358*38fd1498Szrj gcc_assert (exit_bb);
1359*38fd1498Szrj region.safe_push (exit_bb);
1360*38fd1498Szrj return region;
1361*38fd1498Szrj }
1362*38fd1498Szrj
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS is initialized and freed in
   if_convertible_loop_p.  */
1366*38fd1498Szrj
1367*38fd1498Szrj static bool
if_convertible_loop_p_1(struct loop * loop,vec<data_reference_p> * refs)1368*38fd1498Szrj if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
1369*38fd1498Szrj {
1370*38fd1498Szrj unsigned int i;
1371*38fd1498Szrj basic_block exit_bb = NULL;
1372*38fd1498Szrj vec<basic_block> region;
1373*38fd1498Szrj
1374*38fd1498Szrj if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1375*38fd1498Szrj return false;
1376*38fd1498Szrj
1377*38fd1498Szrj calculate_dominance_info (CDI_DOMINATORS);
1378*38fd1498Szrj
1379*38fd1498Szrj /* Allow statements that can be handled during if-conversion. */
1380*38fd1498Szrj ifc_bbs = get_loop_body_in_if_conv_order (loop);
1381*38fd1498Szrj if (!ifc_bbs)
1382*38fd1498Szrj {
1383*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1384*38fd1498Szrj fprintf (dump_file, "Irreducible loop\n");
1385*38fd1498Szrj return false;
1386*38fd1498Szrj }
1387*38fd1498Szrj
1388*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1389*38fd1498Szrj {
1390*38fd1498Szrj basic_block bb = ifc_bbs[i];
1391*38fd1498Szrj
1392*38fd1498Szrj if (!if_convertible_bb_p (loop, bb, exit_bb))
1393*38fd1498Szrj return false;
1394*38fd1498Szrj
1395*38fd1498Szrj if (bb_with_exit_edge_p (loop, bb))
1396*38fd1498Szrj exit_bb = bb;
1397*38fd1498Szrj }
1398*38fd1498Szrj
1399*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1400*38fd1498Szrj {
1401*38fd1498Szrj basic_block bb = ifc_bbs[i];
1402*38fd1498Szrj gimple_stmt_iterator gsi;
1403*38fd1498Szrj
1404*38fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1405*38fd1498Szrj switch (gimple_code (gsi_stmt (gsi)))
1406*38fd1498Szrj {
1407*38fd1498Szrj case GIMPLE_LABEL:
1408*38fd1498Szrj case GIMPLE_ASSIGN:
1409*38fd1498Szrj case GIMPLE_CALL:
1410*38fd1498Szrj case GIMPLE_DEBUG:
1411*38fd1498Szrj case GIMPLE_COND:
1412*38fd1498Szrj gimple_set_uid (gsi_stmt (gsi), 0);
1413*38fd1498Szrj break;
1414*38fd1498Szrj default:
1415*38fd1498Szrj return false;
1416*38fd1498Szrj }
1417*38fd1498Szrj }
1418*38fd1498Szrj
1419*38fd1498Szrj data_reference_p dr;
1420*38fd1498Szrj
1421*38fd1498Szrj innermost_DR_map
1422*38fd1498Szrj = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1423*38fd1498Szrj baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1424*38fd1498Szrj
1425*38fd1498Szrj /* Compute post-dominator tree locally. */
1426*38fd1498Szrj region = build_region (loop);
1427*38fd1498Szrj calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1428*38fd1498Szrj
1429*38fd1498Szrj predicate_bbs (loop);
1430*38fd1498Szrj
1431*38fd1498Szrj /* Free post-dominator tree since it is not used after predication. */
1432*38fd1498Szrj free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1433*38fd1498Szrj region.release ();
1434*38fd1498Szrj
1435*38fd1498Szrj for (i = 0; refs->iterate (i, &dr); i++)
1436*38fd1498Szrj {
1437*38fd1498Szrj tree ref = DR_REF (dr);
1438*38fd1498Szrj
1439*38fd1498Szrj dr->aux = XNEW (struct ifc_dr);
1440*38fd1498Szrj DR_BASE_W_UNCONDITIONALLY (dr) = false;
1441*38fd1498Szrj DR_RW_UNCONDITIONALLY (dr) = false;
1442*38fd1498Szrj DR_W_UNCONDITIONALLY (dr) = false;
1443*38fd1498Szrj IFC_DR (dr)->rw_predicate = boolean_false_node;
1444*38fd1498Szrj IFC_DR (dr)->w_predicate = boolean_false_node;
1445*38fd1498Szrj IFC_DR (dr)->base_w_predicate = boolean_false_node;
1446*38fd1498Szrj if (gimple_uid (DR_STMT (dr)) == 0)
1447*38fd1498Szrj gimple_set_uid (DR_STMT (dr), i + 1);
1448*38fd1498Szrj
1449*38fd1498Szrj /* If DR doesn't have innermost loop behavior or it's a compound
1450*38fd1498Szrj memory reference, we synthesize its innermost loop behavior
1451*38fd1498Szrj for hashing. */
1452*38fd1498Szrj if (TREE_CODE (ref) == COMPONENT_REF
1453*38fd1498Szrj || TREE_CODE (ref) == IMAGPART_EXPR
1454*38fd1498Szrj || TREE_CODE (ref) == REALPART_EXPR
1455*38fd1498Szrj || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1456*38fd1498Szrj || DR_INIT (dr) || DR_STEP (dr)))
1457*38fd1498Szrj {
1458*38fd1498Szrj while (TREE_CODE (ref) == COMPONENT_REF
1459*38fd1498Szrj || TREE_CODE (ref) == IMAGPART_EXPR
1460*38fd1498Szrj || TREE_CODE (ref) == REALPART_EXPR)
1461*38fd1498Szrj ref = TREE_OPERAND (ref, 0);
1462*38fd1498Szrj
1463*38fd1498Szrj memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1464*38fd1498Szrj DR_BASE_ADDRESS (dr) = ref;
1465*38fd1498Szrj }
1466*38fd1498Szrj hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1467*38fd1498Szrj }
1468*38fd1498Szrj
1469*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1470*38fd1498Szrj {
1471*38fd1498Szrj basic_block bb = ifc_bbs[i];
1472*38fd1498Szrj gimple_stmt_iterator itr;
1473*38fd1498Szrj
1474*38fd1498Szrj /* Check the if-convertibility of statements in predicated BBs. */
1475*38fd1498Szrj if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1476*38fd1498Szrj for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1477*38fd1498Szrj if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1478*38fd1498Szrj return false;
1479*38fd1498Szrj }
1480*38fd1498Szrj
1481*38fd1498Szrj /* Checking PHIs needs to be done after stmts, as the fact whether there
1482*38fd1498Szrj are any masked loads or stores affects the tests. */
1483*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1484*38fd1498Szrj {
1485*38fd1498Szrj basic_block bb = ifc_bbs[i];
1486*38fd1498Szrj gphi_iterator itr;
1487*38fd1498Szrj
1488*38fd1498Szrj for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1489*38fd1498Szrj if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1490*38fd1498Szrj return false;
1491*38fd1498Szrj }
1492*38fd1498Szrj
1493*38fd1498Szrj if (dump_file)
1494*38fd1498Szrj fprintf (dump_file, "Applying if-conversion\n");
1495*38fd1498Szrj
1496*38fd1498Szrj return true;
1497*38fd1498Szrj }
1498*38fd1498Szrj
1499*38fd1498Szrj /* Return true when LOOP is if-convertible.
1500*38fd1498Szrj LOOP is if-convertible if:
1501*38fd1498Szrj - it is innermost,
1502*38fd1498Szrj - it has two or more basic blocks,
1503*38fd1498Szrj - it has only one exit,
1504*38fd1498Szrj - loop header is not the exit edge,
1505*38fd1498Szrj - if its basic blocks and phi nodes are if convertible. */
1506*38fd1498Szrj
1507*38fd1498Szrj static bool
if_convertible_loop_p(struct loop * loop)1508*38fd1498Szrj if_convertible_loop_p (struct loop *loop)
1509*38fd1498Szrj {
1510*38fd1498Szrj edge e;
1511*38fd1498Szrj edge_iterator ei;
1512*38fd1498Szrj bool res = false;
1513*38fd1498Szrj vec<data_reference_p> refs;
1514*38fd1498Szrj
1515*38fd1498Szrj /* Handle only innermost loop. */
1516*38fd1498Szrj if (!loop || loop->inner)
1517*38fd1498Szrj {
1518*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1519*38fd1498Szrj fprintf (dump_file, "not innermost loop\n");
1520*38fd1498Szrj return false;
1521*38fd1498Szrj }
1522*38fd1498Szrj
1523*38fd1498Szrj /* If only one block, no need for if-conversion. */
1524*38fd1498Szrj if (loop->num_nodes <= 2)
1525*38fd1498Szrj {
1526*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1527*38fd1498Szrj fprintf (dump_file, "less than 2 basic blocks\n");
1528*38fd1498Szrj return false;
1529*38fd1498Szrj }
1530*38fd1498Szrj
1531*38fd1498Szrj /* More than one loop exit is too much to handle. */
1532*38fd1498Szrj if (!single_exit (loop))
1533*38fd1498Szrj {
1534*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1535*38fd1498Szrj fprintf (dump_file, "multiple exits\n");
1536*38fd1498Szrj return false;
1537*38fd1498Szrj }
1538*38fd1498Szrj
1539*38fd1498Szrj /* If one of the loop header's edge is an exit edge then do not
1540*38fd1498Szrj apply if-conversion. */
1541*38fd1498Szrj FOR_EACH_EDGE (e, ei, loop->header->succs)
1542*38fd1498Szrj if (loop_exit_edge_p (loop, e))
1543*38fd1498Szrj return false;
1544*38fd1498Szrj
1545*38fd1498Szrj refs.create (5);
1546*38fd1498Szrj res = if_convertible_loop_p_1 (loop, &refs);
1547*38fd1498Szrj
1548*38fd1498Szrj data_reference_p dr;
1549*38fd1498Szrj unsigned int i;
1550*38fd1498Szrj for (i = 0; refs.iterate (i, &dr); i++)
1551*38fd1498Szrj free (dr->aux);
1552*38fd1498Szrj
1553*38fd1498Szrj free_data_refs (refs);
1554*38fd1498Szrj
1555*38fd1498Szrj delete innermost_DR_map;
1556*38fd1498Szrj innermost_DR_map = NULL;
1557*38fd1498Szrj
1558*38fd1498Szrj delete baseref_DR_map;
1559*38fd1498Szrj baseref_DR_map = NULL;
1560*38fd1498Szrj
1561*38fd1498Szrj return res;
1562*38fd1498Szrj }
1563*38fd1498Szrj
1564*38fd1498Szrj /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1565*38fd1498Szrj which is in predicated basic block.
1566*38fd1498Szrj In fact, the following PHI pattern is searching:
1567*38fd1498Szrj loop-header:
1568*38fd1498Szrj reduc_1 = PHI <..., reduc_2>
1569*38fd1498Szrj ...
1570*38fd1498Szrj if (...)
1571*38fd1498Szrj reduc_3 = ...
1572*38fd1498Szrj reduc_2 = PHI <reduc_1, reduc_3>
1573*38fd1498Szrj
1574*38fd1498Szrj ARG_0 and ARG_1 are correspondent PHI arguments.
1575*38fd1498Szrj REDUC, OP0 and OP1 contain reduction stmt and its operands.
1576*38fd1498Szrj EXTENDED is true if PHI has > 2 arguments. */
1577*38fd1498Szrj
1578*38fd1498Szrj static bool
is_cond_scalar_reduction(gimple * phi,gimple ** reduc,tree arg_0,tree arg_1,tree * op0,tree * op1,bool extended)1579*38fd1498Szrj is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1580*38fd1498Szrj tree *op0, tree *op1, bool extended)
1581*38fd1498Szrj {
1582*38fd1498Szrj tree lhs, r_op1, r_op2;
1583*38fd1498Szrj gimple *stmt;
1584*38fd1498Szrj gimple *header_phi = NULL;
1585*38fd1498Szrj enum tree_code reduction_op;
1586*38fd1498Szrj basic_block bb = gimple_bb (phi);
1587*38fd1498Szrj struct loop *loop = bb->loop_father;
1588*38fd1498Szrj edge latch_e = loop_latch_edge (loop);
1589*38fd1498Szrj imm_use_iterator imm_iter;
1590*38fd1498Szrj use_operand_p use_p;
1591*38fd1498Szrj edge e;
1592*38fd1498Szrj edge_iterator ei;
1593*38fd1498Szrj bool result = false;
1594*38fd1498Szrj if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1595*38fd1498Szrj return false;
1596*38fd1498Szrj
1597*38fd1498Szrj if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1598*38fd1498Szrj {
1599*38fd1498Szrj lhs = arg_1;
1600*38fd1498Szrj header_phi = SSA_NAME_DEF_STMT (arg_0);
1601*38fd1498Szrj stmt = SSA_NAME_DEF_STMT (arg_1);
1602*38fd1498Szrj }
1603*38fd1498Szrj else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1604*38fd1498Szrj {
1605*38fd1498Szrj lhs = arg_0;
1606*38fd1498Szrj header_phi = SSA_NAME_DEF_STMT (arg_1);
1607*38fd1498Szrj stmt = SSA_NAME_DEF_STMT (arg_0);
1608*38fd1498Szrj }
1609*38fd1498Szrj else
1610*38fd1498Szrj return false;
1611*38fd1498Szrj if (gimple_bb (header_phi) != loop->header)
1612*38fd1498Szrj return false;
1613*38fd1498Szrj
1614*38fd1498Szrj if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1615*38fd1498Szrj return false;
1616*38fd1498Szrj
1617*38fd1498Szrj if (gimple_code (stmt) != GIMPLE_ASSIGN
1618*38fd1498Szrj || gimple_has_volatile_ops (stmt))
1619*38fd1498Szrj return false;
1620*38fd1498Szrj
1621*38fd1498Szrj if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1622*38fd1498Szrj return false;
1623*38fd1498Szrj
1624*38fd1498Szrj if (!is_predicated (gimple_bb (stmt)))
1625*38fd1498Szrj return false;
1626*38fd1498Szrj
1627*38fd1498Szrj /* Check that stmt-block is predecessor of phi-block. */
1628*38fd1498Szrj FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1629*38fd1498Szrj if (e->dest == bb)
1630*38fd1498Szrj {
1631*38fd1498Szrj result = true;
1632*38fd1498Szrj break;
1633*38fd1498Szrj }
1634*38fd1498Szrj if (!result)
1635*38fd1498Szrj return false;
1636*38fd1498Szrj
1637*38fd1498Szrj if (!has_single_use (lhs))
1638*38fd1498Szrj return false;
1639*38fd1498Szrj
1640*38fd1498Szrj reduction_op = gimple_assign_rhs_code (stmt);
1641*38fd1498Szrj if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1642*38fd1498Szrj return false;
1643*38fd1498Szrj r_op1 = gimple_assign_rhs1 (stmt);
1644*38fd1498Szrj r_op2 = gimple_assign_rhs2 (stmt);
1645*38fd1498Szrj
1646*38fd1498Szrj /* Make R_OP1 to hold reduction variable. */
1647*38fd1498Szrj if (r_op2 == PHI_RESULT (header_phi)
1648*38fd1498Szrj && reduction_op == PLUS_EXPR)
1649*38fd1498Szrj std::swap (r_op1, r_op2);
1650*38fd1498Szrj else if (r_op1 != PHI_RESULT (header_phi))
1651*38fd1498Szrj return false;
1652*38fd1498Szrj
1653*38fd1498Szrj /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1654*38fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1655*38fd1498Szrj {
1656*38fd1498Szrj gimple *use_stmt = USE_STMT (use_p);
1657*38fd1498Szrj if (is_gimple_debug (use_stmt))
1658*38fd1498Szrj continue;
1659*38fd1498Szrj if (use_stmt == stmt)
1660*38fd1498Szrj continue;
1661*38fd1498Szrj if (gimple_code (use_stmt) != GIMPLE_PHI)
1662*38fd1498Szrj return false;
1663*38fd1498Szrj }
1664*38fd1498Szrj
1665*38fd1498Szrj *op0 = r_op1; *op1 = r_op2;
1666*38fd1498Szrj *reduc = stmt;
1667*38fd1498Szrj return true;
1668*38fd1498Szrj }
1669*38fd1498Szrj
1670*38fd1498Szrj /* Converts conditional scalar reduction into unconditional form, e.g.
1671*38fd1498Szrj bb_4
1672*38fd1498Szrj if (_5 != 0) goto bb_5 else goto bb_6
1673*38fd1498Szrj end_bb_4
1674*38fd1498Szrj bb_5
1675*38fd1498Szrj res_6 = res_13 + 1;
1676*38fd1498Szrj end_bb_5
1677*38fd1498Szrj bb_6
1678*38fd1498Szrj # res_2 = PHI <res_13(4), res_6(5)>
1679*38fd1498Szrj end_bb_6
1680*38fd1498Szrj
1681*38fd1498Szrj will be converted into sequence
1682*38fd1498Szrj _ifc__1 = _5 != 0 ? 1 : 0;
1683*38fd1498Szrj res_2 = res_13 + _ifc__1;
1684*38fd1498Szrj Argument SWAP tells that arguments of conditional expression should be
1685*38fd1498Szrj swapped.
1686*38fd1498Szrj Returns rhs of resulting PHI assignment. */
1687*38fd1498Szrj
1688*38fd1498Szrj static tree
convert_scalar_cond_reduction(gimple * reduc,gimple_stmt_iterator * gsi,tree cond,tree op0,tree op1,bool swap)1689*38fd1498Szrj convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1690*38fd1498Szrj tree cond, tree op0, tree op1, bool swap)
1691*38fd1498Szrj {
1692*38fd1498Szrj gimple_stmt_iterator stmt_it;
1693*38fd1498Szrj gimple *new_assign;
1694*38fd1498Szrj tree rhs;
1695*38fd1498Szrj tree rhs1 = gimple_assign_rhs1 (reduc);
1696*38fd1498Szrj tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1697*38fd1498Szrj tree c;
1698*38fd1498Szrj tree zero = build_zero_cst (TREE_TYPE (rhs1));
1699*38fd1498Szrj
1700*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1701*38fd1498Szrj {
1702*38fd1498Szrj fprintf (dump_file, "Found cond scalar reduction.\n");
1703*38fd1498Szrj print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1704*38fd1498Szrj }
1705*38fd1498Szrj
1706*38fd1498Szrj /* Build cond expression using COND and constant operand
1707*38fd1498Szrj of reduction rhs. */
1708*38fd1498Szrj c = fold_build_cond_expr (TREE_TYPE (rhs1),
1709*38fd1498Szrj unshare_expr (cond),
1710*38fd1498Szrj swap ? zero : op1,
1711*38fd1498Szrj swap ? op1 : zero);
1712*38fd1498Szrj
1713*38fd1498Szrj /* Create assignment stmt and insert it at GSI. */
1714*38fd1498Szrj new_assign = gimple_build_assign (tmp, c);
1715*38fd1498Szrj gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1716*38fd1498Szrj /* Build rhs for unconditional increment/decrement. */
1717*38fd1498Szrj rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1718*38fd1498Szrj TREE_TYPE (rhs1), op0, tmp);
1719*38fd1498Szrj
1720*38fd1498Szrj /* Delete original reduction stmt. */
1721*38fd1498Szrj stmt_it = gsi_for_stmt (reduc);
1722*38fd1498Szrj gsi_remove (&stmt_it, true);
1723*38fd1498Szrj release_defs (reduc);
1724*38fd1498Szrj return rhs;
1725*38fd1498Szrj }
1726*38fd1498Szrj
1727*38fd1498Szrj /* Produce condition for all occurrences of ARG in PHI node. */
1728*38fd1498Szrj
1729*38fd1498Szrj static tree
gen_phi_arg_condition(gphi * phi,vec<int> * occur,gimple_stmt_iterator * gsi)1730*38fd1498Szrj gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1731*38fd1498Szrj gimple_stmt_iterator *gsi)
1732*38fd1498Szrj {
1733*38fd1498Szrj int len;
1734*38fd1498Szrj int i;
1735*38fd1498Szrj tree cond = NULL_TREE;
1736*38fd1498Szrj tree c;
1737*38fd1498Szrj edge e;
1738*38fd1498Szrj
1739*38fd1498Szrj len = occur->length ();
1740*38fd1498Szrj gcc_assert (len > 0);
1741*38fd1498Szrj for (i = 0; i < len; i++)
1742*38fd1498Szrj {
1743*38fd1498Szrj e = gimple_phi_arg_edge (phi, (*occur)[i]);
1744*38fd1498Szrj c = bb_predicate (e->src);
1745*38fd1498Szrj if (is_true_predicate (c))
1746*38fd1498Szrj {
1747*38fd1498Szrj cond = c;
1748*38fd1498Szrj break;
1749*38fd1498Szrj }
1750*38fd1498Szrj c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1751*38fd1498Szrj is_gimple_condexpr, NULL_TREE,
1752*38fd1498Szrj true, GSI_SAME_STMT);
1753*38fd1498Szrj if (cond != NULL_TREE)
1754*38fd1498Szrj {
1755*38fd1498Szrj /* Must build OR expression. */
1756*38fd1498Szrj cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1757*38fd1498Szrj cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1758*38fd1498Szrj is_gimple_condexpr, NULL_TREE,
1759*38fd1498Szrj true, GSI_SAME_STMT);
1760*38fd1498Szrj }
1761*38fd1498Szrj else
1762*38fd1498Szrj cond = c;
1763*38fd1498Szrj }
1764*38fd1498Szrj gcc_assert (cond != NULL_TREE);
1765*38fd1498Szrj return cond;
1766*38fd1498Szrj }
1767*38fd1498Szrj
1768*38fd1498Szrj /* Local valueization callback that follows all-use SSA edges. */
1769*38fd1498Szrj
1770*38fd1498Szrj static tree
ifcvt_follow_ssa_use_edges(tree val)1771*38fd1498Szrj ifcvt_follow_ssa_use_edges (tree val)
1772*38fd1498Szrj {
1773*38fd1498Szrj return val;
1774*38fd1498Szrj }
1775*38fd1498Szrj
1776*38fd1498Szrj /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1777*38fd1498Szrj This routine can handle PHI nodes with more than two arguments.
1778*38fd1498Szrj
1779*38fd1498Szrj For example,
1780*38fd1498Szrj S1: A = PHI <x1(1), x2(5)>
1781*38fd1498Szrj is converted into,
1782*38fd1498Szrj S2: A = cond ? x1 : x2;
1783*38fd1498Szrj
1784*38fd1498Szrj The generated code is inserted at GSI that points to the top of
1785*38fd1498Szrj basic block's statement list.
1786*38fd1498Szrj If PHI node has more than two arguments a chain of conditional
1787*38fd1498Szrj expression is produced. */
1788*38fd1498Szrj
1789*38fd1498Szrj
1790*38fd1498Szrj static void
predicate_scalar_phi(gphi * phi,gimple_stmt_iterator * gsi)1791*38fd1498Szrj predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1792*38fd1498Szrj {
1793*38fd1498Szrj gimple *new_stmt = NULL, *reduc;
1794*38fd1498Szrj tree rhs, res, arg0, arg1, op0, op1, scev;
1795*38fd1498Szrj tree cond;
1796*38fd1498Szrj unsigned int index0;
1797*38fd1498Szrj unsigned int max, args_len;
1798*38fd1498Szrj edge e;
1799*38fd1498Szrj basic_block bb;
1800*38fd1498Szrj unsigned int i;
1801*38fd1498Szrj
1802*38fd1498Szrj res = gimple_phi_result (phi);
1803*38fd1498Szrj if (virtual_operand_p (res))
1804*38fd1498Szrj return;
1805*38fd1498Szrj
1806*38fd1498Szrj if ((rhs = degenerate_phi_result (phi))
1807*38fd1498Szrj || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1808*38fd1498Szrj res))
1809*38fd1498Szrj && !chrec_contains_undetermined (scev)
1810*38fd1498Szrj && scev != res
1811*38fd1498Szrj && (rhs = gimple_phi_arg_def (phi, 0))))
1812*38fd1498Szrj {
1813*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1814*38fd1498Szrj {
1815*38fd1498Szrj fprintf (dump_file, "Degenerate phi!\n");
1816*38fd1498Szrj print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1817*38fd1498Szrj }
1818*38fd1498Szrj new_stmt = gimple_build_assign (res, rhs);
1819*38fd1498Szrj gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1820*38fd1498Szrj update_stmt (new_stmt);
1821*38fd1498Szrj return;
1822*38fd1498Szrj }
1823*38fd1498Szrj
1824*38fd1498Szrj bb = gimple_bb (phi);
1825*38fd1498Szrj if (EDGE_COUNT (bb->preds) == 2)
1826*38fd1498Szrj {
1827*38fd1498Szrj /* Predicate ordinary PHI node with 2 arguments. */
1828*38fd1498Szrj edge first_edge, second_edge;
1829*38fd1498Szrj basic_block true_bb;
1830*38fd1498Szrj first_edge = EDGE_PRED (bb, 0);
1831*38fd1498Szrj second_edge = EDGE_PRED (bb, 1);
1832*38fd1498Szrj cond = bb_predicate (first_edge->src);
1833*38fd1498Szrj if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1834*38fd1498Szrj std::swap (first_edge, second_edge);
1835*38fd1498Szrj if (EDGE_COUNT (first_edge->src->succs) > 1)
1836*38fd1498Szrj {
1837*38fd1498Szrj cond = bb_predicate (second_edge->src);
1838*38fd1498Szrj if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1839*38fd1498Szrj cond = TREE_OPERAND (cond, 0);
1840*38fd1498Szrj else
1841*38fd1498Szrj first_edge = second_edge;
1842*38fd1498Szrj }
1843*38fd1498Szrj else
1844*38fd1498Szrj cond = bb_predicate (first_edge->src);
1845*38fd1498Szrj /* Gimplify the condition to a valid cond-expr conditonal operand. */
1846*38fd1498Szrj cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1847*38fd1498Szrj is_gimple_condexpr, NULL_TREE,
1848*38fd1498Szrj true, GSI_SAME_STMT);
1849*38fd1498Szrj true_bb = first_edge->src;
1850*38fd1498Szrj if (EDGE_PRED (bb, 1)->src == true_bb)
1851*38fd1498Szrj {
1852*38fd1498Szrj arg0 = gimple_phi_arg_def (phi, 1);
1853*38fd1498Szrj arg1 = gimple_phi_arg_def (phi, 0);
1854*38fd1498Szrj }
1855*38fd1498Szrj else
1856*38fd1498Szrj {
1857*38fd1498Szrj arg0 = gimple_phi_arg_def (phi, 0);
1858*38fd1498Szrj arg1 = gimple_phi_arg_def (phi, 1);
1859*38fd1498Szrj }
1860*38fd1498Szrj if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1861*38fd1498Szrj &op0, &op1, false))
1862*38fd1498Szrj /* Convert reduction stmt into vectorizable form. */
1863*38fd1498Szrj rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1864*38fd1498Szrj true_bb != gimple_bb (reduc));
1865*38fd1498Szrj else
1866*38fd1498Szrj /* Build new RHS using selected condition and arguments. */
1867*38fd1498Szrj rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1868*38fd1498Szrj arg0, arg1);
1869*38fd1498Szrj new_stmt = gimple_build_assign (res, rhs);
1870*38fd1498Szrj gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1871*38fd1498Szrj gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
1872*38fd1498Szrj if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
1873*38fd1498Szrj {
1874*38fd1498Szrj new_stmt = gsi_stmt (new_gsi);
1875*38fd1498Szrj update_stmt (new_stmt);
1876*38fd1498Szrj }
1877*38fd1498Szrj
1878*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1879*38fd1498Szrj {
1880*38fd1498Szrj fprintf (dump_file, "new phi replacement stmt\n");
1881*38fd1498Szrj print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1882*38fd1498Szrj }
1883*38fd1498Szrj return;
1884*38fd1498Szrj }
1885*38fd1498Szrj
1886*38fd1498Szrj /* Create hashmap for PHI node which contain vector of argument indexes
1887*38fd1498Szrj having the same value. */
1888*38fd1498Szrj bool swap = false;
1889*38fd1498Szrj hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1890*38fd1498Szrj unsigned int num_args = gimple_phi_num_args (phi);
1891*38fd1498Szrj int max_ind = -1;
1892*38fd1498Szrj /* Vector of different PHI argument values. */
1893*38fd1498Szrj auto_vec<tree> args (num_args);
1894*38fd1498Szrj
1895*38fd1498Szrj /* Compute phi_arg_map. */
1896*38fd1498Szrj for (i = 0; i < num_args; i++)
1897*38fd1498Szrj {
1898*38fd1498Szrj tree arg;
1899*38fd1498Szrj
1900*38fd1498Szrj arg = gimple_phi_arg_def (phi, i);
1901*38fd1498Szrj if (!phi_arg_map.get (arg))
1902*38fd1498Szrj args.quick_push (arg);
1903*38fd1498Szrj phi_arg_map.get_or_insert (arg).safe_push (i);
1904*38fd1498Szrj }
1905*38fd1498Szrj
1906*38fd1498Szrj /* Determine element with max number of occurrences. */
1907*38fd1498Szrj max_ind = -1;
1908*38fd1498Szrj max = 1;
1909*38fd1498Szrj args_len = args.length ();
1910*38fd1498Szrj for (i = 0; i < args_len; i++)
1911*38fd1498Szrj {
1912*38fd1498Szrj unsigned int len;
1913*38fd1498Szrj if ((len = phi_arg_map.get (args[i])->length ()) > max)
1914*38fd1498Szrj {
1915*38fd1498Szrj max_ind = (int) i;
1916*38fd1498Szrj max = len;
1917*38fd1498Szrj }
1918*38fd1498Szrj }
1919*38fd1498Szrj
1920*38fd1498Szrj /* Put element with max number of occurences to the end of ARGS. */
1921*38fd1498Szrj if (max_ind != -1 && max_ind +1 != (int) args_len)
1922*38fd1498Szrj std::swap (args[args_len - 1], args[max_ind]);
1923*38fd1498Szrj
1924*38fd1498Szrj /* Handle one special case when number of arguments with different values
1925*38fd1498Szrj is equal 2 and one argument has the only occurrence. Such PHI can be
1926*38fd1498Szrj handled as if would have only 2 arguments. */
1927*38fd1498Szrj if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1928*38fd1498Szrj {
1929*38fd1498Szrj vec<int> *indexes;
1930*38fd1498Szrj indexes = phi_arg_map.get (args[0]);
1931*38fd1498Szrj index0 = (*indexes)[0];
1932*38fd1498Szrj arg0 = args[0];
1933*38fd1498Szrj arg1 = args[1];
1934*38fd1498Szrj e = gimple_phi_arg_edge (phi, index0);
1935*38fd1498Szrj cond = bb_predicate (e->src);
1936*38fd1498Szrj if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1937*38fd1498Szrj {
1938*38fd1498Szrj swap = true;
1939*38fd1498Szrj cond = TREE_OPERAND (cond, 0);
1940*38fd1498Szrj }
1941*38fd1498Szrj /* Gimplify the condition to a valid cond-expr conditonal operand. */
1942*38fd1498Szrj cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1943*38fd1498Szrj is_gimple_condexpr, NULL_TREE,
1944*38fd1498Szrj true, GSI_SAME_STMT);
1945*38fd1498Szrj if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1946*38fd1498Szrj &op0, &op1, true)))
1947*38fd1498Szrj rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1948*38fd1498Szrj swap? arg1 : arg0,
1949*38fd1498Szrj swap? arg0 : arg1);
1950*38fd1498Szrj else
1951*38fd1498Szrj /* Convert reduction stmt into vectorizable form. */
1952*38fd1498Szrj rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1953*38fd1498Szrj swap);
1954*38fd1498Szrj new_stmt = gimple_build_assign (res, rhs);
1955*38fd1498Szrj gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1956*38fd1498Szrj update_stmt (new_stmt);
1957*38fd1498Szrj }
1958*38fd1498Szrj else
1959*38fd1498Szrj {
1960*38fd1498Szrj /* Common case. */
1961*38fd1498Szrj vec<int> *indexes;
1962*38fd1498Szrj tree type = TREE_TYPE (gimple_phi_result (phi));
1963*38fd1498Szrj tree lhs;
1964*38fd1498Szrj arg1 = args[1];
1965*38fd1498Szrj for (i = 0; i < args_len; i++)
1966*38fd1498Szrj {
1967*38fd1498Szrj arg0 = args[i];
1968*38fd1498Szrj indexes = phi_arg_map.get (args[i]);
1969*38fd1498Szrj if (i != args_len - 1)
1970*38fd1498Szrj lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1971*38fd1498Szrj else
1972*38fd1498Szrj lhs = res;
1973*38fd1498Szrj cond = gen_phi_arg_condition (phi, indexes, gsi);
1974*38fd1498Szrj rhs = fold_build_cond_expr (type, unshare_expr (cond),
1975*38fd1498Szrj arg0, arg1);
1976*38fd1498Szrj new_stmt = gimple_build_assign (lhs, rhs);
1977*38fd1498Szrj gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1978*38fd1498Szrj update_stmt (new_stmt);
1979*38fd1498Szrj arg1 = lhs;
1980*38fd1498Szrj }
1981*38fd1498Szrj }
1982*38fd1498Szrj
1983*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1984*38fd1498Szrj {
1985*38fd1498Szrj fprintf (dump_file, "new extended phi replacement stmt\n");
1986*38fd1498Szrj print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1987*38fd1498Szrj }
1988*38fd1498Szrj }
1989*38fd1498Szrj
1990*38fd1498Szrj /* Replaces in LOOP all the scalar phi nodes other than those in the
1991*38fd1498Szrj LOOP->header block with conditional modify expressions. */
1992*38fd1498Szrj
1993*38fd1498Szrj static void
predicate_all_scalar_phis(struct loop * loop)1994*38fd1498Szrj predicate_all_scalar_phis (struct loop *loop)
1995*38fd1498Szrj {
1996*38fd1498Szrj basic_block bb;
1997*38fd1498Szrj unsigned int orig_loop_num_nodes = loop->num_nodes;
1998*38fd1498Szrj unsigned int i;
1999*38fd1498Szrj
2000*38fd1498Szrj for (i = 1; i < orig_loop_num_nodes; i++)
2001*38fd1498Szrj {
2002*38fd1498Szrj gphi *phi;
2003*38fd1498Szrj gimple_stmt_iterator gsi;
2004*38fd1498Szrj gphi_iterator phi_gsi;
2005*38fd1498Szrj bb = ifc_bbs[i];
2006*38fd1498Szrj
2007*38fd1498Szrj if (bb == loop->header)
2008*38fd1498Szrj continue;
2009*38fd1498Szrj
2010*38fd1498Szrj phi_gsi = gsi_start_phis (bb);
2011*38fd1498Szrj if (gsi_end_p (phi_gsi))
2012*38fd1498Szrj continue;
2013*38fd1498Szrj
2014*38fd1498Szrj gsi = gsi_after_labels (bb);
2015*38fd1498Szrj while (!gsi_end_p (phi_gsi))
2016*38fd1498Szrj {
2017*38fd1498Szrj phi = phi_gsi.phi ();
2018*38fd1498Szrj if (virtual_operand_p (gimple_phi_result (phi)))
2019*38fd1498Szrj gsi_next (&phi_gsi);
2020*38fd1498Szrj else
2021*38fd1498Szrj {
2022*38fd1498Szrj predicate_scalar_phi (phi, &gsi);
2023*38fd1498Szrj remove_phi_node (&phi_gsi, false);
2024*38fd1498Szrj }
2025*38fd1498Szrj }
2026*38fd1498Szrj }
2027*38fd1498Szrj }
2028*38fd1498Szrj
2029*38fd1498Szrj /* Insert in each basic block of LOOP the statements produced by the
2030*38fd1498Szrj gimplification of the predicates. */
2031*38fd1498Szrj
2032*38fd1498Szrj static void
insert_gimplified_predicates(loop_p loop)2033*38fd1498Szrj insert_gimplified_predicates (loop_p loop)
2034*38fd1498Szrj {
2035*38fd1498Szrj unsigned int i;
2036*38fd1498Szrj
2037*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
2038*38fd1498Szrj {
2039*38fd1498Szrj basic_block bb = ifc_bbs[i];
2040*38fd1498Szrj gimple_seq stmts;
2041*38fd1498Szrj if (!is_predicated (bb))
2042*38fd1498Szrj gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2043*38fd1498Szrj if (!is_predicated (bb))
2044*38fd1498Szrj {
2045*38fd1498Szrj /* Do not insert statements for a basic block that is not
2046*38fd1498Szrj predicated. Also make sure that the predicate of the
2047*38fd1498Szrj basic block is set to true. */
2048*38fd1498Szrj reset_bb_predicate (bb);
2049*38fd1498Szrj continue;
2050*38fd1498Szrj }
2051*38fd1498Szrj
2052*38fd1498Szrj stmts = bb_predicate_gimplified_stmts (bb);
2053*38fd1498Szrj if (stmts)
2054*38fd1498Szrj {
2055*38fd1498Szrj if (any_pred_load_store)
2056*38fd1498Szrj {
2057*38fd1498Szrj /* Insert the predicate of the BB just after the label,
2058*38fd1498Szrj as the if-conversion of memory writes will use this
2059*38fd1498Szrj predicate. */
2060*38fd1498Szrj gimple_stmt_iterator gsi = gsi_after_labels (bb);
2061*38fd1498Szrj gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2062*38fd1498Szrj }
2063*38fd1498Szrj else
2064*38fd1498Szrj {
2065*38fd1498Szrj /* Insert the predicate of the BB at the end of the BB
2066*38fd1498Szrj as this would reduce the register pressure: the only
2067*38fd1498Szrj use of this predicate will be in successor BBs. */
2068*38fd1498Szrj gimple_stmt_iterator gsi = gsi_last_bb (bb);
2069*38fd1498Szrj
2070*38fd1498Szrj if (gsi_end_p (gsi)
2071*38fd1498Szrj || stmt_ends_bb_p (gsi_stmt (gsi)))
2072*38fd1498Szrj gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2073*38fd1498Szrj else
2074*38fd1498Szrj gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2075*38fd1498Szrj }
2076*38fd1498Szrj
2077*38fd1498Szrj /* Once the sequence is code generated, set it to NULL. */
2078*38fd1498Szrj set_bb_predicate_gimplified_stmts (bb, NULL);
2079*38fd1498Szrj }
2080*38fd1498Szrj }
2081*38fd1498Szrj }
2082*38fd1498Szrj
2083*38fd1498Szrj /* Helper function for predicate_mem_writes. Returns index of existent
2084*38fd1498Szrj mask if it was created for given SIZE and -1 otherwise. */
2085*38fd1498Szrj
2086*38fd1498Szrj static int
mask_exists(int size,vec<int> vec)2087*38fd1498Szrj mask_exists (int size, vec<int> vec)
2088*38fd1498Szrj {
2089*38fd1498Szrj unsigned int ix;
2090*38fd1498Szrj int v;
2091*38fd1498Szrj FOR_EACH_VEC_ELT (vec, ix, v)
2092*38fd1498Szrj if (v == size)
2093*38fd1498Szrj return (int) ix;
2094*38fd1498Szrj return -1;
2095*38fd1498Szrj }
2096*38fd1498Szrj
2097*38fd1498Szrj /* Predicate each write to memory in LOOP.
2098*38fd1498Szrj
2099*38fd1498Szrj This function transforms control flow constructs containing memory
2100*38fd1498Szrj writes of the form:
2101*38fd1498Szrj
2102*38fd1498Szrj | for (i = 0; i < N; i++)
2103*38fd1498Szrj | if (cond)
2104*38fd1498Szrj | A[i] = expr;
2105*38fd1498Szrj
2106*38fd1498Szrj into the following form that does not contain control flow:
2107*38fd1498Szrj
2108*38fd1498Szrj | for (i = 0; i < N; i++)
2109*38fd1498Szrj | A[i] = cond ? expr : A[i];
2110*38fd1498Szrj
2111*38fd1498Szrj The original CFG looks like this:
2112*38fd1498Szrj
2113*38fd1498Szrj | bb_0
2114*38fd1498Szrj | i = 0
2115*38fd1498Szrj | end_bb_0
2116*38fd1498Szrj |
2117*38fd1498Szrj | bb_1
2118*38fd1498Szrj | if (i < N) goto bb_5 else goto bb_2
2119*38fd1498Szrj | end_bb_1
2120*38fd1498Szrj |
2121*38fd1498Szrj | bb_2
2122*38fd1498Szrj | cond = some_computation;
2123*38fd1498Szrj | if (cond) goto bb_3 else goto bb_4
2124*38fd1498Szrj | end_bb_2
2125*38fd1498Szrj |
2126*38fd1498Szrj | bb_3
2127*38fd1498Szrj | A[i] = expr;
2128*38fd1498Szrj | goto bb_4
2129*38fd1498Szrj | end_bb_3
2130*38fd1498Szrj |
2131*38fd1498Szrj | bb_4
2132*38fd1498Szrj | goto bb_1
2133*38fd1498Szrj | end_bb_4
2134*38fd1498Szrj
2135*38fd1498Szrj insert_gimplified_predicates inserts the computation of the COND
2136*38fd1498Szrj expression at the beginning of the destination basic block:
2137*38fd1498Szrj
2138*38fd1498Szrj | bb_0
2139*38fd1498Szrj | i = 0
2140*38fd1498Szrj | end_bb_0
2141*38fd1498Szrj |
2142*38fd1498Szrj | bb_1
2143*38fd1498Szrj | if (i < N) goto bb_5 else goto bb_2
2144*38fd1498Szrj | end_bb_1
2145*38fd1498Szrj |
2146*38fd1498Szrj | bb_2
2147*38fd1498Szrj | cond = some_computation;
2148*38fd1498Szrj | if (cond) goto bb_3 else goto bb_4
2149*38fd1498Szrj | end_bb_2
2150*38fd1498Szrj |
2151*38fd1498Szrj | bb_3
2152*38fd1498Szrj | cond = some_computation;
2153*38fd1498Szrj | A[i] = expr;
2154*38fd1498Szrj | goto bb_4
2155*38fd1498Szrj | end_bb_3
2156*38fd1498Szrj |
2157*38fd1498Szrj | bb_4
2158*38fd1498Szrj | goto bb_1
2159*38fd1498Szrj | end_bb_4
2160*38fd1498Szrj
2161*38fd1498Szrj predicate_mem_writes is then predicating the memory write as follows:
2162*38fd1498Szrj
2163*38fd1498Szrj | bb_0
2164*38fd1498Szrj | i = 0
2165*38fd1498Szrj | end_bb_0
2166*38fd1498Szrj |
2167*38fd1498Szrj | bb_1
2168*38fd1498Szrj | if (i < N) goto bb_5 else goto bb_2
2169*38fd1498Szrj | end_bb_1
2170*38fd1498Szrj |
2171*38fd1498Szrj | bb_2
2172*38fd1498Szrj | if (cond) goto bb_3 else goto bb_4
2173*38fd1498Szrj | end_bb_2
2174*38fd1498Szrj |
2175*38fd1498Szrj | bb_3
2176*38fd1498Szrj | cond = some_computation;
2177*38fd1498Szrj | A[i] = cond ? expr : A[i];
2178*38fd1498Szrj | goto bb_4
2179*38fd1498Szrj | end_bb_3
2180*38fd1498Szrj |
2181*38fd1498Szrj | bb_4
2182*38fd1498Szrj | goto bb_1
2183*38fd1498Szrj | end_bb_4
2184*38fd1498Szrj
2185*38fd1498Szrj and finally combine_blocks removes the basic block boundaries making
2186*38fd1498Szrj the loop vectorizable:
2187*38fd1498Szrj
2188*38fd1498Szrj | bb_0
2189*38fd1498Szrj | i = 0
2190*38fd1498Szrj | if (i < N) goto bb_5 else goto bb_1
2191*38fd1498Szrj | end_bb_0
2192*38fd1498Szrj |
2193*38fd1498Szrj | bb_1
2194*38fd1498Szrj | cond = some_computation;
2195*38fd1498Szrj | A[i] = cond ? expr : A[i];
2196*38fd1498Szrj | if (i < N) goto bb_5 else goto bb_4
2197*38fd1498Szrj | end_bb_1
2198*38fd1498Szrj |
2199*38fd1498Szrj | bb_4
2200*38fd1498Szrj | goto bb_1
2201*38fd1498Szrj | end_bb_4
2202*38fd1498Szrj */
2203*38fd1498Szrj
2204*38fd1498Szrj static void
predicate_mem_writes(loop_p loop)2205*38fd1498Szrj predicate_mem_writes (loop_p loop)
2206*38fd1498Szrj {
/* Visits ifc_bbs[1] .. ifc_bbs[num_nodes - 1] of LOOP (ifc_bbs[0] is never
   examined here) and rewrites memory accesses in every block whose
   predicate is not trivially true.  The node count is sampled once on
   entry.  */
2207*38fd1498Szrj unsigned int i, orig_loop_num_nodes = loop->num_nodes;
/* Masks already built for the block being processed: vect_sizes[k] is the
   access size in bits for which vect_masks[k] was created.  */
2208*38fd1498Szrj auto_vec<int, 1> vect_sizes;
2209*38fd1498Szrj auto_vec<tree, 1> vect_masks;
2210*38fd1498Szrj
2211*38fd1498Szrj for (i = 1; i < orig_loop_num_nodes; i++)
2212*38fd1498Szrj {
2213*38fd1498Szrj gimple_stmt_iterator gsi;
2214*38fd1498Szrj basic_block bb = ifc_bbs[i];
2215*38fd1498Szrj tree cond = bb_predicate (bb);
2216*38fd1498Szrj bool swap;
2217*38fd1498Szrj gimple *stmt;
2218*38fd1498Szrj int index;
2219*38fd1498Szrj
/* A block with a true predicate needs no predication.  */
2220*38fd1498Szrj if (is_true_predicate (cond))
2221*38fd1498Szrj continue;
2222*38fd1498Szrj
/* Strip a top-level negation from the predicate and remember it in SWAP;
   the negation is re-applied below either by inverting the mask or by
   exchanging the two arms of the select.  */
2223*38fd1498Szrj swap = false;
2224*38fd1498Szrj if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2225*38fd1498Szrj {
2226*38fd1498Szrj swap = true;
2227*38fd1498Szrj cond = TREE_OPERAND (cond, 0);
2228*38fd1498Szrj }
2229*38fd1498Szrj
/* The mask cache is only valid within one block; empty it.  */
2230*38fd1498Szrj vect_sizes.truncate (0);
2231*38fd1498Szrj vect_masks.truncate (0);
2232*38fd1498Szrj
/* The iterator is advanced by hand at the bottom of the loop body because
   the statement-removal case below continues without calling gsi_next
   (presumably gsi_remove already leaves GSI on the following statement).  */
2233*38fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2234*38fd1498Szrj {
/* STMT is assigned inside the condition.  Anything that is not a single
   assignment is left untouched (empty statement) and simply skipped.  */
2235*38fd1498Szrj if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2236*38fd1498Szrj ;
/* Case 1: the predicate is false and the statement has a virtual
   definition: delete the statement outright.  */
2237*38fd1498Szrj else if (is_false_predicate (cond)
2238*38fd1498Szrj && gimple_vdef (stmt))
2239*38fd1498Szrj {
2240*38fd1498Szrj unlink_stmt_vdef (stmt);
2241*38fd1498Szrj gsi_remove (&gsi, true);
2242*38fd1498Szrj release_defs (stmt);
2243*38fd1498Szrj continue;
2244*38fd1498Szrj }
/* Case 2: the statement carries the GF_PLF_2 flag: replace it with an
   IFN_MASK_LOAD or IFN_MASK_STORE internal call.  The flag is set outside
   this chunk.  */
2245*38fd1498Szrj else if (gimple_plf (stmt, GF_PLF_2))
2246*38fd1498Szrj {
2247*38fd1498Szrj tree lhs = gimple_assign_lhs (stmt);
2248*38fd1498Szrj tree rhs = gimple_assign_rhs1 (stmt);
2249*38fd1498Szrj tree ref, addr, ptr, mask;
2250*38fd1498Szrj gcall *new_stmt;
2251*38fd1498Szrj gimple_seq stmts = NULL;
2252*38fd1498Szrj machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2253*38fd1498Szrj /* We checked before setting GF_PLF_2 that an equivalent
2254*38fd1498Szrj integer mode exists. */
2255*38fd1498Szrj int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
/* REF is the memory side of the assignment: the right-hand side when the
   left-hand side is an SSA name (a load), the left-hand side otherwise
   (a store).  */
2256*38fd1498Szrj ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2257*38fd1498Szrj mark_addressable (ref);
/* Gimplify the address of REF at the iterator position.  */
2258*38fd1498Szrj addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2259*38fd1498Szrj true, NULL_TREE, true,
2260*38fd1498Szrj GSI_SAME_STMT);
/* Reuse a mask previously built in this block for the same bit size.
   mask_exists is defined outside this chunk; -1 is treated as "none".  */
2261*38fd1498Szrj if (!vect_sizes.is_empty ()
2262*38fd1498Szrj && (index = mask_exists (bitsize, vect_sizes)) != -1)
2263*38fd1498Szrj /* Use created mask. */
2264*38fd1498Szrj mask = vect_masks[index];
2265*38fd1498Szrj else
2266*38fd1498Szrj {
/* A comparison predicate is materialised as a boolean-typed statement;
   any other predicate is used as the mask directly.  */
2267*38fd1498Szrj if (COMPARISON_CLASS_P (cond))
2268*38fd1498Szrj mask = gimple_build (&stmts, TREE_CODE (cond),
2269*38fd1498Szrj boolean_type_node,
2270*38fd1498Szrj TREE_OPERAND (cond, 0),
2271*38fd1498Szrj TREE_OPERAND (cond, 1));
2272*38fd1498Szrj else
2273*38fd1498Szrj mask = cond;
2274*38fd1498Szrj
/* Re-apply the negation stripped earlier: mask ^ true.  */
2275*38fd1498Szrj if (swap)
2276*38fd1498Szrj {
2277*38fd1498Szrj tree true_val
2278*38fd1498Szrj = constant_boolean_node (true, TREE_TYPE (mask));
2279*38fd1498Szrj mask = gimple_build (&stmts, BIT_XOR_EXPR,
2280*38fd1498Szrj TREE_TYPE (mask), mask, true_val);
2281*38fd1498Szrj }
2282*38fd1498Szrj gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2283*38fd1498Szrj
2284*38fd1498Szrj /* Save mask and its size for further use. */
2285*38fd1498Szrj vect_sizes.safe_push (bitsize);
2286*38fd1498Szrj vect_masks.safe_push (mask);
2287*38fd1498Szrj }
/* Second call argument: an integer constant whose type is the alias
   pointer type of REF and whose value is REF's object alignment.  */
2288*38fd1498Szrj ptr = build_int_cst (reference_alias_ptr_type (ref),
2289*38fd1498Szrj get_object_alignment (ref));
2290*38fd1498Szrj /* Copy points-to info if possible. */
2291*38fd1498Szrj if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2292*38fd1498Szrj copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2293*38fd1498Szrj ref);
/* Load: IFN_MASK_LOAD (addr, ptr, mask), keeping the original result name
   and virtual use.  */
2294*38fd1498Szrj if (TREE_CODE (lhs) == SSA_NAME)
2295*38fd1498Szrj {
2296*38fd1498Szrj new_stmt
2297*38fd1498Szrj = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2298*38fd1498Szrj ptr, mask);
2299*38fd1498Szrj gimple_call_set_lhs (new_stmt, lhs);
2300*38fd1498Szrj gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2301*38fd1498Szrj }
/* Store: IFN_MASK_STORE (addr, ptr, mask, rhs).  The virtual use and
   definition move to the call, and the virtual definition's defining
   statement is re-pointed at it.  */
2302*38fd1498Szrj else
2303*38fd1498Szrj {
2304*38fd1498Szrj new_stmt
2305*38fd1498Szrj = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2306*38fd1498Szrj mask, rhs);
2307*38fd1498Szrj gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2308*38fd1498Szrj gimple_set_vdef (new_stmt, gimple_vdef (stmt));
2309*38fd1498Szrj SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
2310*38fd1498Szrj }
2311*38fd1498Szrj gimple_call_set_nothrow (new_stmt, true);
2312*38fd1498Szrj
2313*38fd1498Szrj gsi_replace (&gsi, new_stmt, true);
2314*38fd1498Szrj }
/* Case 3: any other store: rewrite "lhs = rhs" as
   "lhs = cond ? rhs : lhs" using temporaries.  With SWAP set the two arms
   are exchanged instead of negating the condition.  Note that COND is
   overwritten with its gimplified form, which later statements of this
   block then see.  */
2315*38fd1498Szrj else if (gimple_vdef (stmt))
2316*38fd1498Szrj {
2317*38fd1498Szrj tree lhs = gimple_assign_lhs (stmt);
2318*38fd1498Szrj tree rhs = gimple_assign_rhs1 (stmt);
2319*38fd1498Szrj tree type = TREE_TYPE (lhs);
2320*38fd1498Szrj
2321*38fd1498Szrj lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2322*38fd1498Szrj rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2323*38fd1498Szrj if (swap)
2324*38fd1498Szrj std::swap (lhs, rhs);
2325*38fd1498Szrj cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2326*38fd1498Szrj is_gimple_condexpr, NULL_TREE,
2327*38fd1498Szrj true, GSI_SAME_STMT);
2328*38fd1498Szrj rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2329*38fd1498Szrj gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2330*38fd1498Szrj update_stmt (stmt);
2331*38fd1498Szrj }
2332*38fd1498Szrj gsi_next (&gsi);
2333*38fd1498Szrj }
2334*38fd1498Szrj }
2335*38fd1498Szrj }
2336*38fd1498Szrj
2337*38fd1498Szrj /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2338*38fd1498Szrj other than the exit and latch of the LOOP. Also resets the
2339*38fd1498Szrj GIMPLE_DEBUG information. */
/* The blocks are taken from ifc_bbs[0 .. loop->num_nodes - 1].  Only debug
   bind statements have their value reset; other debug statements are
   stepped over unchanged.  */
2340*38fd1498Szrj
2341*38fd1498Szrj static void
remove_conditions_and_labels(loop_p loop)2342*38fd1498Szrj remove_conditions_and_labels (loop_p loop)
2343*38fd1498Szrj {
2344*38fd1498Szrj gimple_stmt_iterator gsi;
2345*38fd1498Szrj unsigned int i;
2346*38fd1498Szrj
2347*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
2348*38fd1498Szrj {
2349*38fd1498Szrj basic_block bb = ifc_bbs[i];
2350*38fd1498Szrj
/* Blocks with an exit edge and the latch keep their conditions and
   labels.  */
2351*38fd1498Szrj if (bb_with_exit_edge_p (loop, bb)
2352*38fd1498Szrj || bb == loop->latch)
2353*38fd1498Szrj continue;
2354*38fd1498Szrj
/* No increment in the loop header: each switch case decides how the
   iterator moves on.  */
2355*38fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2356*38fd1498Szrj switch (gimple_code (gsi_stmt (gsi)))
2357*38fd1498Szrj {
/* Removal case: no gsi_next is issued (presumably gsi_remove leaves the
   iterator on the following statement).  */
2358*38fd1498Szrj case GIMPLE_COND:
2359*38fd1498Szrj case GIMPLE_LABEL:
2360*38fd1498Szrj gsi_remove (&gsi, true);
2361*38fd1498Szrj break;
2362*38fd1498Szrj
2363*38fd1498Szrj case GIMPLE_DEBUG:
2364*38fd1498Szrj /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2365*38fd1498Szrj if (gimple_debug_bind_p (gsi_stmt (gsi)))
2366*38fd1498Szrj {
2367*38fd1498Szrj gimple_debug_bind_reset_value (gsi_stmt (gsi));
2368*38fd1498Szrj update_stmt (gsi_stmt (gsi));
2369*38fd1498Szrj }
2370*38fd1498Szrj gsi_next (&gsi);
2371*38fd1498Szrj break;
2372*38fd1498Szrj
/* Every other statement kind is kept.  */
2373*38fd1498Szrj default:
2374*38fd1498Szrj gsi_next (&gsi);
2375*38fd1498Szrj }
2376*38fd1498Szrj }
2377*38fd1498Szrj }
2378*38fd1498Szrj
2379*38fd1498Szrj /* Combine all the basic blocks from LOOP into one or two super basic
2380*38fd1498Szrj blocks. Replace PHI nodes with conditional modify expressions. */
/* Outline: (1) run the statement-level transformations, (2) drop every
   in-loop edge not leaving the exit block, (3) re-link header, exit block
   and latch, (4) move the statements of every other block into the header
   while threading virtual operands, (5) try to merge the header with the
   exit block.  On return ifc_bbs has been freed and set to NULL.  */
2381*38fd1498Szrj
2382*38fd1498Szrj static void
combine_blocks(struct loop * loop)2383*38fd1498Szrj combine_blocks (struct loop *loop)
2384*38fd1498Szrj {
2385*38fd1498Szrj basic_block bb, exit_bb, merge_target_bb;
/* Sampled once: blocks are deleted further down, so the live
   loop->num_nodes is not used for the iterations in this function
   (the callee remove_conditions_and_labels reads it itself, before
   any block is deleted).  */
2386*38fd1498Szrj unsigned int orig_loop_num_nodes = loop->num_nodes;
2387*38fd1498Szrj unsigned int i;
2388*38fd1498Szrj edge e;
2389*38fd1498Szrj edge_iterator ei;
2390*38fd1498Szrj
2391*38fd1498Szrj remove_conditions_and_labels (loop);
2392*38fd1498Szrj insert_gimplified_predicates (loop);
2393*38fd1498Szrj predicate_all_scalar_phis (loop);
2394*38fd1498Szrj
/* Memory accesses are only rewritten when the file-level flag
   any_pred_load_store is set.  */
2395*38fd1498Szrj if (any_pred_load_store)
2396*38fd1498Szrj predicate_mem_writes (loop);
2397*38fd1498Szrj
2398*38fd1498Szrj /* Merge basic blocks: first remove all the edges in the loop,
2399*38fd1498Szrj except for those from the exit block. */
2400*38fd1498Szrj exit_bb = NULL;
/* predicated[i] remembers, before the predicates are freed, whether block
   i had a non-true predicate; it is consulted when merging below.  */
2401*38fd1498Szrj bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2402*38fd1498Szrj for (i = 0; i < orig_loop_num_nodes; i++)
2403*38fd1498Szrj {
2404*38fd1498Szrj bb = ifc_bbs[i];
2405*38fd1498Szrj predicated[i] = !is_true_predicate (bb_predicate (bb));
2406*38fd1498Szrj free_bb_predicate (bb);
2407*38fd1498Szrj if (bb_with_exit_edge_p (loop, bb))
2408*38fd1498Szrj {
/* At most one block with an exit edge is expected.  */
2409*38fd1498Szrj gcc_assert (exit_bb == NULL);
2410*38fd1498Szrj exit_bb = bb;
2411*38fd1498Szrj }
2412*38fd1498Szrj }
2413*38fd1498Szrj gcc_assert (exit_bb != loop->latch);
2414*38fd1498Szrj
/* Remove the incoming edges of every block but the first, keeping only
   those that originate in the exit block.  The edge iterator is advanced
   only when an edge is kept.  */
2415*38fd1498Szrj for (i = 1; i < orig_loop_num_nodes; i++)
2416*38fd1498Szrj {
2417*38fd1498Szrj bb = ifc_bbs[i];
2418*38fd1498Szrj
2419*38fd1498Szrj for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2420*38fd1498Szrj {
2421*38fd1498Szrj if (e->src == exit_bb)
2422*38fd1498Szrj ei_next (&ei);
2423*38fd1498Szrj else
2424*38fd1498Szrj remove_edge (e);
2425*38fd1498Szrj }
2426*38fd1498Szrj }
2427*38fd1498Szrj
2428*38fd1498Szrj if (exit_bb != NULL)
2429*38fd1498Szrj {
2430*38fd1498Szrj if (exit_bb != loop->header)
2431*38fd1498Szrj {
2432*38fd1498Szrj /* Connect this node to loop header. */
2433*38fd1498Szrj make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2434*38fd1498Szrj set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2435*38fd1498Szrj }
2436*38fd1498Szrj
2437*38fd1498Szrj /* Redirect non-exit edges to loop->latch. */
2438*38fd1498Szrj FOR_EACH_EDGE (e, ei, exit_bb->succs)
2439*38fd1498Szrj {
2440*38fd1498Szrj if (!loop_exit_edge_p (loop, e))
2441*38fd1498Szrj redirect_edge_and_branch (e, loop->latch);
2442*38fd1498Szrj }
2443*38fd1498Szrj set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2444*38fd1498Szrj }
2445*38fd1498Szrj else
2446*38fd1498Szrj {
2447*38fd1498Szrj /* If the loop does not have an exit, reconnect header and latch. */
2448*38fd1498Szrj make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2449*38fd1498Szrj set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2450*38fd1498Szrj }
2451*38fd1498Szrj
/* All merged statements end up in the loop header.  */
2452*38fd1498Szrj merge_target_bb = loop->header;
2453*38fd1498Szrj
2454*38fd1498Szrj /* Get at the virtual def valid for uses starting at the first block
2455*38fd1498Szrj we merge into the header. Without a virtual PHI the loop has the
2456*38fd1498Szrj same virtual use on all stmts. */
2457*38fd1498Szrj gphi *vphi = get_virtual_phi (loop->header);
2458*38fd1498Szrj tree last_vdef = NULL_TREE;
2459*38fd1498Szrj if (vphi)
2460*38fd1498Szrj {
/* Start from the PHI result and advance to the last virtual definition
   made by a statement of the header itself.  */
2461*38fd1498Szrj last_vdef = gimple_phi_result (vphi);
2462*38fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2463*38fd1498Szrj ! gsi_end_p (gsi); gsi_next (&gsi))
2464*38fd1498Szrj if (gimple_vdef (gsi_stmt (gsi)))
2465*38fd1498Szrj last_vdef = gimple_vdef (gsi_stmt (gsi));
2466*38fd1498Szrj }
/* Move the statements of every block except the exit block and the latch
   to the end of the header, then delete the emptied block.  */
2467*38fd1498Szrj for (i = 1; i < orig_loop_num_nodes; i++)
2468*38fd1498Szrj {
2469*38fd1498Szrj gimple_stmt_iterator gsi;
2470*38fd1498Szrj gimple_stmt_iterator last;
2471*38fd1498Szrj
2472*38fd1498Szrj bb = ifc_bbs[i];
2473*38fd1498Szrj
2474*38fd1498Szrj if (bb == exit_bb || bb == loop->latch)
2475*38fd1498Szrj continue;
2476*38fd1498Szrj
2477*38fd1498Szrj /* We release virtual PHIs late because we have to propagate them
2478*38fd1498Szrj out using the current VUSE. The def might be the one used
2479*38fd1498Szrj after the loop. */
2480*38fd1498Szrj vphi = get_virtual_phi (bb);
2481*38fd1498Szrj if (vphi)
2482*38fd1498Szrj {
2483*38fd1498Szrj imm_use_iterator iter;
2484*38fd1498Szrj use_operand_p use_p;
2485*38fd1498Szrj gimple *use_stmt;
/* Every use of the PHI result is replaced by the current LAST_VDEF
   before the PHI node is removed.  */
2486*38fd1498Szrj FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2487*38fd1498Szrj {
2488*38fd1498Szrj FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2489*38fd1498Szrj SET_USE (use_p, last_vdef);
2490*38fd1498Szrj }
2491*38fd1498Szrj gsi = gsi_for_stmt (vphi);
2492*38fd1498Szrj remove_phi_node (&gsi, true);
2493*38fd1498Szrj }
2494*38fd1498Szrj
2495*38fd1498Szrj /* Make stmts member of loop->header and clear range info from all stmts
2496*38fd1498Szrj in BB which is now no longer executed conditional on a predicate we
2497*38fd1498Szrj could have derived it from. */
2498*38fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2499*38fd1498Szrj {
2500*38fd1498Szrj gimple *stmt = gsi_stmt (gsi);
2501*38fd1498Szrj gimple_set_bb (stmt, merge_target_bb);
2502*38fd1498Szrj /* Update virtual operands. */
2503*38fd1498Szrj if (last_vdef)
2504*38fd1498Szrj {
2505*38fd1498Szrj use_operand_p use_p = ssa_vuse_operand (stmt);
2506*38fd1498Szrj if (use_p
2507*38fd1498Szrj && USE_FROM_PTR (use_p) != last_vdef)
2508*38fd1498Szrj SET_USE (use_p, last_vdef);
2509*38fd1498Szrj if (gimple_vdef (stmt))
2510*38fd1498Szrj last_vdef = gimple_vdef (stmt);
2511*38fd1498Szrj }
/* predicated[i] is indexed with the outer block counter; the operand
   iterator declared inside the braces is also named i and shadows it
   only from its declaration onwards.  */
2512*38fd1498Szrj if (predicated[i])
2513*38fd1498Szrj {
2514*38fd1498Szrj ssa_op_iter i;
2515*38fd1498Szrj tree op;
2516*38fd1498Szrj FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2517*38fd1498Szrj reset_flow_sensitive_info (op);
2518*38fd1498Szrj }
2519*38fd1498Szrj }
2520*38fd1498Szrj
2521*38fd1498Szrj /* Update stmt list. */
2522*38fd1498Szrj last = gsi_last_bb (merge_target_bb);
2523*38fd1498Szrj gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
/* Detach the sequence from BB before the block is deleted.  */
2524*38fd1498Szrj set_bb_seq (bb, NULL);
2525*38fd1498Szrj
2526*38fd1498Szrj delete_basic_block (bb);
2527*38fd1498Szrj }
2528*38fd1498Szrj
2529*38fd1498Szrj /* If possible, merge loop header to the block with the exit edge.
2530*38fd1498Szrj This reduces the number of basic blocks to two, to please the
2531*38fd1498Szrj vectorizer that handles only loops with two nodes. */
2532*38fd1498Szrj if (exit_bb
2533*38fd1498Szrj && exit_bb != loop->header)
2534*38fd1498Szrj {
2535*38fd1498Szrj /* We release virtual PHIs late because we have to propagate them
2536*38fd1498Szrj out using the current VUSE. The def might be the one used
2537*38fd1498Szrj after the loop. */
2538*38fd1498Szrj vphi = get_virtual_phi (exit_bb);
2539*38fd1498Szrj if (vphi)
2540*38fd1498Szrj {
2541*38fd1498Szrj imm_use_iterator iter;
2542*38fd1498Szrj use_operand_p use_p;
2543*38fd1498Szrj gimple *use_stmt;
2544*38fd1498Szrj FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2545*38fd1498Szrj {
2546*38fd1498Szrj FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2547*38fd1498Szrj SET_USE (use_p, last_vdef);
2548*38fd1498Szrj }
2549*38fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2550*38fd1498Szrj remove_phi_node (&gsi, true);
2551*38fd1498Szrj }
2552*38fd1498Szrj
2553*38fd1498Szrj if (can_merge_blocks_p (loop->header, exit_bb))
2554*38fd1498Szrj merge_blocks (loop->header, exit_bb);
2555*38fd1498Szrj }
2556*38fd1498Szrj
/* The block array is released here and the global cleared.  */
2557*38fd1498Szrj free (ifc_bbs);
2558*38fd1498Szrj ifc_bbs = NULL;
2559*38fd1498Szrj free (predicated);
2560*38fd1498Szrj }
2561*38fd1498Szrj
2562*38fd1498Szrj /* Version LOOP before if-converting it; the original loop
2563*38fd1498Szrj will be if-converted, the new copy of the loop will not,
2564*38fd1498Szrj and the LOOP_VECTORIZED internal call will be guarding which
2565*38fd1498Szrj loop to execute. The vectorizer pass will fold this
2566*38fd1498Szrj internal call into either true or false.
2567*38fd1498Szrj
2568*38fd1498Szrj Note that this function intentionally invalidates profile. Both edges
2569*38fd1498Szrj out of LOOP_VECTORIZED must have 100% probability so the profile remains
2570*38fd1498Szrj consistent after the condition is folded in the vectorizer. */
/* Returns the new loop copy, or NULL when loop_version fails.  In the
   failure case the IFN_LOOP_VECTORIZED call built here is never inserted
   into the IL.  */
2571*38fd1498Szrj
2572*38fd1498Szrj static struct loop *
version_loop_for_if_conversion(struct loop * loop)2573*38fd1498Szrj version_loop_for_if_conversion (struct loop *loop)
2574*38fd1498Szrj {
2575*38fd1498Szrj basic_block cond_bb;
2576*38fd1498Szrj tree cond = make_ssa_name (boolean_type_node);
2577*38fd1498Szrj struct loop *new_loop;
2578*38fd1498Szrj gimple *g;
2579*38fd1498Szrj gimple_stmt_iterator gsi;
2580*38fd1498Szrj unsigned int save_length;
2581*38fd1498Szrj
/* Build COND = IFN_LOOP_VECTORIZED (loop->num, 0).  The second argument is
   a placeholder that is replaced with the copy's number once it exists.  */
2582*38fd1498Szrj g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2583*38fd1498Szrj build_int_cst (integer_type_node, loop->num),
2584*38fd1498Szrj integer_zero_node);
2585*38fd1498Szrj gimple_call_set_lhs (g, cond);
2586*38fd1498Szrj
2587*38fd1498Szrj /* Save BB->aux around loop_version as that uses the same field. */
/* When LOOP has an inner loop, the number of saved entries is the inner
   loop's node count rather than LOOP's own; in both cases the entries
   are those of ifc_bbs.  */
2588*38fd1498Szrj save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2589*38fd1498Szrj void **saved_preds = XALLOCAVEC (void *, save_length);
2590*38fd1498Szrj for (unsigned i = 0; i < save_length; i++)
2591*38fd1498Szrj saved_preds[i] = ifc_bbs[i]->aux;
2592*38fd1498Szrj
2593*38fd1498Szrj initialize_original_copy_tables ();
2594*38fd1498Szrj /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
2595*38fd1498Szrj is re-merged in the vectorizer. */
2596*38fd1498Szrj new_loop = loop_version (loop, cond, &cond_bb,
2597*38fd1498Szrj profile_probability::always (),
2598*38fd1498Szrj profile_probability::always (),
2599*38fd1498Szrj profile_probability::always (),
2600*38fd1498Szrj profile_probability::always (), true);
2601*38fd1498Szrj free_original_copy_tables ();
2602*38fd1498Szrj
/* The aux fields are restored before the failure check, so they are put
   back whether or not versioning succeeded.  */
2603*38fd1498Szrj for (unsigned i = 0; i < save_length; i++)
2604*38fd1498Szrj ifc_bbs[i]->aux = saved_preds[i];
2605*38fd1498Szrj
2606*38fd1498Szrj if (new_loop == NULL)
2607*38fd1498Szrj return NULL;
2608*38fd1498Szrj
/* The copy is marked so that it is never vectorized.  */
2609*38fd1498Szrj new_loop->dont_vectorize = true;
2610*38fd1498Szrj new_loop->force_vectorize = false;
/* Fill in the copy's loop number and place the call before the last
   statement of the guarding block.  */
2611*38fd1498Szrj gsi = gsi_last_bb (cond_bb);
2612*38fd1498Szrj gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2613*38fd1498Szrj gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2614*38fd1498Szrj update_ssa (TODO_update_ssa);
2615*38fd1498Szrj return new_loop;
2616*38fd1498Szrj }
2617*38fd1498Szrj
2618*38fd1498Szrj /* Return true when LOOP satisfies the following conditions that will
2619*38fd1498Szrj allow it to be recognized by the vectorizer for outer-loop
2620*38fd1498Szrj vectorization:
2621*38fd1498Szrj - The loop is not the root node of the loop tree.
2622*38fd1498Szrj - The loop has exactly one inner loop.
2623*38fd1498Szrj - The loop has a single exit.
2624*38fd1498Szrj - The loop header has a single successor, which is the inner
2625*38fd1498Szrj loop header.
2626*38fd1498Szrj - Each of the inner and outer loop latches have a single
2627*38fd1498Szrj predecessor.
2628*38fd1498Szrj - The loop exit block has a single predecessor, which is the
2629*38fd1498Szrj inner loop's exit block. */
/* In addition to the list above, a loop whose dont_vectorize flag is set
   is rejected.  A line is written to dump_file only on success.  */
2630*38fd1498Szrj
2631*38fd1498Szrj static bool
versionable_outer_loop_p(struct loop * loop)2632*38fd1498Szrj versionable_outer_loop_p (struct loop *loop)
2633*38fd1498Szrj {
/* The checks are ordered so that each dereference is guarded by an
   earlier test (loop->inner before loop->inner->next, single_succ_p
   before single_succ, single_pred_p before single_pred below).  */
2634*38fd1498Szrj if (!loop_outer (loop)
2635*38fd1498Szrj || loop->dont_vectorize
2636*38fd1498Szrj || !loop->inner
2637*38fd1498Szrj || loop->inner->next
2638*38fd1498Szrj || !single_exit (loop)
2639*38fd1498Szrj || !single_succ_p (loop->header)
2640*38fd1498Szrj || single_succ (loop->header) != loop->inner->header
2641*38fd1498Szrj || !single_pred_p (loop->latch)
2642*38fd1498Szrj || !single_pred_p (loop->inner->latch))
2643*38fd1498Szrj return false;
2644*38fd1498Szrj
/* The "exit blocks" are taken to be the sole predecessors of the two
   latches.  */
2645*38fd1498Szrj basic_block outer_exit = single_pred (loop->latch);
2646*38fd1498Szrj basic_block inner_exit = single_pred (loop->inner->latch);
2647*38fd1498Szrj
2648*38fd1498Szrj if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2649*38fd1498Szrj return false;
2650*38fd1498Szrj
2651*38fd1498Szrj if (dump_file)
2652*38fd1498Szrj fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2653*38fd1498Szrj
2654*38fd1498Szrj return true;
2655*38fd1498Szrj }
2656*38fd1498Szrj
2657*38fd1498Szrj /* Performs splitting of critical edges. Skip splitting and return false
2658*38fd1498Szrj if LOOP will not be converted because:
2659*38fd1498Szrj
2660*38fd1498Szrj - LOOP is not well formed.
2661*38fd1498Szrj - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2662*38fd1498Szrj
2663*38fd1498Szrj Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
/* "Not well formed" here means: at most two blocks, an inner loop, or no
   single exit.  Candidate edges are collected first and split only after
   the whole body has been scanned.  Returns true otherwise, whether or
   not any edge was actually split.  */
2664*38fd1498Szrj
2665*38fd1498Szrj static bool
ifcvt_split_critical_edges(struct loop * loop,bool aggressive_if_conv)2666*38fd1498Szrj ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
2667*38fd1498Szrj {
2668*38fd1498Szrj basic_block *body;
2669*38fd1498Szrj basic_block bb;
2670*38fd1498Szrj unsigned int num = loop->num_nodes;
2671*38fd1498Szrj unsigned int i;
2672*38fd1498Szrj gimple *stmt;
2673*38fd1498Szrj edge e;
2674*38fd1498Szrj edge_iterator ei;
2675*38fd1498Szrj auto_vec<edge> critical_edges;
2676*38fd1498Szrj
2677*38fd1498Szrj /* Loop is not well formed. */
2678*38fd1498Szrj if (num <= 2 || loop->inner || !single_exit (loop))
2679*38fd1498Szrj return false;
2680*38fd1498Szrj
/* BODY is released with free on every path out of the scan below.  */
2681*38fd1498Szrj body = get_loop_body (loop);
2682*38fd1498Szrj for (i = 0; i < num; i++)
2683*38fd1498Szrj {
2684*38fd1498Szrj bb = body[i];
/* The PHI argument count is judged by the block's predecessor count, and
   only for blocks that have PHI nodes.  */
2685*38fd1498Szrj if (!aggressive_if_conv
2686*38fd1498Szrj && phi_nodes (bb)
2687*38fd1498Szrj && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
2688*38fd1498Szrj {
2689*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2690*38fd1498Szrj fprintf (dump_file,
2691*38fd1498Szrj "BB %d has complicated PHI with more than %u args.\n",
2692*38fd1498Szrj bb->index, MAX_PHI_ARG_NUM);
2693*38fd1498Szrj
2694*38fd1498Szrj free (body);
2695*38fd1498Szrj return false;
2696*38fd1498Szrj }
/* Outgoing edges of the latch and of blocks with an exit edge are never
   candidates.  */
2697*38fd1498Szrj if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
2698*38fd1498Szrj continue;
2699*38fd1498Szrj
2700*38fd1498Szrj stmt = last_stmt (bb);
2701*38fd1498Szrj /* Skip basic blocks not ending with conditional branch. */
2702*38fd1498Szrj if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2703*38fd1498Szrj continue;
2704*38fd1498Szrj
/* Record critical successor edges whose destination belongs directly to
   LOOP.  */
2705*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
2706*38fd1498Szrj if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2707*38fd1498Szrj critical_edges.safe_push (e);
2708*38fd1498Szrj }
2709*38fd1498Szrj free (body);
2710*38fd1498Szrj
/* Edges are popped from the back of the vector, i.e. in reverse order of
   discovery.  A recorded edge is split only when its destination has more
   than two predecessors or all_preds_critical_p holds for it.  */
2711*38fd1498Szrj while (critical_edges.length () > 0)
2712*38fd1498Szrj {
2713*38fd1498Szrj e = critical_edges.pop ();
2714*38fd1498Szrj /* Don't split if bb can be predicated along non-critical edge. */
2715*38fd1498Szrj if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
2716*38fd1498Szrj split_edge (e);
2717*38fd1498Szrj }
2718*38fd1498Szrj
2719*38fd1498Szrj return true;
2720*38fd1498Szrj }
2721*38fd1498Szrj
2722*38fd1498Szrj /* Delete redundant statements produced by predication which prevents
2723*38fd1498Szrj loop vectorization. */
/* Works on the single block BB in three phases: seed a worklist with the
   statements considered live, propagate liveness backwards through SSA
   uses defined in BB, then remove every statement left unmarked.  The
   pass-local flag GF_PLF_2 serves as the "live" mark.  */
2724*38fd1498Szrj
2725*38fd1498Szrj static void
ifcvt_local_dce(basic_block bb)2726*38fd1498Szrj ifcvt_local_dce (basic_block bb)
2727*38fd1498Szrj {
2728*38fd1498Szrj gimple *stmt;
2729*38fd1498Szrj gimple *stmt1;
2730*38fd1498Szrj gimple *phi;
2731*38fd1498Szrj gimple_stmt_iterator gsi;
2732*38fd1498Szrj auto_vec<gimple *> worklist;
2733*38fd1498Szrj enum gimple_code code;
2734*38fd1498Szrj use_operand_p use_p;
2735*38fd1498Szrj imm_use_iterator imm_iter;
2736*38fd1498Szrj
2737*38fd1498Szrj worklist.create (64);
2738*38fd1498Szrj /* Consider all phi as live statements. */
2739*38fd1498Szrj for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2740*38fd1498Szrj {
2741*38fd1498Szrj phi = gsi_stmt (gsi);
2742*38fd1498Szrj gimple_set_plf (phi, GF_PLF_2, true);
2743*38fd1498Szrj worklist.safe_push (phi);
2744*38fd1498Szrj }
2745*38fd1498Szrj /* Consider load/store statements, debug statements, CALL and COND as live. */
2746*38fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2747*38fd1498Szrj {
2748*38fd1498Szrj stmt = gsi_stmt (gsi);
2749*38fd1498Szrj if (gimple_store_p (stmt)
2750*38fd1498Szrj || gimple_assign_load_p (stmt)
2751*38fd1498Szrj || is_gimple_debug (stmt))
2752*38fd1498Szrj {
2753*38fd1498Szrj gimple_set_plf (stmt, GF_PLF_2, true);
2754*38fd1498Szrj worklist.safe_push (stmt);
2755*38fd1498Szrj continue;
2756*38fd1498Szrj }
2757*38fd1498Szrj code = gimple_code (stmt);
2758*38fd1498Szrj if (code == GIMPLE_COND || code == GIMPLE_CALL)
2759*38fd1498Szrj {
2760*38fd1498Szrj gimple_set_plf (stmt, GF_PLF_2, true);
2761*38fd1498Szrj worklist.safe_push (stmt);
2762*38fd1498Szrj continue;
2763*38fd1498Szrj }
/* Everything else starts out dead; the flag is cleared explicitly so that
   a stale value cannot be mistaken for a live mark.  */
2764*38fd1498Szrj gimple_set_plf (stmt, GF_PLF_2, false);
2765*38fd1498Szrj
/* An assignment whose result is used by a statement outside BB is live
   as well; one such use is enough.  */
2766*38fd1498Szrj if (code == GIMPLE_ASSIGN)
2767*38fd1498Szrj {
2768*38fd1498Szrj tree lhs = gimple_assign_lhs (stmt);
2769*38fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2770*38fd1498Szrj {
2771*38fd1498Szrj stmt1 = USE_STMT (use_p);
2772*38fd1498Szrj if (gimple_bb (stmt1) != bb)
2773*38fd1498Szrj {
2774*38fd1498Szrj gimple_set_plf (stmt, GF_PLF_2, true);
2775*38fd1498Szrj worklist.safe_push (stmt);
2776*38fd1498Szrj break;
2777*38fd1498Szrj }
2778*38fd1498Szrj }
2779*38fd1498Szrj }
2780*38fd1498Szrj }
2781*38fd1498Szrj /* Propagate liveness through arguments of live stmt. */
2782*38fd1498Szrj while (worklist.length () > 0)
2783*38fd1498Szrj {
2784*38fd1498Szrj ssa_op_iter iter;
/* This use_p shadows the function-level variable of the same name.  */
2785*38fd1498Szrj use_operand_p use_p;
2786*38fd1498Szrj tree use;
2787*38fd1498Szrj
2788*38fd1498Szrj stmt = worklist.pop ();
2789*38fd1498Szrj FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2790*38fd1498Szrj {
2791*38fd1498Szrj use = USE_FROM_PTR (use_p);
2792*38fd1498Szrj if (TREE_CODE (use) != SSA_NAME)
2793*38fd1498Szrj continue;
2794*38fd1498Szrj stmt1 = SSA_NAME_DEF_STMT (use);
/* Definitions outside BB and definitions already marked live are not
   queued.  */
2795*38fd1498Szrj if (gimple_bb (stmt1) != bb
2796*38fd1498Szrj || gimple_plf (stmt1, GF_PLF_2))
2797*38fd1498Szrj continue;
2798*38fd1498Szrj gimple_set_plf (stmt1, GF_PLF_2, true);
2799*38fd1498Szrj worklist.safe_push (stmt1);
2800*38fd1498Szrj }
2801*38fd1498Szrj }
2802*38fd1498Szrj /* Delete dead statements. */
2803*38fd1498Szrj gsi = gsi_start_bb (bb);
2804*38fd1498Szrj while (!gsi_end_p (gsi))
2805*38fd1498Szrj {
2806*38fd1498Szrj stmt = gsi_stmt (gsi);
2807*38fd1498Szrj if (gimple_plf (stmt, GF_PLF_2))
2808*38fd1498Szrj {
2809*38fd1498Szrj gsi_next (&gsi);
2810*38fd1498Szrj continue;
2811*38fd1498Szrj }
2812*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2813*38fd1498Szrj {
2814*38fd1498Szrj fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2815*38fd1498Szrj print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2816*38fd1498Szrj }
/* No gsi_next after removal (presumably gsi_remove already leaves the
   iterator on the following statement).  */
2817*38fd1498Szrj gsi_remove (&gsi, true);
2818*38fd1498Szrj release_defs (stmt);
2819*38fd1498Szrj }
2820*38fd1498Szrj }
2821*38fd1498Szrj
2822*38fd1498Szrj /* If-convert LOOP when it is legal. For the moment this pass has no
2823*38fd1498Szrj profitability analysis. Returns non-zero todo flags when something
2824*38fd1498Szrj changed. */
/* The only flag set here is TODO_cleanup_cfg, and only once the blocks
   have actually been combined.  When the enclosing loop was versioned
   instead of LOOP itself, the whole procedure is repeated for the inner
   loop of the non-vectorized copy via the "again" label.  */
2825*38fd1498Szrj
2826*38fd1498Szrj unsigned int
tree_if_conversion(struct loop * loop)2827*38fd1498Szrj tree_if_conversion (struct loop *loop)
2828*38fd1498Szrj {
2829*38fd1498Szrj unsigned int todo = 0;
2830*38fd1498Szrj bool aggressive_if_conv;
2831*38fd1498Szrj struct loop *rloop;
2832*38fd1498Szrj
/* Per-attempt state is reset on every pass through this label.  */
2833*38fd1498Szrj again:
2834*38fd1498Szrj rloop = NULL;
2835*38fd1498Szrj ifc_bbs = NULL;
2836*38fd1498Szrj any_pred_load_store = false;
2837*38fd1498Szrj any_complicated_phi = false;
2838*38fd1498Szrj
2839*38fd1498Szrj /* Apply more aggressive if-conversion when loop or its outer loop were
2840*38fd1498Szrj marked with simd pragma. When that's the case, we try to if-convert
2841*38fd1498Szrj loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
2842*38fd1498Szrj aggressive_if_conv = loop->force_vectorize;
2843*38fd1498Szrj if (!aggressive_if_conv)
2844*38fd1498Szrj {
2845*38fd1498Szrj struct loop *outer_loop = loop_outer (loop);
2846*38fd1498Szrj if (outer_loop && outer_loop->force_vectorize)
2847*38fd1498Szrj aggressive_if_conv = true;
2848*38fd1498Szrj }
2849*38fd1498Szrj
2850*38fd1498Szrj if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
2851*38fd1498Szrj goto cleanup;
2852*38fd1498Szrj
/* NOTE(review): ifc_bbs, any_pred_load_store and any_complicated_phi are
   presumably filled in by if_convertible_loop_p, which is defined outside
   this chunk -- confirm.  */
2853*38fd1498Szrj if (!if_convertible_loop_p (loop)
2854*38fd1498Szrj || !dbg_cnt (if_conversion_tree))
2855*38fd1498Szrj goto cleanup;
2856*38fd1498Szrj
/* Predicated memory accesses or complicated PHIs are only worthwhile when
   the loop may be vectorized afterwards; otherwise give up.  */
2857*38fd1498Szrj if ((any_pred_load_store || any_complicated_phi)
2858*38fd1498Szrj && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2859*38fd1498Szrj || loop->dont_vectorize))
2860*38fd1498Szrj goto cleanup;
2861*38fd1498Szrj
2862*38fd1498Szrj /* Since we have no cost model, always version loops unless the user
2863*38fd1498Szrj specified -ftree-loop-if-convert or unless versioning is required.
2864*38fd1498Szrj Either version this loop, or if the pattern is right for outer-loop
2865*38fd1498Szrj vectorization, version the outer loop. In the latter case we will
2866*38fd1498Szrj still if-convert the original inner loop. */
2867*38fd1498Szrj if (any_pred_load_store
2868*38fd1498Szrj || any_complicated_phi
2869*38fd1498Szrj || flag_tree_loop_if_convert != 1)
2870*38fd1498Szrj {
/* VLOOP is the loop that gets versioned: the enclosing loop when it
   qualifies, LOOP itself otherwise.  */
2871*38fd1498Szrj struct loop *vloop
2872*38fd1498Szrj = (versionable_outer_loop_p (loop_outer (loop))
2873*38fd1498Szrj ? loop_outer (loop) : loop);
2874*38fd1498Szrj struct loop *nloop = version_loop_for_if_conversion (vloop);
2875*38fd1498Szrj if (nloop == NULL)
2876*38fd1498Szrj goto cleanup;
2877*38fd1498Szrj if (vloop != loop)
2878*38fd1498Szrj {
2879*38fd1498Szrj /* If versionable_outer_loop_p decided to version the
2880*38fd1498Szrj outer loop, version also the inner loop of the non-vectorized
2881*38fd1498Szrj loop copy. So we transform:
2882*38fd1498Szrj loop1
2883*38fd1498Szrj loop2
2884*38fd1498Szrj into:
2885*38fd1498Szrj if (LOOP_VECTORIZED (1, 3))
2886*38fd1498Szrj {
2887*38fd1498Szrj loop1
2888*38fd1498Szrj loop2
2889*38fd1498Szrj }
2890*38fd1498Szrj else
2891*38fd1498Szrj loop3 (copy of loop1)
2892*38fd1498Szrj if (LOOP_VECTORIZED (4, 5))
2893*38fd1498Szrj loop4 (copy of loop2)
2894*38fd1498Szrj else
2895*38fd1498Szrj loop5 (copy of loop4) */
2896*38fd1498Szrj gcc_assert (nloop->inner && nloop->inner->next == NULL);
/* Remember the copy's inner loop; it is processed after cleanup.  */
2897*38fd1498Szrj rloop = nloop->inner;
2898*38fd1498Szrj }
2899*38fd1498Szrj }
2900*38fd1498Szrj
2901*38fd1498Szrj /* Now all statements are if-convertible. Combine all the basic
2902*38fd1498Szrj blocks into one huge basic block doing the if-conversion
2903*38fd1498Szrj on-the-fly. */
2904*38fd1498Szrj combine_blocks (loop);
2905*38fd1498Szrj
2906*38fd1498Szrj /* Delete dead predicate computations. */
2907*38fd1498Szrj ifcvt_local_dce (loop->header);
2908*38fd1498Szrj
2909*38fd1498Szrj todo |= TODO_cleanup_cfg;
2910*38fd1498Szrj
/* combine_blocks frees ifc_bbs and sets it to NULL, so the release below
   only does work on the early-exit paths that jumped here.  */
2911*38fd1498Szrj cleanup:
2912*38fd1498Szrj if (ifc_bbs)
2913*38fd1498Szrj {
2914*38fd1498Szrj unsigned int i;
2915*38fd1498Szrj
2916*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
2917*38fd1498Szrj free_bb_predicate (ifc_bbs[i]);
2918*38fd1498Szrj
2919*38fd1498Szrj free (ifc_bbs);
2920*38fd1498Szrj ifc_bbs = NULL;
2921*38fd1498Szrj }
/* Repeat the whole procedure for the inner loop of the non-vectorized
   copy, if one was recorded above.  */
2922*38fd1498Szrj if (rloop != NULL)
2923*38fd1498Szrj {
2924*38fd1498Szrj loop = rloop;
2925*38fd1498Szrj goto again;
2926*38fd1498Szrj }
2927*38fd1498Szrj
2928*38fd1498Szrj return todo;
2929*38fd1498Szrj }
2930*38fd1498Szrj
2931*38fd1498Szrj /* Tree if-conversion pass management. */
2932*38fd1498Szrj
2933*38fd1498Szrj namespace {
2934*38fd1498Szrj
/* Static description of the pass, handed to the gimple_opt_pass base
   constructor below.  The pass is named "ifcvt", requires PROP_cfg and
   PROP_ssa, and requests no todo flags statically.  */
2935*38fd1498Szrj const pass_data pass_data_if_conversion =
2936*38fd1498Szrj {
2937*38fd1498Szrj GIMPLE_PASS, /* type */
2938*38fd1498Szrj "ifcvt", /* name */
2939*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
2940*38fd1498Szrj TV_TREE_LOOP_IFCVT, /* tv_id */
2941*38fd1498Szrj ( PROP_cfg | PROP_ssa ), /* properties_required */
2942*38fd1498Szrj 0, /* properties_provided */
2943*38fd1498Szrj 0, /* properties_destroyed */
2944*38fd1498Szrj 0, /* todo_flags_start */
2945*38fd1498Szrj 0, /* todo_flags_finish */
2946*38fd1498Szrj };
2947*38fd1498Szrj
/* The pass object.  It has no data members of its own; gate and execute
   are defined out of line below.  */
2948*38fd1498Szrj class pass_if_conversion : public gimple_opt_pass
2949*38fd1498Szrj {
2950*38fd1498Szrj public:
pass_if_conversion(gcc::context * ctxt)2951*38fd1498Szrj pass_if_conversion (gcc::context *ctxt)
2952*38fd1498Szrj : gimple_opt_pass (pass_data_if_conversion, ctxt)
2953*38fd1498Szrj {}
2954*38fd1498Szrj
2955*38fd1498Szrj /* opt_pass methods: */
2956*38fd1498Szrj virtual bool gate (function *);
2957*38fd1498Szrj virtual unsigned int execute (function *);
2958*38fd1498Szrj
2959*38fd1498Szrj }; // class pass_if_conversion
2960*38fd1498Szrj
/* Gate: true when flag_tree_loop_if_convert is 1, or when it is non-zero
   and either flag_tree_loop_vectorize is set or FUN has loops marked for
   forced vectorization.  */
2961*38fd1498Szrj bool
gate(function * fun)2962*38fd1498Szrj pass_if_conversion::gate (function *fun)
2963*38fd1498Szrj {
2964*38fd1498Szrj return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2965*38fd1498Szrj && flag_tree_loop_if_convert != 0)
2966*38fd1498Szrj || flag_tree_loop_if_convert == 1);
2967*38fd1498Szrj }
2968*38fd1498Szrj
/* Execute: run tree_if_conversion on each eligible loop of FUN and return
   the union of the todo flags it reports.  */
2969*38fd1498Szrj unsigned int
execute(function * fun)2970*38fd1498Szrj pass_if_conversion::execute (function *fun)
2971*38fd1498Szrj {
2972*38fd1498Szrj struct loop *loop;
2973*38fd1498Szrj unsigned todo = 0;
2974*38fd1498Szrj
/* Nothing to do when the loop count is at most one (presumably the count
   includes the function's root pseudo-loop -- confirm).  */
2975*38fd1498Szrj if (number_of_loops (fun) <= 1)
2976*38fd1498Szrj return 0;
2977*38fd1498Szrj
/* A loop is processed when if-conversion was requested explicitly, or
   when it may be vectorized and is not marked dont_vectorize.  */
2978*38fd1498Szrj FOR_EACH_LOOP (loop, 0)
2979*38fd1498Szrj if (flag_tree_loop_if_convert == 1
2980*38fd1498Szrj || ((flag_tree_loop_vectorize || loop->force_vectorize)
2981*38fd1498Szrj && !loop->dont_vectorize))
2982*38fd1498Szrj todo |= tree_if_conversion (loop);
2983*38fd1498Szrj
/* If anything changed, cached iteration-count estimates are freed and
   scalar-evolution data is reset.  */
2984*38fd1498Szrj if (todo)
2985*38fd1498Szrj {
2986*38fd1498Szrj free_numbers_of_iterations_estimates (fun);
2987*38fd1498Szrj scev_reset ();
2988*38fd1498Szrj }
2989*38fd1498Szrj
/* With checking enabled, verify that no basic block is left with a
   non-null aux field.  */
2990*38fd1498Szrj if (flag_checking)
2991*38fd1498Szrj {
2992*38fd1498Szrj basic_block bb;
2993*38fd1498Szrj FOR_EACH_BB_FN (bb, fun)
2994*38fd1498Szrj gcc_assert (!bb->aux);
2995*38fd1498Szrj }
2996*38fd1498Szrj
2997*38fd1498Szrj return todo;
2998*38fd1498Szrj }
2999*38fd1498Szrj
3000*38fd1498Szrj } // anon namespace
3001*38fd1498Szrj
3002*38fd1498Szrj gimple_opt_pass *
make_pass_if_conversion(gcc::context * ctxt)3003*38fd1498Szrj make_pass_if_conversion (gcc::context *ctxt)
3004*38fd1498Szrj {
3005*38fd1498Szrj return new pass_if_conversion (ctxt);
3006*38fd1498Szrj }
3007