1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
24
25 A short description of if-conversion:
26
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
37
38 Sample transformation:
39
40 INPUT
41 -----
42
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
47
48 <L17>:;
49 goto <bb 3> (<L3>);
50
51 <L1>:;
52
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
58
59 <L19>:;
60 goto <bb 1> (<L0>);
61
62 <L18>:;
63
64 OUTPUT
65 ------
66
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
70
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
76
77 <L19>:;
78 goto <bb 1> (<L0>);
79
80 <L18>:;
81 */
82
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "cfganal.h"
118 #include "internal-fn.h"
119 #include "fold-const.h"
120 #include "tree-ssa-sccvn.h"
121 #include "tree-cfgcleanup.h"
122 #include "tree-ssa-dse.h"
123 #include "tree-vectorizer.h"
124 #include "tree-eh.h"
125
/* Only handle PHIs with no more arguments than this limit, unless we
   are asked to by simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;
147
/* Hash traits for pointers to struct innermost_loop_behavior, keyed on
   the base address / offset / init / step trees.  It depends on the
   user to free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &,
			    const compare_type &);
};
157
158 inline hashval_t
hash(const value_type & e)159 innermost_loop_behavior_hash::hash (const value_type &e)
160 {
161 hashval_t hash;
162
163 hash = iterative_hash_expr (e->base_address, 0);
164 hash = iterative_hash_expr (e->offset, hash);
165 hash = iterative_hash_expr (e->init, hash);
166 return iterative_hash_expr (e->step, hash);
167 }
168
169 inline bool
equal(const value_type & e1,const compare_type & e2)170 innermost_loop_behavior_hash::equal (const value_type &e1,
171 const compare_type &e2)
172 {
173 if ((e1->base_address && !e2->base_address)
174 || (!e1->base_address && e2->base_address)
175 || (!e1->offset && e2->offset)
176 || (e1->offset && !e2->offset)
177 || (!e1->init && e2->init)
178 || (e1->init && !e2->init)
179 || (!e1->step && e2->step)
180 || (e1->step && !e2->step))
181 return false;
182
183 if (e1->base_address && e2->base_address
184 && !operand_equal_p (e1->base_address, e2->base_address, 0))
185 return false;
186 if (e1->offset && e2->offset
187 && !operand_equal_p (e1->offset, e2->offset, 0))
188 return false;
189 if (e1->init && e2->init
190 && !operand_equal_p (e1->init, e2->init, 0))
191 return false;
192 if (e1->step && e2->step
193 && !operand_equal_p (e1->step, e2->step, 0))
194 return false;
195
196 return true;
197 }
198
/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the
   second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;
211
/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  The sequence is
     emitted into the IL only when the block is actually predicated.  */
  gimple_seq predicate_gimplified_stmts;
};
224
225 /* Returns true when the basic block BB has a predicate. */
226
227 static inline bool
bb_has_predicate(basic_block bb)228 bb_has_predicate (basic_block bb)
229 {
230 return bb->aux != NULL;
231 }
232
233 /* Returns the gimplified predicate for basic block BB. */
234
235 static inline tree
bb_predicate(basic_block bb)236 bb_predicate (basic_block bb)
237 {
238 return ((struct bb_predicate *) bb->aux)->predicate;
239 }
240
241 /* Sets the gimplified predicate COND for basic block BB. */
242
243 static inline void
set_bb_predicate(basic_block bb,tree cond)244 set_bb_predicate (basic_block bb, tree cond)
245 {
246 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
247 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
248 || is_gimple_condexpr (cond));
249 ((struct bb_predicate *) bb->aux)->predicate = cond;
250 }
251
252 /* Returns the sequence of statements of the gimplification of the
253 predicate for basic block BB. */
254
255 static inline gimple_seq
bb_predicate_gimplified_stmts(basic_block bb)256 bb_predicate_gimplified_stmts (basic_block bb)
257 {
258 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
259 }
260
261 /* Sets the sequence of statements STMTS of the gimplification of the
262 predicate for basic block BB. */
263
264 static inline void
set_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)265 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
266 {
267 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
268 }
269
270 /* Adds the sequence of statements STMTS to the sequence of statements
271 of the predicate for basic block BB. */
272
273 static inline void
add_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)274 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
275 {
276 /* We might have updated some stmts in STMTS via force_gimple_operand
277 calling fold_stmt and that producing multiple stmts. Delink immediate
278 uses so update_ssa after loop versioning doesn't get confused for
279 the not yet inserted predicates.
280 ??? This should go away once we reliably avoid updating stmts
281 not in any BB. */
282 for (gimple_stmt_iterator gsi = gsi_start (stmts);
283 !gsi_end_p (gsi); gsi_next (&gsi))
284 {
285 gimple *stmt = gsi_stmt (gsi);
286 delink_stmt_imm_use (stmt);
287 gimple_set_modified (stmt, true);
288 }
289 gimple_seq_add_seq_without_update
290 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
291 }
292
293 /* Initializes to TRUE the predicate of basic block BB. */
294
295 static inline void
init_bb_predicate(basic_block bb)296 init_bb_predicate (basic_block bb)
297 {
298 bb->aux = XNEW (struct bb_predicate);
299 set_bb_predicate_gimplified_stmts (bb, NULL);
300 set_bb_predicate (bb, boolean_true_node);
301 }
302
303 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
304
305 static inline void
release_bb_predicate(basic_block bb)306 release_bb_predicate (basic_block bb)
307 {
308 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
309 if (stmts)
310 {
311 /* Ensure that these stmts haven't yet been added to a bb. */
312 if (flag_checking)
313 for (gimple_stmt_iterator i = gsi_start (stmts);
314 !gsi_end_p (i); gsi_next (&i))
315 gcc_assert (! gimple_bb (gsi_stmt (i)));
316
317 /* Discard them. */
318 gimple_seq_discard (stmts);
319 set_bb_predicate_gimplified_stmts (bb, NULL);
320 }
321 }
322
323 /* Free the predicate of basic block BB. */
324
325 static inline void
free_bb_predicate(basic_block bb)326 free_bb_predicate (basic_block bb)
327 {
328 if (!bb_has_predicate (bb))
329 return;
330
331 release_bb_predicate (bb);
332 free (bb->aux);
333 bb->aux = NULL;
334 }
335
336 /* Reinitialize predicate of BB with the true predicate. */
337
338 static inline void
reset_bb_predicate(basic_block bb)339 reset_bb_predicate (basic_block bb)
340 {
341 if (!bb_has_predicate (bb))
342 init_bb_predicate (bb);
343 else
344 {
345 release_bb_predicate (bb);
346 set_bb_predicate (bb, boolean_true_node);
347 }
348 }
349
350 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
351 the expression EXPR. Inserts the statement created for this
352 computation before GSI and leaves the iterator GSI at the same
353 statement. */
354
355 static tree
ifc_temp_var(tree type,tree expr,gimple_stmt_iterator * gsi)356 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
357 {
358 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
359 gimple *stmt = gimple_build_assign (new_name, expr);
360 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
361 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
362 return new_name;
363 }
364
365 /* Return true when COND is a false predicate. */
366
367 static inline bool
is_false_predicate(tree cond)368 is_false_predicate (tree cond)
369 {
370 return (cond != NULL_TREE
371 && (cond == boolean_false_node
372 || integer_zerop (cond)));
373 }
374
375 /* Return true when COND is a true predicate. */
376
377 static inline bool
is_true_predicate(tree cond)378 is_true_predicate (tree cond)
379 {
380 return (cond == NULL_TREE
381 || cond == boolean_true_node
382 || integer_onep (cond));
383 }
384
385 /* Returns true when BB has a predicate that is not trivial: true or
386 NULL_TREE. */
387
388 static inline bool
is_predicated(basic_block bb)389 is_predicated (basic_block bb)
390 {
391 return !is_true_predicate (bb_predicate (bb));
392 }
393
/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  Returns ERROR_MARK when COND cannot be
   decomposed into a single comparison.  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  /* An SSA_NAME predicate: look through its defining assignment.  */
  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}

      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  /* Recurse on the negated operand and invert the resulting
	     comparison code, honoring NaNs so the inversion stays
	     valid for floating-point types.  */
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  /* A bare comparison tree.  */
  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
434
435 /* Returns the fold of predicate C1 OR C2 at location LOC. */
436
437 static tree
fold_or_predicates(location_t loc,tree c1,tree c2)438 fold_or_predicates (location_t loc, tree c1, tree c2)
439 {
440 tree op1a, op1b, op2a, op2b;
441 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
442 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
443
444 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
445 {
446 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
447 code2, op2a, op2b);
448 if (t)
449 return t;
450 }
451
452 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
453 }
454
/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME.  */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  tree rhs1, lhs1, cond_expr;

  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }
  cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);

  /* fold_ternary returns NULL_TREE when nothing folded; build the
     plain COND_EXPR in that case.  */
  if (cond_expr == NULL_TREE)
    return build3 (COND_EXPR, type, cond, rhs, lhs);

  STRIP_USELESS_TYPE_CONVERSION (cond_expr);

  /* A constant or SSA_NAME result can be used directly.  */
  if (is_gimple_val (cond_expr))
    return cond_expr;

  /* Re-expose an ABS_EXPR whose operand is a gimple value.
     NOTE(review): the operand is read from slot 1 here, not slot 0 —
     confirm this matches how fold_ternary constructs the ABS_EXPR.  */
  if (TREE_CODE (cond_expr) == ABS_EXPR)
    {
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1))
	return build1 (ABS_EXPR, type, rhs1);
    }

  /* Likewise for MIN_EXPR / MAX_EXPR with gimple value operands.  */
  if (TREE_CODE (cond_expr) == MIN_EXPR
      || TREE_CODE (cond_expr) == MAX_EXPR)
    {
      lhs1 = TREE_OPERAND (cond_expr, 0);
      STRIP_USELESS_TYPE_CONVERSION (lhs1);
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
	return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
    }

  /* Anything else: fall back to the unfolded COND_EXPR.  */
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
505
/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  /* A true condition adds no information.  */
  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition into any predicate already recorded for BB;
     if the result folds to true, BB is unconditional after all.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  /* Gimplify the predicate if needed; the produced statements are
     stashed on BB and emitted later.  */
  if (!is_gimple_condexpr (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
571
572 /* Add the condition COND to the previous condition PREV_COND, and add
573 this to the predicate list of the destination of edge E. LOOP is
574 the loop to be if-converted. */
575
576 static void
add_to_dst_predicate_list(class loop * loop,edge e,tree prev_cond,tree cond)577 add_to_dst_predicate_list (class loop *loop, edge e,
578 tree prev_cond, tree cond)
579 {
580 if (!flow_bb_inside_loop_p (loop, e->dest))
581 return;
582
583 if (!is_true_predicate (prev_cond))
584 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
585 prev_cond, cond);
586
587 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
588 add_to_predicate_list (loop, e->dest, cond);
589 }
590
591 /* Return true if one of the successor edges of BB exits LOOP. */
592
593 static bool
bb_with_exit_edge_p(class loop * loop,basic_block bb)594 bb_with_exit_edge_p (class loop *loop, basic_block bb)
595 {
596 edge e;
597 edge_iterator ei;
598
599 FOR_EACH_EDGE (e, ei, bb->succs)
600 if (loop_exit_edge_p (loop, e))
601 return true;
602
603 return false;
604 }
605
/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically, if
   below two conditions are satisfied:

     1) Number of PHI arguments with different values equals to 2 and one
	argument has the only occurrence.
     2) The edge corresponding to the unique argument isn't critical edge.

   Such PHI can be handled as PHIs have only two arguments.  For example,
   below PHI:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  /* i1/i2 remember the last argument index seen for each distinct
     value, n1/n2 count the occurrences of each value.  */
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values; bail out
     as soon as a third distinct value shows up.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  /* Exactly one of the two values must occur exactly once.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}
664
665 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
666 and it belongs to basic block BB. Note at this point, it is sure
667 that PHI is if-convertible. This function updates global variable
668 ANY_COMPLICATED_PHI if PHI is complicated. */
669
670 static bool
if_convertible_phi_p(class loop * loop,basic_block bb,gphi * phi)671 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
672 {
673 if (dump_file && (dump_flags & TDF_DETAILS))
674 {
675 fprintf (dump_file, "-------------------------\n");
676 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
677 }
678
679 if (bb != loop->header
680 && gimple_phi_num_args (phi) > 2
681 && !phi_convertible_by_degenerating_args (phi))
682 any_complicated_phi = true;
683
684 return true;
685 }
686
/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  /* True when the accumulated read/write predicate folds to true,
     i.e. the reference is accessed on every iteration.  */
  bool rw_unconditionally;
  /* True when the accumulated write predicate folds to true.  */
  bool w_unconditionally;
  /* True when the base object's accumulated write predicate folds to
     true, i.e. the base is written at least once unconditionally.  */
  bool written_at_least_once;

  /* OR of the predicates of all blocks that read or write this DR.  */
  tree rw_predicate;
  /* OR of the predicates of all blocks that write this DR.  */
  tree w_predicate;
  /* OR of the predicates of all blocks that write this DR's base.  */
  tree base_w_predicate;
};

/* Accessors for the ifc_dr stashed in a data reference's aux field.  */
#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
704
/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  /* Predicate of the block containing the access.  */
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR seen for an innermost behavior becomes the master
     that accumulates the predicates of all equivalent accesses.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  /* Likewise accumulate write predicates per base object.  */
  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
755
/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  Callback for
   for_each_index; DTA is the containing loop.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays at the end of the structure, we are not guaranteed that they
     do not really extend over their declared size.  However, for arrays of
     size greater than one, this is unlikely to be intended.  */
  if (array_at_struct_end_p (ref))
    return false;

  /* Derive the index's initial value and per-iteration step from its
     scalar evolution in LOOP.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* DELTA is the room between the initial value and the bound we move
     towards; WI_STEP is the (positive) distance covered per
     iteration.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
826
/* Return TRUE if ref is a within bound array reference, i.e. every
   index of REF is proven in-bounds by idx_within_array_bound for the
   loop containing STMT.  */

static bool
ref_within_array_bound (gimple *stmt, tree ref)
{
  class loop *loop = loop_containing_stmt (stmt);

  gcc_assert (loop != NULL);
  return for_each_index (&ref, idx_within_array_bound, loop);
}
837
838
839 /* Given a memory reference expression T, return TRUE if base object
840 it refers to is writable. The base object of a memory reference
841 is the main object being referenced, which is returned by function
842 get_base_address. */
843
844 static bool
base_object_writable(tree ref)845 base_object_writable (tree ref)
846 {
847 tree base_tree = get_base_address (ref);
848
849 return (base_tree
850 && DECL_P (base_tree)
851 && decl_binds_to_current_def_p (base_tree)
852 && !TREE_READONLY (base_tree));
853 }
854
/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.  Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  /* The stmt's uid is a 1-based index into DRS, set up by the
     caller.  */
  data_reference_p *master_dr, *base_master_dr;
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
              || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditionaly write won't trap if the base is written
         to unconditionally.  Introducing the unconditional write is
         only valid when store data races are allowed.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return flag_store_data_races;
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}
931
/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb
   predicate), i.e. the target supports a masked variant for the
   access's mode.  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      /* Only a gimple-value RHS can be turned into a masked store.  */
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  /* The masked access takes the address of REF, so REF must be
     addressable.  */
  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
971
972 /* Return true if STMT could be converted from an operation that is
973 unconditional to one that is conditional on a bb predicate mask. */
974
975 static bool
ifcvt_can_predicate(gimple * stmt)976 ifcvt_can_predicate (gimple *stmt)
977 {
978 basic_block bb = gimple_bb (stmt);
979
980 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
981 || bb->loop_father->dont_vectorize
982 || gimple_has_volatile_ops (stmt))
983 return false;
984
985 if (gimple_assign_single_p (stmt))
986 return ifcvt_can_use_mask_load_store (stmt);
987
988 tree_code code = gimple_assign_rhs_code (stmt);
989 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
990 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
991 if (!types_compatible_p (lhs_type, rhs_type))
992 return false;
993 internal_fn cond_fn = get_conditional_internal_fn (code);
994 return (cond_fn != IFN_LAST
995 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
996 }
997
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  /* Aggregate (non register type) results cannot be predicated.  */
  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  /* STMT may trap: either it has no memory access at all (so the
     memref analysis cannot help), or the analysis failed to prove the
     accesses safe.  The only way to keep such a statement is to turn
     it into a masked/conditional operation.  */
  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
	{
	  /* Flag STMT (GF_PLF_2) so it gets predicated rather than
	     speculated when the blocks are combined later.  */
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }
  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
	   && arith_code_with_undefined_signed_overflow
	        (gimple_assign_rhs_code (stmt)))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}
1067
1068 /* Return true when STMT is if-convertible.
1069
1070 A statement is if-convertible if:
1071 - it is an if-convertible GIMPLE_ASSIGN,
1072 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1073 - it is builtins call. */
1074
1075 static bool
if_convertible_stmt_p(gimple * stmt,vec<data_reference_p> refs)1076 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1077 {
1078 switch (gimple_code (stmt))
1079 {
1080 case GIMPLE_LABEL:
1081 case GIMPLE_DEBUG:
1082 case GIMPLE_COND:
1083 return true;
1084
1085 case GIMPLE_ASSIGN:
1086 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1087
1088 case GIMPLE_CALL:
1089 {
1090 tree fndecl = gimple_call_fndecl (stmt);
1091 if (fndecl)
1092 {
1093 int flags = gimple_call_flags (stmt);
1094 if ((flags & ECF_CONST)
1095 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1096 /* We can only vectorize some builtins at the moment,
1097 so restrict if-conversion to those. */
1098 && fndecl_built_in_p (fndecl))
1099 return true;
1100 }
1101 return false;
1102 }
1103
1104 default:
1105 /* Don't know what to do with 'em so don't do anything. */
1106 if (dump_file && (dump_flags & TDF_DETAILS))
1107 {
1108 fprintf (dump_file, "don't know what to do\n");
1109 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1110 }
1111 return false;
1112 }
1113 }
1114
1115 /* Assumes that BB has more than 1 predecessors.
1116 Returns false if at least one successor is not on critical edge
1117 and true otherwise. */
1118
1119 static inline bool
all_preds_critical_p(basic_block bb)1120 all_preds_critical_p (basic_block bb)
1121 {
1122 edge e;
1123 edge_iterator ei;
1124
1125 FOR_EACH_EDGE (e, ei, bb->preds)
1126 if (EDGE_COUNT (e->src->succs) == 1)
1127 return false;
1128 return true;
1129 }
1130
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors is a control-flow shape we do not
     handle.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* A call that alters control flow cannot be if-converted.  */
  gimple *last = last_stmt (bb);
  if (gcall *call = safe_dyn_cast <gcall *> (last))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  /* EXIT_BB being non-NULL means the exit block was already seen by
     the caller, so BB comes after the exit in the visiting order.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      /* NOTE(review): the "bb == loop->latch" test below is redundant;
	 the first branch already returned when BB is not the latch.  */
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "latch is not dominated by exit_block\n");
	    return false;
	  }
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1194
1195 /* Return true when all predecessor blocks of BB are visited. The
1196 VISITED bitmap keeps track of the visited blocks. */
1197
1198 static bool
pred_blocks_visited_p(basic_block bb,bitmap * visited)1199 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1200 {
1201 edge e;
1202 edge_iterator ei;
1203 FOR_EACH_EDGE (e, ei, bb->preds)
1204 if (!bitmap_bit_p (*visited, e->src->index))
1205 return false;
1206
1207 return true;
1208 }
1209
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  Returns NULL if the loop
   contains a block from an irreducible region.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Give up on irreducible regions.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* Select BB only once all of its predecessors have been
	     selected; the header seeds the process unconditionally.  */
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      /* Blocks deferred above require another sweep over the BFS
	 order, until every block has been selected.  */
      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
1268
/* Compute the predicates for all the basic blocks in LOOP.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  /* Start every block with a freshly allocated predicate.  */
  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  /* Rebuild the GIMPLE_COND's condition as a tree
	     expression.  */
	  tree c = build2_loc (loc, gimple_cond_code (stmt),
			       boolean_type_node,
			       gimple_cond_lhs (stmt),
			       gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  /* Both successors were handled above; nothing is left to
	     propagate through the single-successor code below.  */
	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
1360
1361 /* Build region by adding loop pre-header and post-header blocks. */
1362
1363 static vec<basic_block>
build_region(class loop * loop)1364 build_region (class loop *loop)
1365 {
1366 vec<basic_block> region = vNULL;
1367 basic_block exit_bb = NULL;
1368
1369 gcc_assert (ifc_bbs);
1370 /* The first element is loop pre-header. */
1371 region.safe_push (loop_preheader_edge (loop)->src);
1372
1373 for (unsigned int i = 0; i < loop->num_nodes; i++)
1374 {
1375 basic_block bb = ifc_bbs[i];
1376 region.safe_push (bb);
1377 /* Find loop postheader. */
1378 edge e;
1379 edge_iterator ei;
1380 FOR_EACH_EDGE (e, ei, bb->succs)
1381 if (loop_exit_edge_p (loop, e))
1382 {
1383 exit_bb = e->dest;
1384 break;
1385 }
1386 }
1387 /* The last element is loop post-header. */
1388 gcc_assert (exit_bb);
1389 region.safe_push (exit_bb);
1390 return region;
1391 }
1392
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  /* First check the CFG shape of each block, tracking the single
     exit block for the post-exit checks in if_convertible_bb_p.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Reject statement kinds never handled here, and clear the uids of
     the rest; the uids are reused below to associate statements with
     their data references.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      bool may_have_nonlocal_labels
	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	    if (!may_have_nonlocal_labels)
	      {
		tree label
		  = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
		  return false;
	      }
	    /* Fallthru.  */
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  /* Maps used by hash_memrefs_baserefs_and_store_DRs_read_written_info;
     freed by if_convertible_loop_p.  */
  innermost_DR_map
	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      /* Attach per-DR if-conversion bookkeeping, all initially
	 pessimistic ("never read/written unconditionally").  */
      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
1538
1539 /* Return true when LOOP is if-convertible.
1540 LOOP is if-convertible if:
1541 - it is innermost,
1542 - it has two or more basic blocks,
1543 - it has only one exit,
1544 - loop header is not the exit edge,
1545 - if its basic blocks and phi nodes are if convertible. */
1546
1547 static bool
if_convertible_loop_p(class loop * loop)1548 if_convertible_loop_p (class loop *loop)
1549 {
1550 edge e;
1551 edge_iterator ei;
1552 bool res = false;
1553 vec<data_reference_p> refs;
1554
1555 /* Handle only innermost loop. */
1556 if (!loop || loop->inner)
1557 {
1558 if (dump_file && (dump_flags & TDF_DETAILS))
1559 fprintf (dump_file, "not innermost loop\n");
1560 return false;
1561 }
1562
1563 /* If only one block, no need for if-conversion. */
1564 if (loop->num_nodes <= 2)
1565 {
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 fprintf (dump_file, "less than 2 basic blocks\n");
1568 return false;
1569 }
1570
1571 /* More than one loop exit is too much to handle. */
1572 if (!single_exit (loop))
1573 {
1574 if (dump_file && (dump_flags & TDF_DETAILS))
1575 fprintf (dump_file, "multiple exits\n");
1576 return false;
1577 }
1578
1579 /* If one of the loop header's edge is an exit edge then do not
1580 apply if-conversion. */
1581 FOR_EACH_EDGE (e, ei, loop->header->succs)
1582 if (loop_exit_edge_p (loop, e))
1583 return false;
1584
1585 refs.create (5);
1586 res = if_convertible_loop_p_1 (loop, &refs);
1587
1588 data_reference_p dr;
1589 unsigned int i;
1590 for (i = 0; refs.iterate (i, &dr); i++)
1591 free (dr->aux);
1592
1593 free_data_refs (refs);
1594
1595 delete innermost_DR_map;
1596 innermost_DR_map = NULL;
1597
1598 delete baseref_DR_map;
1599 baseref_DR_map = NULL;
1600
1601 return res;
1602 }
1603
1604 /* Return reduc_1 if has_nop.
1605
1606 if (...)
1607 tmp1 = (unsigned type) reduc_1;
1608 tmp2 = tmp1 + rhs2;
1609 reduc_3 = (signed type) tmp2. */
1610 static tree
strip_nop_cond_scalar_reduction(bool has_nop,tree op)1611 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1612 {
1613 if (!has_nop)
1614 return op;
1615
1616 if (TREE_CODE (op) != SSA_NAME)
1617 return NULL_TREE;
1618
1619 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1620 if (!stmt
1621 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1622 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1623 (gimple_assign_rhs1 (stmt))))
1624 return NULL_TREE;
1625
1626 return gimple_assign_rhs1 (stmt);
1627 }
1628
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  *HAS_NOP is set when
   the reduction is performed through nop conversions (see
   strip_nop_cond_scalar_reduction); *NOP_REDUC then receives the
   outer conversion statement.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended, bool* has_nop,
			  gimple **nop_reduc)
{
  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = *has_nop = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* One argument must be defined by the loop-header PHI, the other by
     the (conditional) reduction statement.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The value flowing back over the latch must be PHI's result, i.e.
     PHI closes the reduction cycle.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);

  /* Catch something like below

     loop-header:
     reduc_1 = PHI <..., reduc_2>
     ...
     if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2;

     reduc_2 = PHI <reduc_1, reduc_3>

     and convert to

     reduc_2 = PHI <0, reduc_3>
     tmp1 = (unsigned type)reduce_1;
     ifcvt = cond_expr ? rhs2 : 0
     tmp2 = tmp1 +/- ifcvt;
     reduce_1 = (signed type)tmp2;  */

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      /* Step down through the outer conversion to the arithmetic
	 statement it converts.  */
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
	  || !has_single_use (lhs))
	return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
	  || !is_gimple_assign (stmt))
	return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }

  /* Only these reduction operations are supported.  */
  if (reduction_op != PLUS_EXPR
      && reduction_op != MINUS_EXPR
      && reduction_op != BIT_IOR_EXPR
      && reduction_op != BIT_XOR_EXPR
      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
    }
  else if (r_nop1 != PHI_RESULT (header_phi))
    return false;

  if (*has_nop)
    {
      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
	    continue;
	  if (use_stmt != phi)
	    return false;
	}
    }

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
1798
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  HAS_NOP is true when the reduction goes through nop conversions;
  NOP_REDUC is then the outer conversion statement, removed here along
  with REDUC.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap,
			       bool has_nop, gimple* nop_reduc)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  /* Temporary holding the (masked) second reduction operand.  */
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  enum tree_code reduction_op  = gimple_assign_rhs_code (reduc);
  /* Value that leaves the reduction unchanged when the condition does
     not hold (e.g. 0 for PLUS_EXPR).  */
  tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
  gimple_seq stmts = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? op_nochange : op1,
			    swap ? op1 : op_nochange);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement/logic_operation.  */
  rhs = gimple_build (&stmts, reduction_op,
		      TREE_TYPE (rhs1), op0, tmp);

  if (has_nop)
    {
      /* Convert the result back to the outer type and drop the now
	 dead conversion statement.  */
      rhs = gimple_convert (&stmts,
			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
      stmt_it = gsi_for_stmt (nop_reduc);
      gsi_remove (&stmt_it, true);
      release_defs (nop_reduc);
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
1868
1869 /* Produce condition for all occurrences of ARG in PHI node. */
1870
1871 static tree
gen_phi_arg_condition(gphi * phi,vec<int> * occur,gimple_stmt_iterator * gsi)1872 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1873 gimple_stmt_iterator *gsi)
1874 {
1875 int len;
1876 int i;
1877 tree cond = NULL_TREE;
1878 tree c;
1879 edge e;
1880
1881 len = occur->length ();
1882 gcc_assert (len > 0);
1883 for (i = 0; i < len; i++)
1884 {
1885 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1886 c = bb_predicate (e->src);
1887 if (is_true_predicate (c))
1888 {
1889 cond = c;
1890 break;
1891 }
1892 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1893 is_gimple_condexpr, NULL_TREE,
1894 true, GSI_SAME_STMT);
1895 if (cond != NULL_TREE)
1896 {
1897 /* Must build OR expression. */
1898 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1899 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1900 is_gimple_condexpr, NULL_TREE,
1901 true, GSI_SAME_STMT);
1902 }
1903 else
1904 cond = c;
1905 }
1906 gcc_assert (cond != NULL_TREE);
1907 return cond;
1908 }
1909
1910 /* Local valueization callback that follows all-use SSA edges. */
1911
1912 static tree
ifcvt_follow_ssa_use_edges(tree val)1913 ifcvt_follow_ssa_use_edges (tree val)
1914 {
1915 return val;
1916 }
1917
1918 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1919 This routine can handle PHI nodes with more than two arguments.
1920
1921 For example,
1922 S1: A = PHI <x1(1), x2(5)>
1923 is converted into,
1924 S2: A = cond ? x1 : x2;
1925
1926 The generated code is inserted at GSI that points to the top of
1927 basic block's statement list.
1928 If PHI node has more than two arguments a chain of conditional
1929 expression is produced. */
1930
1931
1932 static void
predicate_scalar_phi(gphi * phi,gimple_stmt_iterator * gsi)1933 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1934 {
1935 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1936 tree rhs, res, arg0, arg1, op0, op1, scev;
1937 tree cond;
1938 unsigned int index0;
1939 unsigned int max, args_len;
1940 edge e;
1941 basic_block bb;
1942 unsigned int i;
1943 bool has_nop;
1944
1945 res = gimple_phi_result (phi);
1946 if (virtual_operand_p (res))
1947 return;
1948
1949 if ((rhs = degenerate_phi_result (phi))
1950 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1951 res))
1952 && !chrec_contains_undetermined (scev)
1953 && scev != res
1954 && (rhs = gimple_phi_arg_def (phi, 0))))
1955 {
1956 if (dump_file && (dump_flags & TDF_DETAILS))
1957 {
1958 fprintf (dump_file, "Degenerate phi!\n");
1959 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1960 }
1961 new_stmt = gimple_build_assign (res, rhs);
1962 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1963 update_stmt (new_stmt);
1964 return;
1965 }
1966
1967 bb = gimple_bb (phi);
1968 if (EDGE_COUNT (bb->preds) == 2)
1969 {
1970 /* Predicate ordinary PHI node with 2 arguments. */
1971 edge first_edge, second_edge;
1972 basic_block true_bb;
1973 first_edge = EDGE_PRED (bb, 0);
1974 second_edge = EDGE_PRED (bb, 1);
1975 cond = bb_predicate (first_edge->src);
1976 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1977 std::swap (first_edge, second_edge);
1978 if (EDGE_COUNT (first_edge->src->succs) > 1)
1979 {
1980 cond = bb_predicate (second_edge->src);
1981 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1982 cond = TREE_OPERAND (cond, 0);
1983 else
1984 first_edge = second_edge;
1985 }
1986 else
1987 cond = bb_predicate (first_edge->src);
1988 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1989 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1990 is_gimple_condexpr, NULL_TREE,
1991 true, GSI_SAME_STMT);
1992 true_bb = first_edge->src;
1993 if (EDGE_PRED (bb, 1)->src == true_bb)
1994 {
1995 arg0 = gimple_phi_arg_def (phi, 1);
1996 arg1 = gimple_phi_arg_def (phi, 0);
1997 }
1998 else
1999 {
2000 arg0 = gimple_phi_arg_def (phi, 0);
2001 arg1 = gimple_phi_arg_def (phi, 1);
2002 }
2003 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2004 &op0, &op1, false, &has_nop,
2005 &nop_reduc))
2006 {
2007 /* Convert reduction stmt into vectorizable form. */
2008 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2009 true_bb != gimple_bb (reduc),
2010 has_nop, nop_reduc);
2011 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2012 }
2013 else
2014 /* Build new RHS using selected condition and arguments. */
2015 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2016 arg0, arg1);
2017 new_stmt = gimple_build_assign (res, rhs);
2018 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2019 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2020 if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
2021 {
2022 new_stmt = gsi_stmt (new_gsi);
2023 update_stmt (new_stmt);
2024 }
2025
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 {
2028 fprintf (dump_file, "new phi replacement stmt\n");
2029 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2030 }
2031 return;
2032 }
2033
2034 /* Create hashmap for PHI node which contain vector of argument indexes
2035 having the same value. */
2036 bool swap = false;
2037 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2038 unsigned int num_args = gimple_phi_num_args (phi);
2039 int max_ind = -1;
2040 /* Vector of different PHI argument values. */
2041 auto_vec<tree> args (num_args);
2042
2043 /* Compute phi_arg_map. */
2044 for (i = 0; i < num_args; i++)
2045 {
2046 tree arg;
2047
2048 arg = gimple_phi_arg_def (phi, i);
2049 if (!phi_arg_map.get (arg))
2050 args.quick_push (arg);
2051 phi_arg_map.get_or_insert (arg).safe_push (i);
2052 }
2053
2054 /* Determine element with max number of occurrences. */
2055 max_ind = -1;
2056 max = 1;
2057 args_len = args.length ();
2058 for (i = 0; i < args_len; i++)
2059 {
2060 unsigned int len;
2061 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2062 {
2063 max_ind = (int) i;
2064 max = len;
2065 }
2066 }
2067
2068 /* Put element with max number of occurences to the end of ARGS. */
2069 if (max_ind != -1 && max_ind +1 != (int) args_len)
2070 std::swap (args[args_len - 1], args[max_ind]);
2071
2072 /* Handle one special case when number of arguments with different values
2073 is equal 2 and one argument has the only occurrence. Such PHI can be
2074 handled as if would have only 2 arguments. */
2075 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2076 {
2077 vec<int> *indexes;
2078 indexes = phi_arg_map.get (args[0]);
2079 index0 = (*indexes)[0];
2080 arg0 = args[0];
2081 arg1 = args[1];
2082 e = gimple_phi_arg_edge (phi, index0);
2083 cond = bb_predicate (e->src);
2084 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2085 {
2086 swap = true;
2087 cond = TREE_OPERAND (cond, 0);
2088 }
2089 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2090 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
2091 is_gimple_condexpr, NULL_TREE,
2092 true, GSI_SAME_STMT);
2093 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2094 &op0, &op1, true, &has_nop, &nop_reduc)))
2095 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2096 swap? arg1 : arg0,
2097 swap? arg0 : arg1);
2098 else
2099 {
2100 /* Convert reduction stmt into vectorizable form. */
2101 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2102 swap,has_nop, nop_reduc);
2103 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2104 }
2105 new_stmt = gimple_build_assign (res, rhs);
2106 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2107 update_stmt (new_stmt);
2108 }
2109 else
2110 {
2111 /* Common case. */
2112 vec<int> *indexes;
2113 tree type = TREE_TYPE (gimple_phi_result (phi));
2114 tree lhs;
2115 arg1 = args[1];
2116 for (i = 0; i < args_len; i++)
2117 {
2118 arg0 = args[i];
2119 indexes = phi_arg_map.get (args[i]);
2120 if (i != args_len - 1)
2121 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2122 else
2123 lhs = res;
2124 cond = gen_phi_arg_condition (phi, indexes, gsi);
2125 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2126 arg0, arg1);
2127 new_stmt = gimple_build_assign (lhs, rhs);
2128 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2129 update_stmt (new_stmt);
2130 arg1 = lhs;
2131 }
2132 }
2133
2134 if (dump_file && (dump_flags & TDF_DETAILS))
2135 {
2136 fprintf (dump_file, "new extended phi replacement stmt\n");
2137 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2138 }
2139 }
2140
2141 /* Replaces in LOOP all the scalar phi nodes other than those in the
2142 LOOP->header block with conditional modify expressions. */
2143
2144 static void
predicate_all_scalar_phis(class loop * loop)2145 predicate_all_scalar_phis (class loop *loop)
2146 {
2147 basic_block bb;
2148 unsigned int orig_loop_num_nodes = loop->num_nodes;
2149 unsigned int i;
2150
2151 for (i = 1; i < orig_loop_num_nodes; i++)
2152 {
2153 gphi *phi;
2154 gimple_stmt_iterator gsi;
2155 gphi_iterator phi_gsi;
2156 bb = ifc_bbs[i];
2157
2158 if (bb == loop->header)
2159 continue;
2160
2161 phi_gsi = gsi_start_phis (bb);
2162 if (gsi_end_p (phi_gsi))
2163 continue;
2164
2165 gsi = gsi_after_labels (bb);
2166 while (!gsi_end_p (phi_gsi))
2167 {
2168 phi = phi_gsi.phi ();
2169 if (virtual_operand_p (gimple_phi_result (phi)))
2170 gsi_next (&phi_gsi);
2171 else
2172 {
2173 predicate_scalar_phi (phi, &gsi);
2174 remove_phi_node (&phi_gsi, false);
2175 }
2176 }
2177 }
2178 }
2179
2180 /* Insert in each basic block of LOOP the statements produced by the
2181 gimplification of the predicates. */
2182
2183 static void
insert_gimplified_predicates(loop_p loop)2184 insert_gimplified_predicates (loop_p loop)
2185 {
2186 unsigned int i;
2187
2188 for (i = 0; i < loop->num_nodes; i++)
2189 {
2190 basic_block bb = ifc_bbs[i];
2191 gimple_seq stmts;
2192 if (!is_predicated (bb))
2193 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2194 if (!is_predicated (bb))
2195 {
2196 /* Do not insert statements for a basic block that is not
2197 predicated. Also make sure that the predicate of the
2198 basic block is set to true. */
2199 reset_bb_predicate (bb);
2200 continue;
2201 }
2202
2203 stmts = bb_predicate_gimplified_stmts (bb);
2204 if (stmts)
2205 {
2206 if (need_to_predicate)
2207 {
2208 /* Insert the predicate of the BB just after the label,
2209 as the if-conversion of memory writes will use this
2210 predicate. */
2211 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2212 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2213 }
2214 else
2215 {
2216 /* Insert the predicate of the BB at the end of the BB
2217 as this would reduce the register pressure: the only
2218 use of this predicate will be in successor BBs. */
2219 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2220
2221 if (gsi_end_p (gsi)
2222 || stmt_ends_bb_p (gsi_stmt (gsi)))
2223 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2224 else
2225 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2226 }
2227
2228 /* Once the sequence is code generated, set it to NULL. */
2229 set_bb_predicate_gimplified_stmts (bb, NULL);
2230 }
2231 }
2232 }
2233
2234 /* Helper function for predicate_statements. Returns index of existent
2235 mask if it was created for given SIZE and -1 otherwise. */
2236
2237 static int
mask_exists(int size,const vec<int> & vec)2238 mask_exists (int size, const vec<int> &vec)
2239 {
2240 unsigned int ix;
2241 int v;
2242 FOR_EACH_VEC_ELT (vec, ix, v)
2243 if (v == size)
2244 return (int) ix;
2245 return -1;
2246 }
2247
/* Helper function for predicate_statements.  STMT is a memory read or
   write and it needs to be predicated by MASK.  Return a statement
   that does so.  */

static gimple *
predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
{
  gcall *new_stmt;

  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  /* For a load the memory reference is the RHS (the LHS is an SSA name),
     for a store it is the LHS.  */
  tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
  mark_addressable (ref);
  tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
					true, NULL_TREE, true, GSI_SAME_STMT);
  /* Second IFN argument: an alias pointer constant whose value records
     the alignment of the original reference.  */
  tree ptr = build_int_cst (reference_alias_ptr_type (ref),
			    get_object_alignment (ref));
  /* Copy points-to info if possible.  */
  if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
    copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
		   ref);
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      /* Load: LHS = .MASK_LOAD (ADDR, PTR, MASK).  Only the virtual use
	 carries over; a load has no virtual definition.  */
      new_stmt
	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
				      ptr, mask);
      gimple_call_set_lhs (new_stmt, lhs);
      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
    }
  else
    {
      /* Store: .MASK_STORE (ADDR, PTR, MASK, RHS).  Move both virtual
	 use and virtual definition to the new statement.  */
      new_stmt
	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
				      mask, rhs);
      gimple_move_vops (new_stmt, stmt);
    }
  gimple_call_set_nothrow (new_stmt, true);
  return new_stmt;
}
2287
2288 /* STMT uses OP_LHS. Check whether it is equivalent to:
2289
2290 ... = OP_MASK ? OP_LHS : X;
2291
2292 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2293 known to have value OP_COND. */
2294
2295 static tree
check_redundant_cond_expr(gimple * stmt,tree op_mask,tree op_cond,tree op_lhs)2296 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2297 tree op_lhs)
2298 {
2299 gassign *assign = dyn_cast <gassign *> (stmt);
2300 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2301 return NULL_TREE;
2302
2303 tree use_cond = gimple_assign_rhs1 (assign);
2304 tree if_true = gimple_assign_rhs2 (assign);
2305 tree if_false = gimple_assign_rhs3 (assign);
2306
2307 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2308 && if_true == op_lhs)
2309 return if_false;
2310
2311 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2312 return if_true;
2313
2314 return NULL_TREE;
2315 }
2316
2317 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2318 the set of SSA names defined earlier in STMT's block. */
2319
2320 static bool
value_available_p(gimple * stmt,hash_set<tree_ssa_name_hash> * ssa_names,tree value)2321 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2322 tree value)
2323 {
2324 if (is_gimple_min_invariant (value))
2325 return true;
2326
2327 if (TREE_CODE (value) == SSA_NAME)
2328 {
2329 if (SSA_NAME_IS_DEFAULT_DEF (value))
2330 return true;
2331
2332 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2333 basic_block use_bb = gimple_bb (stmt);
2334 return (def_bb == use_bb
2335 ? ssa_names->contains (value)
2336 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2337 }
2338
2339 return false;
2340 }
2341
/* Helper function for predicate_statements.  STMT is a potentially-trapping
   arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
   has value COND.  Return a statement that does so.  SSA_NAMES is the set of
   SSA names defined earlier in STMT's block.  */

static gimple *
predicate_rhs_code (gassign *stmt, tree mask, tree cond,
		    hash_set<tree_ssa_name_hash> *ssa_names)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  /* Construct the arguments to the conditional internal function:
     the mask first, then the original operands, then an else value
     (filled in below).  */
  auto_vec<tree, 8> args;
  args.safe_grow (nops + 1, true);
  args[0] = mask;
  for (unsigned int i = 1; i < nops; ++i)
    args[i] = gimple_op (stmt, i);
  args[nops] = NULL_TREE;

  /* Look for uses of the result to see whether they are COND_EXPRs that can
     be folded into the conditional call.  */
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
    {
      tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
      /* The else value must dominate (or precede in-block) the statement
	 we are rewriting, otherwise we cannot use it here.  */
      if (new_else && value_available_p (stmt, ssa_names, new_else))
	{
	  if (!args[nops])
	    args[nops] = new_else;
	  if (operand_equal_p (new_else, args[nops], 0))
	    {
	      /* We have:

		   LHS = IFN_COND (MASK, ..., ELSE);
		   X = MASK ? LHS : ELSE;

		 which makes X equivalent to LHS.  Record the pair so the
		 redundant X can be replaced by LHS later.  */
	      tree use_lhs = gimple_assign_lhs (use_stmt);
	      redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
	    }
	}
    }
  /* No use supplied an else value; ask the target for its preferred one.  */
  if (!args[nops])
    args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
					       nops - 1, &args[1]);

  /* Create and insert the call.  */
  gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
  gimple_call_set_lhs (new_stmt, lhs);
  gimple_call_set_nothrow (new_stmt, true);

  return new_stmt;
}
2399
2400 /* Predicate each write to memory in LOOP.
2401
2402 This function transforms control flow constructs containing memory
2403 writes of the form:
2404
2405 | for (i = 0; i < N; i++)
2406 | if (cond)
2407 | A[i] = expr;
2408
2409 into the following form that does not contain control flow:
2410
2411 | for (i = 0; i < N; i++)
2412 | A[i] = cond ? expr : A[i];
2413
2414 The original CFG looks like this:
2415
2416 | bb_0
2417 | i = 0
2418 | end_bb_0
2419 |
2420 | bb_1
2421 | if (i < N) goto bb_5 else goto bb_2
2422 | end_bb_1
2423 |
2424 | bb_2
2425 | cond = some_computation;
2426 | if (cond) goto bb_3 else goto bb_4
2427 | end_bb_2
2428 |
2429 | bb_3
2430 | A[i] = expr;
2431 | goto bb_4
2432 | end_bb_3
2433 |
2434 | bb_4
2435 | goto bb_1
2436 | end_bb_4
2437
2438 insert_gimplified_predicates inserts the computation of the COND
2439 expression at the beginning of the destination basic block:
2440
2441 | bb_0
2442 | i = 0
2443 | end_bb_0
2444 |
2445 | bb_1
2446 | if (i < N) goto bb_5 else goto bb_2
2447 | end_bb_1
2448 |
2449 | bb_2
2450 | cond = some_computation;
2451 | if (cond) goto bb_3 else goto bb_4
2452 | end_bb_2
2453 |
2454 | bb_3
2455 | cond = some_computation;
2456 | A[i] = expr;
2457 | goto bb_4
2458 | end_bb_3
2459 |
2460 | bb_4
2461 | goto bb_1
2462 | end_bb_4
2463
2464 predicate_statements is then predicating the memory write as follows:
2465
2466 | bb_0
2467 | i = 0
2468 | end_bb_0
2469 |
2470 | bb_1
2471 | if (i < N) goto bb_5 else goto bb_2
2472 | end_bb_1
2473 |
2474 | bb_2
2475 | if (cond) goto bb_3 else goto bb_4
2476 | end_bb_2
2477 |
2478 | bb_3
2479 | cond = some_computation;
2480 | A[i] = cond ? expr : A[i];
2481 | goto bb_4
2482 | end_bb_3
2483 |
2484 | bb_4
2485 | goto bb_1
2486 | end_bb_4
2487
2488 and finally combine_blocks removes the basic block boundaries making
2489 the loop vectorizable:
2490
2491 | bb_0
2492 | i = 0
2493 | if (i < N) goto bb_5 else goto bb_1
2494 | end_bb_0
2495 |
2496 | bb_1
2497 | cond = some_computation;
2498 | A[i] = cond ? expr : A[i];
2499 | if (i < N) goto bb_5 else goto bb_4
2500 | end_bb_1
2501 |
2502 | bb_4
2503 | goto bb_1
2504 | end_bb_4
2505 */
2506
static void
predicate_statements (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;
  hash_set<tree_ssa_name_hash> ssa_names;

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      tree cond = bb_predicate (bb);
      bool swap;
      int index;

      /* Blocks executed unconditionally need no predication.  */
      if (is_true_predicate (cond))
	continue;

      /* Strip a top-level negation; SWAP remembers that the branches of
	 any COND_EXPR built from COND must be exchanged.  */
      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}

      /* Masks are cached per basic block.  */
      vect_sizes.truncate (0);
      vect_masks.truncate (0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
	  tree lhs;
	  if (!stmt)
	    ;
	  /* A store under a known-false predicate is dead; remove it.  */
	  else if (is_false_predicate (cond)
		   && gimple_vdef (stmt))
	    {
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&gsi, true);
	      release_defs (stmt);
	      continue;
	    }
	  /* GF_PLF_2 marks statements that must be turned into masked
	     internal-function calls (masked loads/stores or conditional
	     arithmetic).  */
	  else if (gimple_plf (stmt, GF_PLF_2))
	    {
	      tree lhs = gimple_assign_lhs (stmt);
	      tree mask;
	      gimple *new_stmt;
	      gimple_seq stmts = NULL;
	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
	      /* We checked before setting GF_PLF_2 that an equivalent
		 integer mode exists.  */
	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
	      if (!vect_sizes.is_empty ()
		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
		/* Use created mask.  */
		mask = vect_masks[index];
	      else
		{
		  /* Build the mask from the block predicate, inverting
		     it via XOR with true when SWAP is set.  */
		  if (COMPARISON_CLASS_P (cond))
		    mask = gimple_build (&stmts, TREE_CODE (cond),
					 boolean_type_node,
					 TREE_OPERAND (cond, 0),
					 TREE_OPERAND (cond, 1));
		  else
		    mask = cond;

		  if (swap)
		    {
		      tree true_val
			= constant_boolean_node (true, TREE_TYPE (mask));
		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
					   TREE_TYPE (mask), mask, true_val);
		    }
		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

		  /* Save mask and its size for further use.  */
		  vect_sizes.safe_push (bitsize);
		  vect_masks.safe_push (mask);
		}
	      if (gimple_assign_single_p (stmt))
		new_stmt = predicate_load_or_store (&gsi, stmt, mask);
	      else
		new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);

	      gsi_replace (&gsi, new_stmt, true);
	    }
	  /* Arithmetic with undefined signed overflow must be rewritten
	     to a defined-overflow form since it now executes
	     unconditionally.  */
	  else if (((lhs = gimple_assign_lhs (stmt)), true)
		   && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
		       || POINTER_TYPE_P (TREE_TYPE (lhs)))
		   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
		   && arith_code_with_undefined_signed_overflow
		   (gimple_assign_rhs_code (stmt)))
	    {
	      gsi_remove (&gsi, true);
	      gimple_seq stmts = rewrite_to_defined_overflow (stmt);
	      bool first = true;
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2);)
		{
		  gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
		  gsi_remove (&gsi2, false);
		  if (first)
		    {
		      gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
		      first = false;
		    }
		  else
		    gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
		}
	    }
	  /* Any other store becomes a read-modify-write:
	     LHS = COND ? RHS : LHS.  */
	  else if (gimple_vdef (stmt))
	    {
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree type = TREE_TYPE (lhs);

	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
	      if (swap)
		std::swap (lhs, rhs);
	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
						 is_gimple_condexpr, NULL_TREE,
						 true, GSI_SAME_STMT);
	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
	      update_stmt (stmt);
	    }
	  /* Record in-block definitions for value_available_p.  */
	  lhs = gimple_get_lhs (gsi_stmt (gsi));
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    ssa_names.add (lhs);
	  gsi_next (&gsi);
	}
      ssa_names.empty ();
    }
}
2643
2644 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2645 other than the exit and latch of the LOOP. Also resets the
2646 GIMPLE_DEBUG information. */
2647
2648 static void
remove_conditions_and_labels(loop_p loop)2649 remove_conditions_and_labels (loop_p loop)
2650 {
2651 gimple_stmt_iterator gsi;
2652 unsigned int i;
2653
2654 for (i = 0; i < loop->num_nodes; i++)
2655 {
2656 basic_block bb = ifc_bbs[i];
2657
2658 if (bb_with_exit_edge_p (loop, bb)
2659 || bb == loop->latch)
2660 continue;
2661
2662 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2663 switch (gimple_code (gsi_stmt (gsi)))
2664 {
2665 case GIMPLE_COND:
2666 case GIMPLE_LABEL:
2667 gsi_remove (&gsi, true);
2668 break;
2669
2670 case GIMPLE_DEBUG:
2671 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2672 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2673 {
2674 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2675 update_stmt (gsi_stmt (gsi));
2676 }
2677 gsi_next (&gsi);
2678 break;
2679
2680 default:
2681 gsi_next (&gsi);
2682 }
2683 }
2684 }
2685
/* Combine all the basic blocks from LOOP into one or two super basic
   blocks.  Replace PHI nodes with conditional modify expressions.  */

static void
combine_blocks (class loop *loop)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop);

  if (need_to_predicate || need_to_rewrite_undefined)
    predicate_statements (loop);

  /* Merge basic blocks.  First remember which blocks were predicated
     (their flow-sensitive info becomes invalid) and locate the single
     exit block; predicates are released as we go.  */
  exit_bb = NULL;
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      predicated[i] = !is_true_predicate (bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
	{
	  gcc_assert (exit_bb == NULL);
	  exit_bb = bb;
	}
    }
  gcc_assert (exit_bb != loop->latch);

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (vphi);
      for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
	   ! gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_vdef (gsi_stmt (gsi)))
	  last_vdef = gimple_vdef (gsi_stmt (gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
	{
	  /* When there's just loads inside the loop a stray virtual
	     PHI merging the uses can appear, update last_vdef from
	     it.  */
	  if (!last_vdef)
	    last_vdef = gimple_phi_arg_def (vphi, 0);
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
	  gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      /* Make stmts member of loop->header and clear range info from all stmts
	 in BB which is now no longer executed conditional on a predicate we
	 could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_bb (stmt, merge_target_bb);
	  /* Update virtual operands.  */
	  if (last_vdef)
	    {
	      use_operand_p use_p = ssa_vuse_operand (stmt);
	      if (use_p
		  && USE_FROM_PTR (use_p) != last_vdef)
		SET_USE (use_p, last_vdef);
	      if (gimple_vdef (stmt))
		last_vdef = gimple_vdef (stmt);
	    }
	  else
	    /* If this is the first load we arrive at update last_vdef
	       so we handle stray PHIs correctly.  */
	    last_vdef = gimple_vuse (stmt);
	  if (predicated[i])
	    {
	      /* Range info was derived under the block's predicate and
		 no longer holds once the statement runs unconditionally.  */
	      ssa_op_iter i;
	      tree op;
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
		reset_flow_sensitive_info (op);
	    }
	}

      /* Update stmt list.  */
      last = gsi_last_bb (merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);
    }

  /* Fixup virtual operands in the exit block.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
	{
	  /* When there's just loads inside the loop a stray virtual
	     PHI merging the uses can appear, update last_vdef from
	     it.  */
	  if (!last_vdef)
	    last_vdef = gimple_phi_arg_def (vphi, 0);
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}
    }

  /* Now remove all the edges in the loop, except for those from the exit
     block and delete the blocks we elided.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
	{
	  if (e->src == exit_bb)
	    ei_next (&ei);
	  else
	    remove_edge (e);
	}
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      delete_basic_block (bb);
    }

  /* Re-connect the exit block.  */
  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
	{
	  /* Connect this node to loop header.  */
	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
	}

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
	{
	  if (!loop_exit_edge_p (loop, e))
	    redirect_edge_and_branch (e, loop->latch);
	}
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      if (can_merge_blocks_p (loop->header, exit_bb))
	merge_blocks (loop->header, exit_bb);
    }

  free (ifc_bbs);
  ifc_bbs = NULL;
  free (predicated);
}
2900
/* Version LOOP before if-converting it; the original loop
   will be if-converted, the new copy of the loop will not,
   and the LOOP_VECTORIZED internal call will be guarding which
   loop to execute.  The vectorizer pass will fold this
   internal call into either true or false. 

   Note that this function intentionally invalidates profile.  Both edges
   out of LOOP_VECTORIZED must have 100% probability so the profile remains
   consistent after the condition is folded in the vectorizer.  */

static class loop *
version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  class loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length;

  /* Build COND = .LOOP_VECTORIZED (orig-loop-num, copy-loop-num); the
     second argument is patched once the copy exists.  */
  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
				  build_int_cst (integer_type_node, loop->num),
				  integer_zero_node);
  gimple_call_set_lhs (g, cond);

  /* Save BB->aux around loop_version as that uses the same field.  */
  save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
  void **saved_preds = XALLOCAVEC (void *, save_length);
  for (unsigned i = 0; i < save_length; i++)
    saved_preds[i] = ifc_bbs[i]->aux;

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
     is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (), true);
  free_original_copy_tables ();

  /* Restore BB->aux.  */
  for (unsigned i = 0; i < save_length; i++)
    ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  /* The copy stays scalar; make sure later passes leave it alone.  */
  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  gsi = gsi_last_bb (cond_bb);
  /* Now the copy's number is known; record it in the call.  */
  gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
  if (preds)
    preds->safe_push (g);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa);
  return new_loop;
}
2958
2959 /* Return true when LOOP satisfies the follow conditions that will
2960 allow it to be recognized by the vectorizer for outer-loop
2961 vectorization:
2962 - The loop is not the root node of the loop tree.
2963 - The loop has exactly one inner loop.
2964 - The loop has a single exit.
2965 - The loop header has a single successor, which is the inner
2966 loop header.
2967 - Each of the inner and outer loop latches have a single
2968 predecessor.
2969 - The loop exit block has a single predecessor, which is the
2970 inner loop's exit block. */
2971
2972 static bool
versionable_outer_loop_p(class loop * loop)2973 versionable_outer_loop_p (class loop *loop)
2974 {
2975 if (!loop_outer (loop)
2976 || loop->dont_vectorize
2977 || !loop->inner
2978 || loop->inner->next
2979 || !single_exit (loop)
2980 || !single_succ_p (loop->header)
2981 || single_succ (loop->header) != loop->inner->header
2982 || !single_pred_p (loop->latch)
2983 || !single_pred_p (loop->inner->latch))
2984 return false;
2985
2986 basic_block outer_exit = single_pred (loop->latch);
2987 basic_block inner_exit = single_pred (loop->inner->latch);
2988
2989 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2990 return false;
2991
2992 if (dump_file)
2993 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2994
2995 return true;
2996 }
2997
/* Performs splitting of critical edges.  Skip splitting and return false
   if LOOP will not be converted because:

     - LOOP is not well formed.
     - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.

   Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */

static bool
ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
{
  basic_block *body;
  basic_block bb;
  unsigned int num = loop->num_nodes;
  unsigned int i;
  gimple *stmt;
  edge e;
  edge_iterator ei;
  auto_vec<edge> critical_edges;

  /* Loop is not well formed.  */
  if (num <= 2 || loop->inner || !single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < num; i++)
    {
      bb = body[i];
      /* Too-wide PHIs make if-conversion unprofitable unless the user
	 asked for aggressive conversion.  */
      if (!aggressive_if_conv
	  && phi_nodes (bb)
	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "BB %d has complicated PHI with more than %u args.\n",
		     bb->index, MAX_PHI_ARG_NUM);

	  free (body);
	  return false;
	}
      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
	continue;

      stmt = last_stmt (bb);
      /* Skip basic blocks not ending with conditional branch.  */
      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
	continue;

      /* Collect critical edges that stay within the loop.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
	  critical_edges.safe_push (e);
    }
  free (body);

  while (critical_edges.length () > 0)
    {
      e = critical_edges.pop ();
      /* Don't split if bb can be predicated along non-critical edge.  */
      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
	split_edge (e);
    }

  return true;
}
3062
3063 /* Delete redundant statements produced by predication which prevents
3064 loop vectorization. */
3065
static void
ifcvt_local_dce (class loop *loop)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* The loop has a single BB only (combine_blocks has already merged
     the body into the header), so only the header needs scanning.  */
  basic_block bb = loop->header;
  /* Virtual operand flowing in from the latch; passed to
     dse_classify_store below as the last memory state of the loop.  */
  tree latch_vdef = NULL_TREE;

  worklist.create (64);
  /* Consider all phis as live statements.  GF_PLF_2 serves as the
     "live" mark throughout this function.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
      if (virtual_operand_p (gimple_phi_result (phi)))
	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	{
	  /* Debug stmts are kept, but deliberately not pushed on the
	     worklist so they do not keep their operands alive.  */
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  continue;
	}
      if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      /* Everything else starts out dead ...  */
      gimple_set_plf (stmt, GF_PLF_2, false);

      if (code == GIMPLE_ASSIGN)
	{
	  /* ... unless its result is used (non-debug) outside this BB,
	     in which case it must be preserved.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
		{
		  gimple_set_plf (stmt, GF_PLF_2, true);
		  worklist.safe_push (stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  /* Only unmarked definitions inside this BB need processing.  */
	  if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt1, GF_PLF_2, true);
	  worklist.safe_push (stmt1);
	}
    }
  /* Delete dead statements.  Walk backwards so uses are removed before
     their defs; GSIPREV is captured up front because removing the
     statement at GSI invalidates that iterator.  */
  gsi = gsi_last_bb (bb);
  while (!gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsiprev = gsi;
      gsi_prev (&gsiprev);
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt) && gimple_vdef (stmt))
	{
	  /* Stores were all marked live above; use the DSE machinery
	     to remove those that are provably dead anyway.  */
	  tree lhs = gimple_get_lhs (stmt);
	  ao_ref write;
	  ao_ref_init (&write, lhs);

	  if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
	      == DSE_STORE_DEAD)
	    delete_dead_or_redundant_assignment (&gsi, "dead");
	  gsi = gsiprev;
	  continue;
	}

      if (gimple_plf (stmt, GF_PLF_2))
	{
	  /* Statement is live; keep it.  */
	  gsi = gsiprev;
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      gsi_remove (&gsi, true);
      release_defs (stmt);
      gsi = gsiprev;
    }
}
3186
3187 /* Return true if VALUE is already available on edge PE. */
3188
3189 static bool
ifcvt_available_on_edge_p(edge pe,tree value)3190 ifcvt_available_on_edge_p (edge pe, tree value)
3191 {
3192 if (is_gimple_min_invariant (value))
3193 return true;
3194
3195 if (TREE_CODE (value) == SSA_NAME)
3196 {
3197 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3198 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3199 return true;
3200 }
3201
3202 return false;
3203 }
3204
3205 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3206 edge PE. */
3207
3208 static bool
ifcvt_can_hoist(class loop * loop,edge pe,gimple * stmt)3209 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3210 {
3211 if (auto *call = dyn_cast<gcall *> (stmt))
3212 {
3213 if (gimple_call_internal_p (call)
3214 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3215 return false;
3216 }
3217 else if (auto *assign = dyn_cast<gassign *> (stmt))
3218 {
3219 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3220 return false;
3221 }
3222 else
3223 return false;
3224
3225 if (gimple_has_side_effects (stmt)
3226 || gimple_could_trap_p (stmt)
3227 || stmt_could_throw_p (cfun, stmt)
3228 || gimple_vdef (stmt)
3229 || gimple_vuse (stmt))
3230 return false;
3231
3232 int num_args = gimple_num_args (stmt);
3233 if (pe != loop_preheader_edge (loop))
3234 {
3235 for (int i = 0; i < num_args; ++i)
3236 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3237 return false;
3238 }
3239 else
3240 {
3241 for (int i = 0; i < num_args; ++i)
3242 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3243 return false;
3244 }
3245
3246 return true;
3247 }
3248
3249 /* Hoist invariant statements from LOOP to edge PE. */
3250
3251 static void
ifcvt_hoist_invariants(class loop * loop,edge pe)3252 ifcvt_hoist_invariants (class loop *loop, edge pe)
3253 {
3254 gimple_stmt_iterator hoist_gsi = {};
3255 unsigned int num_blocks = loop->num_nodes;
3256 basic_block *body = get_loop_body (loop);
3257 for (unsigned int i = 0; i < num_blocks; ++i)
3258 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3259 {
3260 gimple *stmt = gsi_stmt (gsi);
3261 if (ifcvt_can_hoist (loop, pe, stmt))
3262 {
3263 /* Once we've hoisted one statement, insert other statements
3264 after it. */
3265 gsi_remove (&gsi, false);
3266 if (hoist_gsi.ptr)
3267 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3268 else
3269 {
3270 gsi_insert_on_edge_immediate (pe, stmt);
3271 hoist_gsi = gsi_for_stmt (stmt);
3272 }
3273 continue;
3274 }
3275 gsi_next (&gsi);
3276 }
3277 free (body);
3278 }
3279
/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  PREDS collects the .LOOP_VECTORIZED internal calls created
   by loop versioning so the caller can fold them later.  */

unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  class loop *rloop;
  bitmap exit_bbs;
  edge pe;

 again:
  /* Reset per-loop state; we may loop back here (via RLOOP) to also
     if-convert the inner loop of a versioned outer loop.  */
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_predicate = false;
  need_to_rewrite_undefined = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  if (!if_convertible_loop_p (loop)
      || !dbg_cnt (if_conversion_tree))
    goto cleanup;

  /* Predication and complicated PHIs only pay off when the loop will
     actually be vectorized afterwards; bail out otherwise.  */
  if ((need_to_predicate || any_complicated_phi)
      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
	  || loop->dont_vectorize))
    goto cleanup;

  /* The edge to insert invariant stmts on.  */
  pe = loop_preheader_edge (loop);

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (vloop, preds);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
      else
	/* If we versioned loop then make sure to insert invariant
	   stmts before the .LOOP_VECTORIZED check since the vectorizer
	   will re-use that for things like runtime alias versioning
	   whose condition can end up using those invariants.  */
	pe = single_pred_edge (gimple_bb (preds->last ()));
    }

  /* Now all statements are if-convertible.  Combine all the basic
     blocks into one huge basic block doing the if-conversion
     on-the-fly.  */
  combine_blocks (loop);

  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
     and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ???  We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
  bitmap_set_bit (exit_bbs, loop->latch->index);

  /* Fold away SSA copies recorded during predication before running VN.  */
  std::pair <tree, tree> *name_pair;
  unsigned ssa_names_idx;
  FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  ifcvt_hoist_invariants (loop, pe);

  todo |= TODO_cleanup_cfg;

 cleanup:
  /* Release per-BB predicate state allocated by the analysis phase.  */
  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  /* If outer-loop versioning queued an inner loop, process it now.  */
  if (rloop != NULL)
    {
      loop = rloop;
      goto again;
    }

  return todo;
}
3419
3420 /* Tree if-conversion pass management. */
3421
3422 namespace {
3423
/* Pass-manager metadata for the "ifcvt" GIMPLE pass.  */

const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3436
/* The if-conversion pass object; gate and execute are defined below.  */

class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_if_conversion
3449
3450 bool
gate(function * fun)3451 pass_if_conversion::gate (function *fun)
3452 {
3453 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3454 && flag_tree_loop_if_convert != 0)
3455 || flag_tree_loop_if_convert == 1);
3456 }
3457
/* Main entry: if-convert every candidate loop in FUN, then update the
   IL and fold away .LOOP_VECTORIZED calls whose loop vanished.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  unsigned todo = 0;

  /* Nothing to do without loops (loop 0 is the function body).  */
  if (number_of_loops (fun) <= 1)
    return 0;

  /* PREDS accumulates the .LOOP_VECTORIZED calls created by versioning
     inside tree_if_conversion.  */
  auto_vec<gimple *> preds;
  for (auto loop : loops_list (cfun, 0))
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, &preds);

  /* If-conversion invalidates niter estimates and SCEV caches.  */
  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  /* Verify no stale aux data was left on basic blocks.  */
  if (flag_checking)
    {
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  /* Perform IL update now, it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
	todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop fall back to the original one.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      /* The call itself may have been removed by cleanup.  */
      if (!gimple_bb (g))
	continue;
      unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
      if (!get_loop (fun, ifcvt_loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "If-converted loop vanished\n");
	  /* Resolve the check in favor of the non-if-converted copy.  */
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }

  return 0;
}
3513
3514 } // anon namespace
3515
gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  /* Factory used by the pass manager to instantiate the pass.  */
  return new pass_if_conversion (ctxt);
}
3521