1 /* Loop invariant motion.
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "tree-ssa.h"
50 #include "dbgcnt.h"
51
52 /* TODO: Support for predicated code motion. I.e.
53
54 while (1)
55 {
56 if (cond)
57 {
58 a = inv;
59 something;
60 }
61 }
62
63 Where COND and INV are invariants, but evaluating INV may trap or be
64 invalid for some other reason if !COND. This may be transformed to
65
66 if (cond)
67 a = inv;
68 while (1)
69 {
70 if (cond)
71 something;
72 } */
73
74 /* The auxiliary data kept for each statement. */
75
76 struct lim_aux_data
77 {
78 class loop *max_loop; /* The outermost loop in which the statement
79 is invariant. */
80
81 class loop *tgt_loop; /* The loop out of which we want to move the
82 invariant. */
83
84 class loop *always_executed_in;
85 /* The outermost loop for which we are sure
86 the statement is executed if the loop
87 is entered. */
88
89 unsigned cost; /* Cost of the computation performed by the
90 statement. */
91
92 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
93
94 vec<gimple *> depends; /* Vector of statements that must also be
95 hoisted out of the loop when this statement
96 is hoisted; i.e. those that define the
97 operands of the statement and are inside of
98 the MAX_LOOP loop. */
99 };
100
101 /* Maps statements to their lim_aux_data. */
102
103 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
104
105 /* Description of a memory reference location. */
106
107 struct mem_ref_loc
108 {
109 tree *ref; /* The reference itself. */
110 gimple *stmt; /* The statement in which it occurs. */
111 };
112
113
114 /* Description of a memory reference. */
115
116 class im_mem_ref
117 {
118 public:
119 unsigned id : 30; /* ID assigned to the memory reference
120 (its index in memory_accesses.refs_list) */
121 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
122 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
123 hashval_t hash; /* Its hash value. */
124
125 /* The memory access itself and associated caching of alias-oracle
126 query meta-data. We use mem.ref == error_mark_node for the
127 case where the reference is represented by its single access stmt
128 in accesses_in_loop[0]. */
129 ao_ref mem;
130
131 bitmap stored; /* The set of loops in which this memory location
132 is stored to. */
133 bitmap loaded; /* The set of loops in which this memory location
134 is loaded from. */
135 vec<mem_ref_loc> accesses_in_loop;
136 /* The locations of the accesses. */
137
138 /* The following set is computed on demand. */
139 bitmap_head dep_loop; /* The set of loops in which the memory
140 reference is {in,}dependent in
141 different modes. */
142 };
143
144 /* We use six bits per loop in the ref->dep_loop bitmap to record
145 the dep_kind x dep_state combinations. */
146
147 enum dep_kind { lim_raw, sm_war, sm_waw };
148 enum dep_state { dep_unknown, dep_independent, dep_dependent };
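
/* For a loop with number N and dependence kind K, bit 6 * N + 2 * K of
   dep_loop records that the reference is known independent and bit
   6 * N + 2 * K + 1 that it is known dependent; see record_loop_dependence
   and query_loop_dependence below.  */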
149
150 /* Coldest outermost loop for a given loop. */
151 vec<class loop *> coldest_outermost_loop;
152 /* Hotter outer loop nearest to a given loop. */
153 vec<class loop *> hotter_than_inner_loop;
154
155 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
156
157 static void
158 record_loop_dependence (class loop *loop, im_mem_ref *ref,
159 dep_kind kind, dep_state state)
160 {
161 gcc_assert (state != dep_unknown);
162 unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
163 bitmap_set_bit (&ref->dep_loop, bit);
164 }
165
166 /* Query the loop dependence cache of REF for LOOP, KIND. */
167
168 static dep_state
169 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
170 {
171 unsigned first_bit = 6 * loop->num + kind * 2;
172 if (bitmap_bit_p (&ref->dep_loop, first_bit))
173 return dep_independent;
174 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
175 return dep_dependent;
176 return dep_unknown;
177 }
178
179 /* Mem_ref hashtable helpers. */
180
181 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
182 {
183 typedef ao_ref *compare_type;
184 static inline hashval_t hash (const im_mem_ref *);
185 static inline bool equal (const im_mem_ref *, const ao_ref *);
186 };
187
188 /* A hash function for class im_mem_ref object OBJ. */
189
190 inline hashval_t
191 mem_ref_hasher::hash (const im_mem_ref *mem)
192 {
193 return mem->hash;
194 }
195
196 /* An equality function for class im_mem_ref object MEM1 with
197 memory reference OBJ2. */
198
199 inline bool
200 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
201 {
202 if (obj2->max_size_known_p ())
203 return (mem1->ref_decomposed
204 && ((TREE_CODE (mem1->mem.base) == MEM_REF
205 && TREE_CODE (obj2->base) == MEM_REF
206 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
207 TREE_OPERAND (obj2->base, 0), 0)
208 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
209 mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
210 || (operand_equal_p (mem1->mem.base, obj2->base, 0)
211 && known_eq (mem1->mem.offset, obj2->offset)))
212 && known_eq (mem1->mem.size, obj2->size)
213 && known_eq (mem1->mem.max_size, obj2->max_size)
214 && mem1->mem.volatile_p == obj2->volatile_p
215 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
216 /* We are not canonicalizing alias-sets but for the
217 special case where we did not canonicalize yet and the
218 incoming ref is an alias-set zero MEM we pick
219 the correct one already. */
220 || (!mem1->ref_canonical
221 && (TREE_CODE (obj2->ref) == MEM_REF
222 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
223 && obj2->ref_alias_set == 0)
224 /* Likewise if there's a canonical ref with alias-set zero. */
225 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
226 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
227 TREE_TYPE (obj2->ref)));
228 else
229 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
230 }
231
232
233 /* Description of memory accesses in loops. */
234
235 static struct
236 {
237 /* The hash table of memory references accessed in loops. */
238 hash_table<mem_ref_hasher> *refs;
239
240 /* The list of memory references. */
241 vec<im_mem_ref *> refs_list;
242
243 /* The set of memory references accessed in each loop. */
244 vec<bitmap_head> refs_loaded_in_loop;
245
246 /* The set of memory references stored in each loop. */
247 vec<bitmap_head> refs_stored_in_loop;
248
249 /* The set of memory references stored in each loop, including subloops. */
250 vec<bitmap_head> all_refs_stored_in_loop;
251
252 /* Cache for expanding memory addresses. */
253 hash_map<tree, name_expansion *> *ttae_cache;
254 } memory_accesses;
255
256 /* Obstack for the bitmaps in the above data structures. */
257 static bitmap_obstack lim_bitmap_obstack;
258 static obstack mem_ref_obstack;
259
260 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
261 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
262 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
263
264 /* Minimum cost of an expensive expression. */
265 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
266
267 /* The outermost loop for which execution of the header guarantees that the
268 block will be executed. */
269 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
270 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
271
272 /* ID of the shared unanalyzable mem. */
273 #define UNANALYZABLE_MEM_ID 0
274
275 /* Whether the reference was analyzable. */
276 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
277
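/* Allocate zero-initialized lim_aux_data for STMT, record it in
   lim_aux_data_map and return it.  */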
278 static struct lim_aux_data *
279 init_lim_data (gimple *stmt)
280 {
281 lim_aux_data *p = XCNEW (struct lim_aux_data);
282 lim_aux_data_map->put (stmt, p);
283
284 return p;
285 }
286
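/* Return the lim_aux_data recorded for STMT, or NULL if there is none.  */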
287 static struct lim_aux_data *
288 get_lim_data (gimple *stmt)
289 {
290 lim_aux_data **p = lim_aux_data_map->get (stmt);
291 if (!p)
292 return NULL;
293
294 return *p;
295 }
296
297 /* Releases the memory occupied by DATA. */
298
299 static void
300 free_lim_aux_data (struct lim_aux_data *data)
301 {
302 data->depends.release ();
303 free (data);
304 }
305
306 static void
307 clear_lim_data (gimple *stmt)
308 {
309 lim_aux_data **p = lim_aux_data_map->get (stmt);
310 if (!p)
311 return;
312
313 free_lim_aux_data (*p);
314 *p = NULL;
315 }
316
317
318 /* The possibilities of statement movement. */
319 enum move_pos
320 {
321 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
322 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
323 to become executed -- memory accesses, ... */
324 MOVE_POSSIBLE /* Unlimited movement. */
325 };
326
327
328 /* If it is possible to hoist the statement STMT unconditionally,
329 returns MOVE_POSSIBLE.
330 If it is possible to hoist the statement STMT, but we must avoid making
331 it executed if it would not be executed in the original program (e.g.
332 because it may trap), return MOVE_PRESERVE_EXECUTION.
333 Otherwise return MOVE_IMPOSSIBLE. */
334
335 static enum move_pos
336 movement_possibility_1 (gimple *stmt)
337 {
338 tree lhs;
339 enum move_pos ret = MOVE_POSSIBLE;
340
341 if (flag_unswitch_loops
342 && gimple_code (stmt) == GIMPLE_COND)
343 {
344 /* If we perform unswitching, force the operands of the invariant
345 condition to be moved out of the loop. */
346 return MOVE_POSSIBLE;
347 }
348
349 if (gimple_code (stmt) == GIMPLE_PHI
350 && gimple_phi_num_args (stmt) <= 2
351 && !virtual_operand_p (gimple_phi_result (stmt))
352 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
353 return MOVE_POSSIBLE;
354
355 if (gimple_get_lhs (stmt) == NULL_TREE)
356 return MOVE_IMPOSSIBLE;
357
358 if (gimple_vdef (stmt))
359 return MOVE_IMPOSSIBLE;
360
361 if (stmt_ends_bb_p (stmt)
362 || gimple_has_volatile_ops (stmt)
363 || gimple_has_side_effects (stmt)
364 || stmt_could_throw_p (cfun, stmt))
365 return MOVE_IMPOSSIBLE;
366
367 if (is_gimple_call (stmt))
368 {
369 /* While a pure or const call is guaranteed to have no side effects, we
370 cannot move it arbitrarily. Consider code like
371
372 char *s = something ();
373
374 while (1)
375 {
376 if (s)
377 t = strlen (s);
378 else
379 t = 0;
380 }
381
382 Here the strlen call cannot be moved out of the loop, even though
383 s is invariant. In addition to possibly creating a call with
384 invalid arguments, moving out a function call that is not executed
385 may cause performance regressions in case the call is costly and
386 not executed at all. */
387 ret = MOVE_PRESERVE_EXECUTION;
388 lhs = gimple_call_lhs (stmt);
389 }
390 else if (is_gimple_assign (stmt))
391 lhs = gimple_assign_lhs (stmt);
392 else
393 return MOVE_IMPOSSIBLE;
394
395 if (TREE_CODE (lhs) == SSA_NAME
396 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
397 return MOVE_IMPOSSIBLE;
398
399 if (TREE_CODE (lhs) != SSA_NAME
400 || gimple_could_trap_p (stmt))
401 return MOVE_PRESERVE_EXECUTION;
402
403 /* Non local loads in a transaction cannot be hoisted out. Well,
404 unless the load happens on every path out of the loop, but we
405 don't take this into account yet. */
406 if (flag_tm
407 && gimple_in_transaction (stmt)
408 && gimple_assign_single_p (stmt))
409 {
410 tree rhs = gimple_assign_rhs1 (stmt);
411 if (DECL_P (rhs) && is_global_var (rhs))
412 {
413 if (dump_file)
414 {
415 fprintf (dump_file, "Cannot hoist conditional load of ");
416 print_generic_expr (dump_file, rhs, TDF_SLIM);
417 fprintf (dump_file, " because it is in a transaction.\n");
418 }
419 return MOVE_IMPOSSIBLE;
420 }
421 }
422
423 return ret;
424 }
425
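/* Return the movement possibility of STMT as computed by
   movement_possibility_1, demoting MOVE_POSSIBLE to MOVE_PRESERVE_EXECUTION
   when STMT uses an SSA name that may be undefined.  */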
426 static enum move_pos
427 movement_possibility (gimple *stmt)
428 {
429 enum move_pos pos = movement_possibility_1 (stmt);
430 if (pos == MOVE_POSSIBLE)
431 {
432 use_operand_p use_p;
433 ssa_op_iter ssa_iter;
434 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
435 if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
436 && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
437 return MOVE_PRESERVE_EXECUTION;
438 }
439 return pos;
440 }
441
442
443 /* Compare the profile count of BB with that of LOOP's preheader. The
444 comparison is three-state as stated in profile-count.h; false is returned
445 if the inequality cannot be decided. */
446 bool
447 bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
448 {
449 gcc_assert (bb && loop);
450 return bb->count < loop_preheader_edge (loop)->src->count;
451 }
452
453 /* Find the coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
454 counts.
455 The check is done in three steps:
456 1) Check whether CURR_BB is cold in its own loop_father; if it is cold, just
457 return NULL, which means it should not be moved out at all;
458 2) If CURR_BB is NOT cold, check whether the pre-computed COLDEST_LOOP is
459 outside of OUTERMOST_LOOP; if it is inside of OUTERMOST_LOOP, return COLDEST_LOOP;
460 3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
461 hotter loop between OUTERMOST_LOOP and LOOP in the pre-computed
462 HOTTER_THAN_INNER_LOOP; if there is, return its nested inner loop, otherwise
463 return OUTERMOST_LOOP.
464 In the remaining case COLDEST_LOOP is inside of OUTERMOST_LOOP, so just return
465 it as the hoist target. */
466
467 static class loop *
468 get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
469 basic_block curr_bb)
470 {
471 gcc_assert (outermost_loop == loop
472 || flow_loop_nested_p (outermost_loop, loop));
473
474 /* If bb_colder_than_loop_preheader returns false due to three-state
475 comparison, OUTERMOST_LOOP is ultimately returned to preserve the behavior.
476 Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP. */
477 if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
478 return NULL;
479
480 class loop *coldest_loop = coldest_outermost_loop[loop->num];
481 if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
482 {
483 class loop *hotter_loop = hotter_than_inner_loop[loop->num];
484 if (!hotter_loop
485 || loop_depth (hotter_loop) < loop_depth (outermost_loop))
486 return outermost_loop;
487
488 /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
489 [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
490 hotter_loop, second_coldest_loop, ..., loop]
491 return second_coldest_loop to be the hoist target. */
492 class loop *aloop;
493 for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
494 if (aloop == loop || flow_loop_nested_p (aloop, loop))
495 return aloop;
496 }
497 return coldest_loop;
498 }
499
500 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
501 loop to which we could move the expression using DEF if it did not have
502 other operands, i.e. the outermost loop enclosing LOOP in which the value
503 of DEF is invariant. */
504
505 static class loop *
506 outermost_invariant_loop (tree def, class loop *loop)
507 {
508 gimple *def_stmt;
509 basic_block def_bb;
510 class loop *max_loop;
511 struct lim_aux_data *lim_data;
512
513 if (!def)
514 return superloop_at_depth (loop, 1);
515
516 if (TREE_CODE (def) != SSA_NAME)
517 {
518 gcc_assert (is_gimple_min_invariant (def));
519 return superloop_at_depth (loop, 1);
520 }
521
522 def_stmt = SSA_NAME_DEF_STMT (def);
523 def_bb = gimple_bb (def_stmt);
524 if (!def_bb)
525 return superloop_at_depth (loop, 1);
526
527 max_loop = find_common_loop (loop, def_bb->loop_father);
528
529 lim_data = get_lim_data (def_stmt);
530 if (lim_data != NULL && lim_data->max_loop != NULL)
531 max_loop = find_common_loop (max_loop,
532 loop_outer (lim_data->max_loop));
533 if (max_loop == loop)
534 return NULL;
535 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
536
537 return max_loop;
538 }
539
540 /* DATA is a structure containing information associated with a statement
541 inside LOOP. DEF is one of the operands of this statement.
542
543 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
544 and record this in the DATA->max_loop field. If DEF itself is defined inside
545 this loop as well (i.e. we need to hoist it out of the loop if we want
546 to hoist the statement represented by DATA), record the statement that
547 defines DEF in the DATA->depends list. Additionally, if ADD_COST is true,
548 add the cost of the computation of DEF to DATA->cost.
549
550 If DEF is not invariant in LOOP, return false. Otherwise return true. */
551
552 static bool
553 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
554 bool add_cost)
555 {
556 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
557 basic_block def_bb = gimple_bb (def_stmt);
558 class loop *max_loop;
559 struct lim_aux_data *def_data;
560
561 if (!def_bb)
562 return true;
563
564 max_loop = outermost_invariant_loop (def, loop);
565 if (!max_loop)
566 return false;
567
568 if (flow_loop_nested_p (data->max_loop, max_loop))
569 data->max_loop = max_loop;
570
571 def_data = get_lim_data (def_stmt);
572 if (!def_data)
573 return true;
574
575 if (add_cost
576 /* Only add the cost if the statement defining DEF is inside LOOP,
577 i.e. if it is likely that by moving the invariants dependent
578 on it, we will be able to avoid creating a new register for
579 it (since it will be only used in these dependent invariants). */
580 && def_bb->loop_father == loop)
581 data->cost += def_data->cost;
582
583 data->depends.safe_push (def_stmt);
584
585 return true;
586 }
587
588 /* Returns an estimate for a cost of statement STMT. The values here
589 are just ad-hoc constants, similar to costs for inlining. */
590
591 static unsigned
592 stmt_cost (gimple *stmt)
593 {
594 /* Always try to create possibilities for unswitching. */
595 if (gimple_code (stmt) == GIMPLE_COND
596 || gimple_code (stmt) == GIMPLE_PHI)
597 return LIM_EXPENSIVE;
598
599 /* We should be hoisting calls if possible. */
600 if (is_gimple_call (stmt))
601 {
602 tree fndecl;
603
604 /* Unless the call is a builtin_constant_p; this always folds to a
605 constant, so moving it is useless. */
606 fndecl = gimple_call_fndecl (stmt);
607 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
608 return 0;
609
610 return LIM_EXPENSIVE;
611 }
612
613 /* Hoisting memory references out should almost surely be a win. */
614 if (gimple_references_memory_p (stmt))
615 return LIM_EXPENSIVE;
616
617 if (gimple_code (stmt) != GIMPLE_ASSIGN)
618 return 1;
619
620 switch (gimple_assign_rhs_code (stmt))
621 {
622 case MULT_EXPR:
623 case WIDEN_MULT_EXPR:
624 case WIDEN_MULT_PLUS_EXPR:
625 case WIDEN_MULT_MINUS_EXPR:
626 case DOT_PROD_EXPR:
627 case TRUNC_DIV_EXPR:
628 case CEIL_DIV_EXPR:
629 case FLOOR_DIV_EXPR:
630 case ROUND_DIV_EXPR:
631 case EXACT_DIV_EXPR:
632 case CEIL_MOD_EXPR:
633 case FLOOR_MOD_EXPR:
634 case ROUND_MOD_EXPR:
635 case TRUNC_MOD_EXPR:
636 case RDIV_EXPR:
637 /* Division and multiplication are usually expensive. */
638 return LIM_EXPENSIVE;
639
640 case LSHIFT_EXPR:
641 case RSHIFT_EXPR:
642 case WIDEN_LSHIFT_EXPR:
643 case LROTATE_EXPR:
644 case RROTATE_EXPR:
645 /* Shifts and rotates are usually expensive. */
646 return LIM_EXPENSIVE;
647
648 case CONSTRUCTOR:
649 /* Make vector construction cost proportional to the number
650 of elements. */
651 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
652
653 case SSA_NAME:
654 case PAREN_EXPR:
655 /* Whether or not something is wrapped inside a PAREN_EXPR
656 should not change move cost. Nor should an intermediate
657 unpropagated SSA name copy. */
658 return 0;
659
660 default:
661 return 1;
662 }
663 }
664
665 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
666 REF is independent. If REF is not independent in LOOP, NULL is returned
667 instead. */
668
669 static class loop *
670 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
671 {
672 class loop *aloop;
673
674 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
675 return NULL;
676
677 for (aloop = outer;
678 aloop != loop;
679 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
680 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
681 && ref_indep_loop_p (aloop, ref, lim_raw))
682 return aloop;
683
684 if (ref_indep_loop_p (loop, ref, lim_raw))
685 return loop;
686 else
687 return NULL;
688 }
689
690 /* If there is a simple load or store to a memory reference in STMT, returns
691 the location of the memory reference, and sets IS_STORE according to whether
692 it is a store or load. Otherwise, returns NULL. */
693
694 static tree *
695 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
696 {
697 tree *lhs, *rhs;
698
699 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
700 if (!gimple_assign_single_p (stmt))
701 return NULL;
702
703 lhs = gimple_assign_lhs_ptr (stmt);
704 rhs = gimple_assign_rhs1_ptr (stmt);
705
706 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
707 {
708 *is_store = false;
709 return rhs;
710 }
711 else if (gimple_vdef (stmt)
712 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
713 {
714 *is_store = true;
715 return lhs;
716 }
717 else
718 return NULL;
719 }
720
721 /* From a controlling predicate in DOM determine the arguments from
722 the PHI node PHI that are chosen if the predicate evaluates to
723 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
724 they are non-NULL. Returns true if the arguments can be determined,
725 else return false. */
726
727 static bool
728 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
729 tree *true_arg_p, tree *false_arg_p)
730 {
731 edge te, fe;
732 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
733 &te, &fe))
734 return false;
735
736 if (true_arg_p)
737 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
738 if (false_arg_p)
739 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
740
741 return true;
742 }
743
744 /* Determine the outermost loop to which it is possible to hoist a statement
745 STMT and store it in LIM_DATA (STMT)->max_loop. To do this we determine
746 the outermost loop in which the value computed by STMT is invariant.
747 If MUST_PRESERVE_EXEC is true, additionally choose the loop so that
748 we preserve whether STMT is executed. It also fills in other related
749 information in LIM_DATA (STMT).
750
751 The function returns false if STMT cannot be hoisted outside of the loop it
752 is defined in, and true otherwise. */
753
754 static bool
755 determine_max_movement (gimple *stmt, bool must_preserve_exec)
756 {
757 basic_block bb = gimple_bb (stmt);
758 class loop *loop = bb->loop_father;
759 class loop *level;
760 struct lim_aux_data *lim_data = get_lim_data (stmt);
761 tree val;
762 ssa_op_iter iter;
763
764 if (must_preserve_exec)
765 level = ALWAYS_EXECUTED_IN (bb);
766 else
767 level = superloop_at_depth (loop, 1);
768 lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
769 if (!lim_data->max_loop)
770 return false;
771
772 if (gphi *phi = dyn_cast <gphi *> (stmt))
773 {
774 use_operand_p use_p;
775 unsigned min_cost = UINT_MAX;
776 unsigned total_cost = 0;
777 struct lim_aux_data *def_data;
778
779 /* We will end up promoting dependencies to be unconditionally
780 evaluated. For this reason the PHI cost (and thus the
781 cost we remove from the loop by doing the invariant motion)
782 is that of the cheapest PHI argument dependency chain. */
783 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
784 {
785 val = USE_FROM_PTR (use_p);
786
787 if (TREE_CODE (val) != SSA_NAME)
788 {
789 /* Assign cost 1 to constants. */
790 min_cost = MIN (min_cost, 1);
791 total_cost += 1;
792 continue;
793 }
794 if (!add_dependency (val, lim_data, loop, false))
795 return false;
796
797 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
798 if (gimple_bb (def_stmt)
799 && gimple_bb (def_stmt)->loop_father == loop)
800 {
801 def_data = get_lim_data (def_stmt);
802 if (def_data)
803 {
804 min_cost = MIN (min_cost, def_data->cost);
805 total_cost += def_data->cost;
806 }
807 }
808 }
809
810 min_cost = MIN (min_cost, total_cost);
811 lim_data->cost += min_cost;
812
813 if (gimple_phi_num_args (phi) > 1)
814 {
815 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
816 gimple *cond;
817 if (gsi_end_p (gsi_last_bb (dom)))
818 return false;
819 cond = gsi_stmt (gsi_last_bb (dom));
820 if (gimple_code (cond) != GIMPLE_COND)
821 return false;
822 /* Verify that this is an extended form of a diamond and
823 the PHI arguments are completely controlled by the
824 predicate in DOM. */
825 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
826 return false;
827
828 /* Fold in dependencies and cost of the condition. */
829 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
830 {
831 if (!add_dependency (val, lim_data, loop, false))
832 return false;
833 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
834 if (def_data)
835 lim_data->cost += def_data->cost;
836 }
837
838 /* We want to avoid unconditionally executing very expensive
839 operations. As costs for our dependencies cannot be
840 negative, just claim we are not invariant in this case.
841 We are also not sure whether the control-flow inside the
842 loop will vanish. */
843 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
844 && !(min_cost != 0
845 && total_cost / min_cost <= 2))
846 return false;
847
848 /* Assume that the control-flow in the loop will vanish.
849 ??? We should verify this and not artificially increase
850 the cost if that is not the case. */
851 lim_data->cost += stmt_cost (stmt);
852 }
853
854 return true;
855 }
856 else
857 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
858 if (!add_dependency (val, lim_data, loop, true))
859 return false;
860
861 if (gimple_vuse (stmt))
862 {
863 im_mem_ref *ref
864 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
865 if (ref
866 && MEM_ANALYZABLE (ref))
867 {
868 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
869 loop, ref);
870 if (!lim_data->max_loop)
871 return false;
872 }
873 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
874 return false;
875 }
876
877 lim_data->cost += stmt_cost (stmt);
878
879 return true;
880 }
881
882 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
883 and that one of the operands of this statement is computed by STMT.
884 Ensure that STMT (together with all the statements that define its
885 operands) is hoisted at least out of the loop LEVEL. */
886
887 static void
888 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
889 {
890 class loop *stmt_loop = gimple_bb (stmt)->loop_father;
891 struct lim_aux_data *lim_data;
892 gimple *dep_stmt;
893 unsigned i;
894
895 stmt_loop = find_common_loop (orig_loop, stmt_loop);
896 lim_data = get_lim_data (stmt);
897 if (lim_data != NULL && lim_data->tgt_loop != NULL)
898 stmt_loop = find_common_loop (stmt_loop,
899 loop_outer (lim_data->tgt_loop));
900 if (flow_loop_nested_p (stmt_loop, level))
901 return;
902
903 gcc_assert (level == lim_data->max_loop
904 || flow_loop_nested_p (lim_data->max_loop, level));
905
906 lim_data->tgt_loop = level;
907 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
908 set_level (dep_stmt, orig_loop, level);
909 }
910
911 /* Determines an outermost loop from which we want to hoist the statement STMT.
912 For now we choose the outermost possible loop. TODO -- use profiling
913 information to set it more sanely. */
914
915 static void
916 set_profitable_level (gimple *stmt)
917 {
918 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
919 }
920
921 /* Returns true if STMT is a call that has side effects. */
922
923 static bool
924 nonpure_call_p (gimple *stmt)
925 {
926 if (gimple_code (stmt) != GIMPLE_CALL)
927 return false;
928
929 return gimple_has_side_effects (stmt);
930 }
931
932 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
933
934 static gimple *
935 rewrite_reciprocal (gimple_stmt_iterator *bsi)
936 {
937 gassign *stmt, *stmt1, *stmt2;
938 tree name, lhs, type;
939 tree real_one;
940 gimple_stmt_iterator gsi;
941
942 stmt = as_a <gassign *> (gsi_stmt (*bsi));
943 lhs = gimple_assign_lhs (stmt);
944 type = TREE_TYPE (lhs);
945
946 real_one = build_one_cst (type);
947
948 name = make_temp_ssa_name (type, NULL, "reciptmp");
949 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
950 gimple_assign_rhs2 (stmt));
951 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
952 gimple_assign_rhs1 (stmt));
953
954 /* Replace division stmt with reciprocal and multiply stmts.
955 The multiply stmt is not invariant, so update iterator
956 and avoid rescanning. */
957 gsi = *bsi;
958 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
959 gsi_replace (&gsi, stmt2, true);
960
961 /* Continue processing with invariant reciprocal statement. */
962 return stmt1;
963 }
964
965 /* Check if the pattern at *BSI is a bittest of the form
966 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
967
968 static gimple *
969 rewrite_bittest (gimple_stmt_iterator *bsi)
970 {
971 gassign *stmt;
972 gimple *stmt1;
973 gassign *stmt2;
974 gimple *use_stmt;
975 gcond *cond_stmt;
976 tree lhs, name, t, a, b;
977 use_operand_p use;
978
979 stmt = as_a <gassign *> (gsi_stmt (*bsi));
980 lhs = gimple_assign_lhs (stmt);
981
982 /* Verify that the single use of lhs is a comparison against zero. */
983 if (TREE_CODE (lhs) != SSA_NAME
984 || !single_imm_use (lhs, &use, &use_stmt))
985 return stmt;
986 cond_stmt = dyn_cast <gcond *> (use_stmt);
987 if (!cond_stmt)
988 return stmt;
989 if (gimple_cond_lhs (cond_stmt) != lhs
990 || (gimple_cond_code (cond_stmt) != NE_EXPR
991 && gimple_cond_code (cond_stmt) != EQ_EXPR)
992 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
993 return stmt;
994
995 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
996 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
997 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
998 return stmt;
999
1000 /* There is a conversion in between possibly inserted by fold. */
1001 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1002 {
1003 t = gimple_assign_rhs1 (stmt1);
1004 if (TREE_CODE (t) != SSA_NAME
1005 || !has_single_use (t))
1006 return stmt;
1007 stmt1 = SSA_NAME_DEF_STMT (t);
1008 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1009 return stmt;
1010 }
1011
1012 /* Verify that B is loop invariant but A is not. Verify that with
1013 all the stmt walking we are still in the same loop. */
1014 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
1015 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
1016 return stmt;
1017
1018 a = gimple_assign_rhs1 (stmt1);
1019 b = gimple_assign_rhs2 (stmt1);
1020
1021 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1022 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1023 {
1024 gimple_stmt_iterator rsi;
1025
1026 /* 1 << B */
1027 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1028 build_int_cst (TREE_TYPE (a), 1), b);
1029 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1030 stmt1 = gimple_build_assign (name, t);
1031
1032 /* A & (1 << B) */
1033 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1034 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1035 stmt2 = gimple_build_assign (name, t);
1036
1037 /* Replace the SSA_NAME we compare against zero. Adjust
1038 the type of zero accordingly. */
1039 SET_USE (use, name);
1040 gimple_cond_set_rhs (cond_stmt,
1041 build_int_cst_type (TREE_TYPE (name),
1042 0));
1043
1044 /* Don't use gsi_replace here, none of the new assignments sets
1045 the variable originally set in stmt. Move bsi to stmt1, and
1046 then remove the original stmt, so that we get a chance to
1047 retain debug info for it. */
1048 rsi = *bsi;
1049 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1050 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1051 gimple *to_release = gsi_stmt (rsi);
1052 gsi_remove (&rsi, true);
1053 release_defs (to_release);
1054
1055 return stmt1;
1056 }
1057
1058 return stmt;
1059 }
1060
1061 /* Determine the outermost loops in which statements in basic block BB are
1062 invariant, and record them to the LIM_DATA associated with the
1063 statements. */
1064
1065 static void
1066 compute_invariantness (basic_block bb)
1067 {
1068 enum move_pos pos;
1069 gimple_stmt_iterator bsi;
1070 gimple *stmt;
1071 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1072 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1073 struct lim_aux_data *lim_data;
1074
1075 if (!loop_outer (bb->loop_father))
1076 return;
1077
1078 if (dump_file && (dump_flags & TDF_DETAILS))
1079 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1080 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1081
1082 /* Look at PHI nodes, but only if there are at most two.
1083 ??? We could relax this further by post-processing the inserted
1084 code and transforming adjacent cond-exprs with the same predicate
1085 to control flow again. */
1086 bsi = gsi_start_phis (bb);
1087 if (!gsi_end_p (bsi)
1088 && ((gsi_next (&bsi), gsi_end_p (bsi))
1089 || (gsi_next (&bsi), gsi_end_p (bsi))))
1090 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1091 {
1092 stmt = gsi_stmt (bsi);
1093
1094 pos = movement_possibility (stmt);
1095 if (pos == MOVE_IMPOSSIBLE)
1096 continue;
1097
1098 lim_data = get_lim_data (stmt);
1099 if (! lim_data)
1100 lim_data = init_lim_data (stmt);
1101 lim_data->always_executed_in = outermost;
1102
1103 if (!determine_max_movement (stmt, false))
1104 {
1105 lim_data->max_loop = NULL;
1106 continue;
1107 }
1108
1109 if (dump_file && (dump_flags & TDF_DETAILS))
1110 {
1111 print_gimple_stmt (dump_file, stmt, 2);
1112 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1113 loop_depth (lim_data->max_loop),
1114 lim_data->cost);
1115 }
1116
1117 if (lim_data->cost >= LIM_EXPENSIVE)
1118 set_profitable_level (stmt);
1119 }
1120
1121 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1122 {
1123 stmt = gsi_stmt (bsi);
1124
1125 pos = movement_possibility (stmt);
1126 if (pos == MOVE_IMPOSSIBLE)
1127 {
1128 if (nonpure_call_p (stmt))
1129 {
1130 maybe_never = true;
1131 outermost = NULL;
1132 }
1133 /* Make sure to note always_executed_in for stores to make
1134 store-motion work. */
1135 else if (stmt_makes_single_store (stmt))
1136 {
1137 struct lim_aux_data *lim_data = get_lim_data (stmt);
1138 if (! lim_data)
1139 lim_data = init_lim_data (stmt);
1140 lim_data->always_executed_in = outermost;
1141 }
1142 continue;
1143 }
1144
1145 if (is_gimple_assign (stmt)
1146 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1147 == GIMPLE_BINARY_RHS))
1148 {
1149 tree op0 = gimple_assign_rhs1 (stmt);
1150 tree op1 = gimple_assign_rhs2 (stmt);
1151 class loop *ol1 = outermost_invariant_loop (op1,
1152 loop_containing_stmt (stmt));
1153
1154 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1155 to be hoisted out of loop, saving expensive divide. */
1156 if (pos == MOVE_POSSIBLE
1157 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1158 && flag_unsafe_math_optimizations
1159 && !flag_trapping_math
1160 && ol1 != NULL
1161 && outermost_invariant_loop (op0, ol1) == NULL)
1162 stmt = rewrite_reciprocal (&bsi);
1163
1164 /* If the shift count is invariant, convert (A >> B) & 1 to
1165 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1166 saving an expensive shift. */
1167 if (pos == MOVE_POSSIBLE
1168 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1169 && integer_onep (op1)
1170 && TREE_CODE (op0) == SSA_NAME
1171 && has_single_use (op0))
1172 stmt = rewrite_bittest (&bsi);
1173 }
1174
1175 lim_data = get_lim_data (stmt);
1176 if (! lim_data)
1177 lim_data = init_lim_data (stmt);
1178 lim_data->always_executed_in = outermost;
1179
1180 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1181 continue;
1182
1183 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1184 {
1185 lim_data->max_loop = NULL;
1186 continue;
1187 }
1188
1189 if (dump_file && (dump_flags & TDF_DETAILS))
1190 {
1191 print_gimple_stmt (dump_file, stmt, 2);
1192 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1193 loop_depth (lim_data->max_loop),
1194 lim_data->cost);
1195 }
1196
1197 if (lim_data->cost >= LIM_EXPENSIVE)
1198 set_profitable_level (stmt);
1199 }
1200 }
1201
1202 /* Hoist the statements in basic block BB out of the loops prescribed by
1203 data stored in LIM_DATA structures associated with each statement. Callback
1204 for walk_dominator_tree. */
1205
1206 unsigned int
1207 move_computations_worker (basic_block bb)
1208 {
1209 class loop *level;
1210 unsigned cost = 0;
1211 struct lim_aux_data *lim_data;
1212 unsigned int todo = 0;
1213
1214 if (!loop_outer (bb->loop_father))
1215 return todo;
1216
1217 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1218 {
1219 gassign *new_stmt;
1220 gphi *stmt = bsi.phi ();
1221
1222 lim_data = get_lim_data (stmt);
1223 if (lim_data == NULL)
1224 {
1225 gsi_next (&bsi);
1226 continue;
1227 }
1228
1229 cost = lim_data->cost;
1230 level = lim_data->tgt_loop;
1231 clear_lim_data (stmt);
1232
1233 if (!level)
1234 {
1235 gsi_next (&bsi);
1236 continue;
1237 }
1238
1239 if (dump_file && (dump_flags & TDF_DETAILS))
1240 {
1241 fprintf (dump_file, "Moving PHI node\n");
1242 print_gimple_stmt (dump_file, stmt, 0);
1243 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1244 cost, level->num);
1245 }
1246
1247 if (gimple_phi_num_args (stmt) == 1)
1248 {
1249 tree arg = PHI_ARG_DEF (stmt, 0);
1250 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1251 TREE_CODE (arg), arg);
1252 }
1253 else
1254 {
1255 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1256 gimple *cond = gsi_stmt (gsi_last_bb (dom));
1257 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1258 /* Get the PHI arguments corresponding to the true and false
1259 edges of COND. */
1260 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1261 gcc_assert (arg0 && arg1);
1262 t = build2 (gimple_cond_code (cond), boolean_type_node,
1263 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1264 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1265 COND_EXPR, t, arg0, arg1);
1266 todo |= TODO_cleanup_cfg;
1267 }
1268 if (!ALWAYS_EXECUTED_IN (bb)
1269 || (ALWAYS_EXECUTED_IN (bb) != level
1270 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1271 reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1272 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1273 remove_phi_node (&bsi, false);
1274 }
1275
1276 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1277 {
1278 edge e;
1279
1280 gimple *stmt = gsi_stmt (bsi);
1281
1282 lim_data = get_lim_data (stmt);
1283 if (lim_data == NULL)
1284 {
1285 gsi_next (&bsi);
1286 continue;
1287 }
1288
1289 cost = lim_data->cost;
1290 level = lim_data->tgt_loop;
1291 clear_lim_data (stmt);
1292
1293 if (!level)
1294 {
1295 gsi_next (&bsi);
1296 continue;
1297 }
1298
1299 /* We do not really want to move conditionals out of the loop; we just
1300 placed them here to force their operands to be moved if necessary. */
1301 if (gimple_code (stmt) == GIMPLE_COND)
1302 {
1303 gsi_next (&bsi);
1304 continue;
1305 }
1306
1307 if (dump_file && (dump_flags & TDF_DETAILS))
1308 {
1309 fprintf (dump_file, "Moving statement\n");
1310 print_gimple_stmt (dump_file, stmt, 0);
1311 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1312 cost, level->num);
1313 }
1314
1315 e = loop_preheader_edge (level);
1316 gcc_assert (!gimple_vdef (stmt));
1317 if (gimple_vuse (stmt))
1318 {
1319 /* The new VUSE is the one from the virtual PHI in the loop
1320 header or the one already present. */
1321 gphi_iterator gsi2;
1322 for (gsi2 = gsi_start_phis (e->dest);
1323 !gsi_end_p (gsi2); gsi_next (&gsi2))
1324 {
1325 gphi *phi = gsi2.phi ();
1326 if (virtual_operand_p (gimple_phi_result (phi)))
1327 {
1328 SET_USE (gimple_vuse_op (stmt),
1329 PHI_ARG_DEF_FROM_EDGE (phi, e));
1330 break;
1331 }
1332 }
1333 }
1334 gsi_remove (&bsi, false);
1335 if (gimple_has_lhs (stmt)
1336 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1337 && (!ALWAYS_EXECUTED_IN (bb)
1338 || !(ALWAYS_EXECUTED_IN (bb) == level
1339 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1340 reset_flow_sensitive_info (gimple_get_lhs (stmt));
1341 /* In case this is a stmt that is not unconditionally executed
1342 when the target loop header is executed and the stmt may
1343 invoke undefined integer or pointer overflow, rewrite it to
1344 unsigned arithmetic. */
1345 if (is_gimple_assign (stmt)
1346 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1347 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1348 && arith_code_with_undefined_signed_overflow
1349 (gimple_assign_rhs_code (stmt))
1350 && (!ALWAYS_EXECUTED_IN (bb)
1351 || !(ALWAYS_EXECUTED_IN (bb) == level
1352 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1353 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1354 else
1355 gsi_insert_on_edge (e, stmt);
1356 }
1357
1358 return todo;
1359 }
1360
1361 /* Checks whether the statement defining variable *INDEX can be hoisted
1362 out of the loop passed in DATA. Callback for for_each_index. */
1363
1364 static bool
1365 may_move_till (tree ref, tree *index, void *data)
1366 {
1367 class loop *loop = (class loop *) data, *max_loop;
1368
1369 /* If REF is an array reference, check also that the step and the lower
1370 bound are invariant in LOOP. */
1371 if (TREE_CODE (ref) == ARRAY_REF)
1372 {
1373 tree step = TREE_OPERAND (ref, 3);
1374 tree lbound = TREE_OPERAND (ref, 2);
1375
1376 max_loop = outermost_invariant_loop (step, loop);
1377 if (!max_loop)
1378 return false;
1379
1380 max_loop = outermost_invariant_loop (lbound, loop);
1381 if (!max_loop)
1382 return false;
1383 }
1384
1385 max_loop = outermost_invariant_loop (*index, loop);
1386 if (!max_loop)
1387 return false;
1388
1389 return true;
1390 }
1391
1392 /* If OP is an SSA name, force the statement that defines it to be
1393 moved out of the LOOP. ORIG_LOOP is the loop in which OP is used. */
1394
1395 static void
1396 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1397 {
1398 gimple *stmt;
1399
1400 if (!op
1401 || is_gimple_min_invariant (op))
1402 return;
1403
1404 gcc_assert (TREE_CODE (op) == SSA_NAME);
1405
1406 stmt = SSA_NAME_DEF_STMT (op);
1407 if (gimple_nop_p (stmt))
1408 return;
1409
1410 set_level (stmt, orig_loop, loop);
1411 }
1412
1413 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1414 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1415 for_each_index. */
1416
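/* Data passed to force_move_till via for_each_index: LOOP is the loop out
   of which the invariants are forced and ORIG_LOOP is the loop in which the
   reference is used.  */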
1417 struct fmt_data
1418 {
1419 class loop *loop;
1420 class loop *orig_loop;
1421 };
1422
1423 static bool
1424 force_move_till (tree ref, tree *index, void *data)
1425 {
1426 struct fmt_data *fmt_data = (struct fmt_data *) data;
1427
1428 if (TREE_CODE (ref) == ARRAY_REF)
1429 {
1430 tree step = TREE_OPERAND (ref, 3);
1431 tree lbound = TREE_OPERAND (ref, 2);
1432
1433 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1434 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1435 }
1436
1437 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1438
1439 return true;
1440 }
1441
1442 /* A function to free the mem_ref object MEM. */
1443
1444 static void
1445 memref_free (class im_mem_ref *mem)
1446 {
1447 mem->accesses_in_loop.release ();
1448 }
1449
1450 /* Allocates and returns a memory reference description for MEM whose hash
1451 value is HASH and id is ID. */
1452
1453 static im_mem_ref *
1454 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1455 {
1456 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1457 if (mem)
1458 ref->mem = *mem;
1459 else
1460 ao_ref_init (&ref->mem, error_mark_node);
1461 ref->id = id;
1462 ref->ref_canonical = false;
1463 ref->ref_decomposed = false;
1464 ref->hash = hash;
1465 ref->stored = NULL;
1466 ref->loaded = NULL;
1467 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1468 ref->accesses_in_loop.create (1);
1469
1470 return ref;
1471 }
1472
1473 /* Records memory reference location *LOC to the memory reference
1474 description REF. The reference occurs in statement STMT. */
1475
1476 static void
1477 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1478 {
1479 mem_ref_loc aref;
1480 aref.stmt = stmt;
1481 aref.ref = loc;
1482 ref->accesses_in_loop.safe_push (aref);
1483 }
1484
1485 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1486 necessary. Return whether a bit was changed. */
1487
1488 static bool
1489 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1490 {
1491 if (!ref->stored)
1492 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1493 return bitmap_set_bit (ref->stored, loop->num);
1494 }
1495
1496 /* Marks reference REF as stored in LOOP. */
1497
1498 static void
1499 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1500 {
1501 while (loop != current_loops->tree_root
1502 && set_ref_stored_in_loop (ref, loop))
1503 loop = loop_outer (loop);
1504 }
1505
1506 /* Set the LOOP bit in REF's loaded bitmap, allocating the bitmap if
1507 necessary. Return whether a bit was changed. */
1508
1509 static bool
1510 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1511 {
1512 if (!ref->loaded)
1513 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1514 return bitmap_set_bit (ref->loaded, loop->num);
1515 }
1516
1517 /* Marks reference REF as loaded in LOOP. */
1518
1519 static void
1520 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1521 {
1522 while (loop != current_loops->tree_root
1523 && set_ref_loaded_in_loop (ref, loop))
1524 loop = loop_outer (loop);
1525 }
1526
1527 /* Gathers memory references in statement STMT in LOOP, storing the
1528 information about them in the memory_accesses structure. Marks
1529 the vops accessed through unrecognized statements there as
1530 well. */
1531
1532 static void
1533 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1534 {
1535 tree *mem = NULL;
1536 hashval_t hash;
1537 im_mem_ref **slot;
1538 im_mem_ref *ref;
1539 bool is_stored;
1540 unsigned id;
1541
1542 if (!gimple_vuse (stmt))
1543 return;
1544
1545 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1546 if (!mem && is_gimple_assign (stmt))
1547 {
1548 /* For aggregate copies record distinct references but use them
1549 only for disambiguation purposes. */
1550 id = memory_accesses.refs_list.length ();
1551 ref = mem_ref_alloc (NULL, 0, id);
1552 memory_accesses.refs_list.safe_push (ref);
1553 if (dump_file && (dump_flags & TDF_DETAILS))
1554 {
1555 fprintf (dump_file, "Unhandled memory reference %u: ", id);
1556 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1557 }
1558 record_mem_ref_loc (ref, stmt, mem);
1559 is_stored = gimple_vdef (stmt);
1560 }
1561 else if (!mem)
1562 {
1563 /* We use the shared mem_ref for all unanalyzable refs. */
1564 id = UNANALYZABLE_MEM_ID;
1565 ref = memory_accesses.refs_list[id];
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 {
1568 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1569 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1570 }
1571 is_stored = gimple_vdef (stmt);
1572 }
1573 else
1574 {
1575 /* We are looking for equal refs that might differ in structure
1576 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1577 make sure we can canonicalize the ref in the hashtable if
1578 non-operand_equal_p refs are found. For the lookup we mark
1579 the case we want strict equality with aor.max_size == -1. */
1580 ao_ref aor;
1581 ao_ref_init (&aor, *mem);
1582 ao_ref_base (&aor);
1583 ao_ref_alias_set (&aor);
1584 HOST_WIDE_INT offset, size, max_size;
1585 poly_int64 saved_maxsize = aor.max_size, mem_off;
1586 tree mem_base;
1587 bool ref_decomposed;
1588 if (aor.max_size_known_p ()
1589 && aor.offset.is_constant (&offset)
1590 && aor.size.is_constant (&size)
1591 && aor.max_size.is_constant (&max_size)
1592 && size == max_size
1593 && (size % BITS_PER_UNIT) == 0
1594 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1595 size. Make sure this is consistent with the extraction. */
1596 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1597 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1598 aor.size)
1599 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1600 {
1601 ref_decomposed = true;
1602 tree base = ao_ref_base (&aor);
1603 poly_int64 moffset;
1604 HOST_WIDE_INT mcoffset;
1605 if (TREE_CODE (base) == MEM_REF
1606 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1607 && moffset.is_constant (&mcoffset))
1608 {
1609 hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1610 hash = iterative_hash_host_wide_int (mcoffset, hash);
1611 }
1612 else
1613 {
1614 hash = iterative_hash_expr (base, 0);
1615 hash = iterative_hash_host_wide_int (offset, hash);
1616 }
1617 hash = iterative_hash_host_wide_int (size, hash);
1618 }
1619 else
1620 {
1621 ref_decomposed = false;
1622 hash = iterative_hash_expr (aor.ref, 0);
1623 aor.max_size = -1;
1624 }
1625 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1626 aor.max_size = saved_maxsize;
1627 if (*slot)
1628 {
1629 if (!(*slot)->ref_canonical
1630 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1631 {
1632 /* If we didn't yet canonicalize the hashtable ref (which
1633 we'll end up using for code insertion) and hit a second
1634 equal ref that is not structurally equivalent create
1635 a canonical ref which is a bare MEM_REF. */
1636 if (TREE_CODE (*mem) == MEM_REF
1637 || TREE_CODE (*mem) == TARGET_MEM_REF)
1638 {
1639 (*slot)->mem.ref = *mem;
1640 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1641 }
1642 else
1643 {
1644 tree ref_alias_type = reference_alias_ptr_type (*mem);
1645 unsigned int ref_align = get_object_alignment (*mem);
1646 tree ref_type = TREE_TYPE (*mem);
1647 tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1648 unshare_expr (mem_base));
1649 if (TYPE_ALIGN (ref_type) != ref_align)
1650 ref_type = build_aligned_type (ref_type, ref_align);
1651 tree new_ref
1652 = fold_build2 (MEM_REF, ref_type, tmp,
1653 build_int_cst (ref_alias_type, mem_off));
1654 if ((*slot)->mem.volatile_p)
1655 TREE_THIS_VOLATILE (new_ref) = 1;
1656 (*slot)->mem.ref = new_ref;
1657 /* Make sure the recorded base and offset are consistent
1658 with the newly built ref. */
1659 if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
1660 ;
1661 else
1662 {
1663 (*slot)->mem.base = new_ref;
1664 (*slot)->mem.offset = 0;
1665 }
1666 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1667 && is_gimple_mem_ref_addr
1668 (TREE_OPERAND ((*slot)->mem.ref,
1669 0)));
1670 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1671 }
1672 (*slot)->ref_canonical = true;
1673 }
1674 ref = *slot;
1675 id = ref->id;
1676 }
1677 else
1678 {
1679 id = memory_accesses.refs_list.length ();
1680 ref = mem_ref_alloc (&aor, hash, id);
1681 ref->ref_decomposed = ref_decomposed;
1682 memory_accesses.refs_list.safe_push (ref);
1683 *slot = ref;
1684
1685 if (dump_file && (dump_flags & TDF_DETAILS))
1686 {
1687 fprintf (dump_file, "Memory reference %u: ", id);
1688 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1689 fprintf (dump_file, "\n");
1690 }
1691 }
1692
1693 record_mem_ref_loc (ref, stmt, mem);
1694 }
1695 if (is_stored)
1696 {
1697 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1698 mark_ref_stored (ref, loop);
1699 }
1700 /* A non-simple memory op is also treated as a read when it is a write. */
1701 if (!is_stored || id == UNANALYZABLE_MEM_ID
1702 || ref->mem.ref == error_mark_node)
1703 {
1704 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1705 mark_ref_loaded (ref, loop);
1706 }
1707 init_lim_data (stmt)->ref = ref->id;
1708 return;
1709 }
1710
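/* Postorder numbers of the loops in the loop tree, indexed by loop number;
   used by the comparison functions below to sort blocks and reference
   locations after their containing loop's postorder.  */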
1711 static unsigned *bb_loop_postorder;
1712
1713 /* qsort sort function to sort blocks after their loop father's postorder. */
1714
1715 static int
1716 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1717 void *bb_loop_postorder_)
1718 {
1719 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1720 basic_block bb1 = *(const basic_block *)bb1_;
1721 basic_block bb2 = *(const basic_block *)bb2_;
1722 class loop *loop1 = bb1->loop_father;
1723 class loop *loop2 = bb2->loop_father;
1724 if (loop1->num == loop2->num)
1725 return bb1->index - bb2->index;
1726 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1727 }
1728
1729 /* qsort sort function to sort ref locs after their loop father's postorder. */
1730
1731 static int
1732 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1733 void *bb_loop_postorder_)
1734 {
1735 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1736 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1737 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1738 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1739 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1740 if (loop1->num == loop2->num)
1741 return 0;
1742 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1743 }
1744
1745 /* Gathers memory references in loops. */
1746
1747 static void
1748 analyze_memory_references (bool store_motion)
1749 {
1750 gimple_stmt_iterator bsi;
1751 basic_block bb, *bbs;
1752 class loop *outer;
1753 unsigned i, n;
1754
1755 /* Collect all basic-blocks in loops and sort them by their
1756 loop's postorder. */
1757 i = 0;
1758 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1759 FOR_EACH_BB_FN (bb, cfun)
1760 if (bb->loop_father != current_loops->tree_root)
1761 bbs[i++] = bb;
1762 n = i;
1763 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1764 bb_loop_postorder);
1765
1766 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1767 That results in better locality for all the bitmaps. It also
1768 automatically sorts the location list of gathered memory references
1769 by their loop postorder number, allowing us to binary-search it. */
1770 for (i = 0; i < n; ++i)
1771 {
1772 basic_block bb = bbs[i];
1773 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1774 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1775 }
1776
1777 /* Verify the list of gathered memory references is sorted by their
1778 loop postorder number. */
1779 if (flag_checking)
1780 {
1781 im_mem_ref *ref;
1782 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1783 for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1784 gcc_assert (sort_locs_in_loop_postorder_cmp
1785 (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1786 bb_loop_postorder) <= 0);
1787 }
1788
1789 free (bbs);
1790
1791 if (!store_motion)
1792 return;
1793
1794 /* Propagate the information about accessed memory references up
1795 the loop hierarchy. */
1796 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1797 {
1798 /* Finalize the overall touched references (including subloops). */
1799 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1800 &memory_accesses.refs_stored_in_loop[loop->num]);
1801
1802 /* Propagate the information about accessed memory references up
1803 the loop hierarchy. */
1804 outer = loop_outer (loop);
1805 if (outer == current_loops->tree_root)
1806 continue;
1807
1808 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1809 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1810 }
1811 }
1812
1813 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1814 tree_to_aff_combination_expand. */
1815
1816 static bool
1817 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1818 hash_map<tree, name_expansion *> **ttae_cache,
1819 bool tbaa_p)
1820 {
1821 gcc_checking_assert (mem1->mem.ref != error_mark_node
1822 && mem2->mem.ref != error_mark_node);
1823
1824 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1825 object and their offset differ in such a way that the locations cannot
1826 overlap, then they cannot alias. */
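/* Purely illustrative example (made-up accesses, not from a testcase):
   for a[i] and a[i + 2] with element size 4 the expanded affine offsets
   are &a + 4*i and &a + 4*i + 8; their difference is the constant 8,
   which together with access sizes of 4 shows the locations cannot
   overlap.  */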
1827 poly_widest_int size1, size2;
1828 aff_tree off1, off2;
1829
1830 /* Perform basic offset and type-based disambiguation. */
1831 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1832 return false;
1833
1834 /* The expansion of addresses may be a bit expensive, thus we only do
1835 the check at -O2 and higher optimization levels. */
1836 if (optimize < 2)
1837 return true;
1838
1839 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1840 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1841 aff_combination_expand (&off1, ttae_cache);
1842 aff_combination_expand (&off2, ttae_cache);
1843 aff_combination_scale (&off1, -1);
1844 aff_combination_add (&off2, &off1);
1845
1846 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1847 return false;
1848
1849 return true;
1850 }
1851
1852 /* Compare function for bsearch searching for reference locations
1853 in a loop. */
1854
1855 static int
1856 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1857 void *bb_loop_postorder_)
1858 {
1859 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1860 class loop *loop = (class loop *)const_cast<void *>(loop_);
1861 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1862 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1863 if (loop->num == loc_loop->num
1864 || flow_loop_nested_p (loop, loc_loop))
1865 return 0;
1866 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1867 ? -1 : 1);
1868 }
1869
1870 /* Iterates over all locations of REF in LOOP and its subloops calling
1871 fn.operator() with the location as argument. When that operator
1872 returns true the iteration is stopped and true is returned.
1873 Otherwise false is returned. */
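/* For illustration (hypothetical layout): if accesses_in_loop is sorted by
   loop postorder as { L3, L3, L2, L2, L1 } and we query loop L2, the bsearch
   below hits somewhere inside the L2 cluster and the two scans then expand
   from that hit backwards and forwards until a location outside of L2 is
   reached.  */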
1874
1875 template <typename FN>
1876 static bool
1877 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1878 {
1879 unsigned i;
1880 mem_ref_loc *loc;
1881
1882 /* Search for the cluster of locs in the accesses_in_loop vector
1883 which is sorted by the postorder index of the loop father. */
1884 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1885 bb_loop_postorder);
1886 if (!loc)
1887 return false;
1888
1889 /* We have found one location inside loop or its sub-loops. Iterate
1890 both forward and backward to cover the whole cluster. */
1891 i = loc - ref->accesses_in_loop.address ();
1892 while (i > 0)
1893 {
1894 --i;
1895 mem_ref_loc *l = &ref->accesses_in_loop[i];
1896 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1897 break;
1898 if (fn (l))
1899 return true;
1900 }
1901 for (i = loc - ref->accesses_in_loop.address ();
1902 i < ref->accesses_in_loop.length (); ++i)
1903 {
1904 mem_ref_loc *l = &ref->accesses_in_loop[i];
1905 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1906 break;
1907 if (fn (l))
1908 return true;
1909 }
1910
1911 return false;
1912 }
1913
1914 /* Rewrites location LOC by TMP_VAR. */
1915
1916 class rewrite_mem_ref_loc
1917 {
1918 public:
1919 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1920 bool operator () (mem_ref_loc *loc);
1921 tree tmp_var;
1922 };
1923
1924 bool
1925 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1926 {
1927 *loc->ref = tmp_var;
1928 update_stmt (loc->stmt);
1929 return false;
1930 }
1931
1932 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1933
1934 static void
1935 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1936 {
1937 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1938 }
1939
1940 /* Stores the first reference location in LOCP. */
1941
1942 class first_mem_ref_loc_1
1943 {
1944 public:
1945 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1946 bool operator () (mem_ref_loc *loc);
1947 mem_ref_loc **locp;
1948 };
1949
1950 bool
1951 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1952 {
1953 *locp = loc;
1954 return true;
1955 }
1956
1957 /* Returns the first reference location to REF in LOOP. */
1958
1959 static mem_ref_loc *
1960 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1961 {
1962 mem_ref_loc *locp = NULL;
1963 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1964 return locp;
1965 }
1966
1967 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1968 MEM along edge EX.
1969
1970 The store is only done if MEM has changed. We do this so no
1971 changes to MEM occur on code paths that did not originally store
1972 into it.
1973
1974 The common case for execute_sm will transform:
1975
1976 for (...) {
1977 if (foo)
1978 stuff;
1979 else
1980 MEM = TMP_VAR;
1981 }
1982
1983 into:
1984
1985 lsm = MEM;
1986 for (...) {
1987 if (foo)
1988 stuff;
1989 else
1990 lsm = TMP_VAR;
1991 }
1992 MEM = lsm;
1993
1994 This function will generate:
1995
1996 lsm = MEM;
1997
1998 lsm_flag = false;
1999 ...
2000 for (...) {
2001 if (foo)
2002 stuff;
2003 else {
2004 lsm = TMP_VAR;
2005 lsm_flag = true;
2006 }
2007 }
2008 if (lsm_flag) <--
2009 MEM = lsm; <-- (X)
2010
2011 In case MEM and TMP_VAR are NULL the function will return the then
2012 block so the caller can insert (X) and other related stmts.
2013 */
2014
2015 static basic_block
2016 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2017 edge preheader, hash_set <basic_block> *flag_bbs,
2018 edge &append_cond_position, edge &last_cond_fallthru)
2019 {
2020 basic_block new_bb, then_bb, old_dest;
2021 bool loop_has_only_one_exit;
2022 edge then_old_edge;
2023 gimple_stmt_iterator gsi;
2024 gimple *stmt;
2025 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2026
2027 profile_count count_sum = profile_count::zero ();
2028 int nbbs = 0, ncount = 0;
2029 profile_probability flag_probability = profile_probability::uninitialized ();
2030
2031 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2032 at loop exit.
2033
2034 This code may look fancy, but it cannot update profile very realistically
2035 because we do not know the probability that flag will be true at given
2036 loop exit.
2037
2038 We look for two interesting extremes
2039 - when exit is dominated by block setting the flag, we know it will
2040 always be true. This is a common case.
2041 - when all blocks setting the flag have very low frequency we know
2042 it will likely be false.
2043 In all other cases we default to 2/3 for flag being true. */
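/* Purely illustrative numbers: if the blocks setting the flag have a
   combined count of 30 and the preheader count is 100, the computed
   probability below is 30%; a result above the 2/3 cap would be clamped
   to 2/3.  */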
2044
2045 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2046 it != flag_bbs->end (); ++it)
2047 {
2048 if ((*it)->count.initialized_p ())
2049 count_sum += (*it)->count, ncount ++;
2050 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2051 flag_probability = profile_probability::always ();
2052 nbbs++;
2053 }
2054
2055 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
2056
2057 if (flag_probability.initialized_p ())
2058 ;
2059 else if (ncount == nbbs
2060 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2061 {
2062 flag_probability = count_sum.probability_in (preheader->count ());
2063 if (flag_probability > cap)
2064 flag_probability = cap;
2065 }
2066
2067 if (!flag_probability.initialized_p ())
2068 flag_probability = cap;
2069
2070 /* ?? Insert store after previous store if applicable. See note
2071 below. */
2072 if (append_cond_position)
2073 ex = append_cond_position;
2074
2075 loop_has_only_one_exit = single_pred_p (ex->dest);
2076
2077 if (loop_has_only_one_exit)
2078 ex = split_block_after_labels (ex->dest);
2079 else
2080 {
2081 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2082 !gsi_end_p (gpi); gsi_next (&gpi))
2083 {
2084 gphi *phi = gpi.phi ();
2085 if (virtual_operand_p (gimple_phi_result (phi)))
2086 continue;
2087
2088 /* When the destination has a non-virtual PHI node with multiple
2089 predecessors make sure we preserve the PHI structure by
2090 forcing a forwarder block so that hoisting of that PHI will
2091 still work. */
2092 split_edge (ex);
2093 break;
2094 }
2095 }
2096
2097 old_dest = ex->dest;
2098 new_bb = split_edge (ex);
2099 then_bb = create_empty_bb (new_bb);
2100 then_bb->count = new_bb->count.apply_probability (flag_probability);
2101 if (irr)
2102 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2103 add_bb_to_loop (then_bb, new_bb->loop_father);
2104
2105 gsi = gsi_start_bb (new_bb);
2106 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2107 NULL_TREE, NULL_TREE);
2108 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2109
2110 /* Insert actual store. */
2111 if (mem)
2112 {
2113 gsi = gsi_start_bb (then_bb);
2114 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2115 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2116 }
2117
2118 edge e1 = single_succ_edge (new_bb);
2119 edge e2 = make_edge (new_bb, then_bb,
2120 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2121 e2->probability = flag_probability;
2122
2123 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2124 e1->flags &= ~EDGE_FALLTHRU;
2125
2126 e1->probability = flag_probability.invert ();
2127
2128 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2129 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2130
2131 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2132
2133 if (append_cond_position)
2134 {
2135 basic_block prevbb = last_cond_fallthru->src;
2136 redirect_edge_succ (last_cond_fallthru, new_bb);
2137 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2138 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2139 recompute_dominator (CDI_DOMINATORS, old_dest));
2140 }
2141
2142 /* ?? Because stores may alias, they must happen in the exact
2143 sequence they originally happened. Save the position right after
2144 the (_lsm) store we just created so we can continue appending after
2145 it and maintain the original order. */
2146 append_cond_position = then_old_edge;
2147 last_cond_fallthru = find_edge (new_bb, old_dest);
2148
2149 if (!loop_has_only_one_exit)
2150 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2151 !gsi_end_p (gpi); gsi_next (&gpi))
2152 {
2153 gphi *phi = gpi.phi ();
2154 unsigned i;
2155
2156 for (i = 0; i < gimple_phi_num_args (phi); i++)
2157 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2158 {
2159 tree arg = gimple_phi_arg_def (phi, i);
2160 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2161 update_stmt (phi);
2162 }
2163 }
2164
2165 return then_bb;
2166 }
2167
2168 /* When REF is set on the location, set flag indicating the store. */
2169
2170 class sm_set_flag_if_changed
2171 {
2172 public:
2173 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2174 : flag (flag_), bbs (bbs_) {}
2175 bool operator () (mem_ref_loc *loc);
2176 tree flag;
2177 hash_set <basic_block> *bbs;
2178 };
2179
2180 bool
2181 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2182 {
2183 /* Only set the flag for writes. */
2184 if (is_gimple_assign (loc->stmt)
2185 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2186 {
2187 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2188 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2189 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2190 bbs->add (gimple_bb (stmt));
2191 }
2192 return false;
2193 }
2194
2195 /* Helper function for execute_sm. On every location where REF is
2196 set, set an appropriate flag indicating the store. */
2197
2198 static tree
2199 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2200 hash_set <basic_block> *bbs)
2201 {
2202 tree flag;
2203 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2204 flag = create_tmp_reg (boolean_type_node, str);
2205 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2206 return flag;
2207 }
2208
2209 struct sm_aux
2210 {
2211 tree tmp_var;
2212 tree store_flag;
2213 hash_set <basic_block> flag_bbs;
2214 };
2215
2216 /* Executes store motion of memory reference REF from LOOP.
2217 Exits from the LOOP are stored in EXITS. The initialization of the
2218 temporary variable is put into the preheader of the loop, and assignments
2219 to the reference from the temporary variable are emitted on the exits. */
2220
2221 static void
2222 execute_sm (class loop *loop, im_mem_ref *ref,
2223 hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2224 bool use_other_flag_var)
2225 {
2226 gassign *load;
2227 struct fmt_data fmt_data;
2228 struct lim_aux_data *lim_data;
2229 bool multi_threaded_model_p = false;
2230 gimple_stmt_iterator gsi;
2231 sm_aux *aux = new sm_aux;
2232
2233 if (dump_file && (dump_flags & TDF_DETAILS))
2234 {
2235 fprintf (dump_file, "Executing store motion of ");
2236 print_generic_expr (dump_file, ref->mem.ref);
2237 fprintf (dump_file, " from loop %d\n", loop->num);
2238 }
2239
2240 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2241 get_lsm_tmp_name (ref->mem.ref, ~0));
2242
2243 fmt_data.loop = loop;
2244 fmt_data.orig_loop = loop;
2245 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2246
2247 bool always_stored = ref_always_accessed_p (loop, ref, true);
2248 if (maybe_mt
2249 && (bb_in_transaction (loop_preheader_edge (loop)->src)
2250 || (! flag_store_data_races && ! always_stored)))
2251 multi_threaded_model_p = true;
2252
2253 if (multi_threaded_model_p && !use_other_flag_var)
2254 aux->store_flag
2255 = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2256 else
2257 aux->store_flag = NULL_TREE;
2258
2259 /* Remember variable setup. */
2260 aux_map.put (ref, aux);
2261
2262 rewrite_mem_refs (loop, ref, aux->tmp_var);
2263
2264 /* Emit the load code on a random exit edge or into the latch if
2265 the loop does not exit, so that we are sure it will be processed
2266 by move_computations after all dependencies. */
2267 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2268
2269 /* Avoid doing a load if there was no load of the ref in the loop.
2270 Esp. when the ref is not always stored we cannot optimize it
2271 away later. But when it is not always stored we must use a conditional
2272 store then. */
2273 if ((!always_stored && !multi_threaded_model_p)
2274 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2275 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2276 else
2277 {
2278 /* If not emitting a load mark the uninitialized state on the
2279 loop entry as not to be warned for. */
2280 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2281 suppress_warning (uninit, OPT_Wuninitialized);
2282 load = gimple_build_assign (aux->tmp_var, uninit);
2283 }
2284 lim_data = init_lim_data (load);
2285 lim_data->max_loop = loop;
2286 lim_data->tgt_loop = loop;
2287 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2288
2289 if (aux->store_flag)
2290 {
2291 load = gimple_build_assign (aux->store_flag, boolean_false_node);
2292 lim_data = init_lim_data (load);
2293 lim_data->max_loop = loop;
2294 lim_data->tgt_loop = loop;
2295 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2296 }
2297 }
2298
2299 /* sm_ord is used for ordinary stores for which we can retain the order with
2300 respect to other stores,
2301 sm_unord is used for conditionally executed stores which need to be
2302 able to execute in arbitrary order with respect to other stores,
2303 sm_other is used for stores we do not try to apply store motion to. */
2304 enum sm_kind { sm_ord, sm_unord, sm_other };
2305 struct seq_entry
2306 {
2307 seq_entry () {}
2308 seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2309 : first (f), second (k), from (fr) {}
2310 unsigned first;
2311 sm_kind second;
2312 tree from;
2313 };
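/* Purely illustrative entry values (ref ids made up): { 5, sm_ord } marks
   ref 5 as an ordered store-motion candidate, while { 7, sm_other, from=x_1 }
   records a store to ref 7 that is not moved itself but whose last stored
   value x_1 may need to be re-issued on the exits to preserve the original
   store order.  */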
2314
2315 static void
2316 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2317 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2318 edge &append_cond_position, edge &last_cond_fallthru)
2319 {
2320 /* Sink the stores to exit from the loop. */
2321 for (unsigned i = seq.length (); i > 0; --i)
2322 {
2323 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2324 if (seq[i-1].second == sm_other)
2325 {
2326 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2327 if (dump_file && (dump_flags & TDF_DETAILS))
2328 {
2329 fprintf (dump_file, "Re-issuing dependent store of ");
2330 print_generic_expr (dump_file, ref->mem.ref);
2331 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2332 loop->num, ex->src->index, ex->dest->index);
2333 }
2334 gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2335 seq[i-1].from);
2336 gsi_insert_on_edge (ex, store);
2337 }
2338 else
2339 {
2340 sm_aux *aux = *aux_map.get (ref);
2341 if (!aux->store_flag || kind == sm_ord)
2342 {
2343 gassign *store;
2344 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2345 aux->tmp_var);
2346 gsi_insert_on_edge (ex, store);
2347 }
2348 else
2349 execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2350 aux->store_flag,
2351 loop_preheader_edge (loop), &aux->flag_bbs,
2352 append_cond_position, last_cond_fallthru);
2353 }
2354 }
2355 }
2356
2357 /* Push the SM candidate at index PTR in the sequence SEQ down until
2358 we hit the next SM candidate. Return true if that went OK and
2359 false if we could not disambiguate against another unrelated ref.
2360 Update *AT to the index where the candidate now resides. */
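/* For illustration with made-up entries: pushing the last element of
   { {A, sm_ord}, {B, sm_other, from=NULL}, {C, sm_ord} } down moves C to
   index 1 only if C and B are independent, and it never moves C past A
   because A is sm_ord and thus already the tail of the ordered sequence.  */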
2361
2362 static bool
2363 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2364 {
2365 *at = ptr;
2366 for (; ptr > 0; --ptr)
2367 {
2368 seq_entry &new_cand = seq[ptr];
2369 seq_entry &against = seq[ptr-1];
2370 if (against.second == sm_ord
2371 || (against.second == sm_other && against.from != NULL_TREE))
2372 /* Found the tail of the sequence. */
2373 break;
2374 /* We may not ignore self-dependences here. */
2375 if (new_cand.first == against.first
2376 || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2377 memory_accesses.refs_list[against.first],
2378 false))
2379 /* ??? Prune new_cand from the list of refs to apply SM to. */
2380 return false;
2381 std::swap (new_cand, against);
2382 *at = ptr - 1;
2383 }
2384 return true;
2385 }
2386
2387 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2388 walking backwards from VDEF (or the end of BB if VDEF is NULL). */
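/* Sketch of the idea on a made-up loop body (not from a real testcase):

     for (...)
       {
         *p = a;     [ref 1, SM candidate]
         *q = b;     [ref 2, not a candidate]
         *r = c;     [ref 3, SM candidate]
       }

   Walking the virtual use-def chain backwards from the exit visits the
   store to *r first, then *q, then *p, so SEQ ends up as
   { {3, sm_ord}, {2, sm_other, from=b}, {1, sm_ord} }.  */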
2389
2390 static int
2391 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2392 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2393 bitmap refs_not_supported, bool forked,
2394 bitmap fully_visited)
2395 {
2396 if (!vdef)
2397 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2398 gsi_prev (&gsi))
2399 {
2400 vdef = gimple_vdef (gsi_stmt (gsi));
2401 if (vdef)
2402 break;
2403 }
2404 if (!vdef)
2405 {
2406 gphi *vphi = get_virtual_phi (bb);
2407 if (vphi)
2408 vdef = gimple_phi_result (vphi);
2409 }
2410 if (!vdef)
2411 {
2412 if (single_pred_p (bb))
2413 /* This handles the perfect nest case. */
2414 return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2415 seq, refs_not_in_seq, refs_not_supported,
2416 forked, fully_visited);
2417 return 0;
2418 }
2419 do
2420 {
2421 gimple *def = SSA_NAME_DEF_STMT (vdef);
2422 if (gimple_bb (def) != bb)
2423 {
2424 /* If we forked by processing a PHI do not allow our walk to
2425 merge again until we handle that robustly. */
2426 if (forked)
2427 {
2428 /* Mark refs_not_in_seq as unsupported. */
2429 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2430 return 1;
2431 }
2432 /* Otherwise it doesn't really matter if we end up in different
2433 BBs. */
2434 bb = gimple_bb (def);
2435 }
2436 if (gphi *phi = dyn_cast <gphi *> (def))
2437 {
2438 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2439 this is still linear.
2440 Eventually we want to cache intermediate results per BB
2441 (but we can't easily cache for different exits?). */
2442 /* Stop at PHIs with possible backedges. */
2443 if (bb == bb->loop_father->header
2444 || bb->flags & BB_IRREDUCIBLE_LOOP)
2445 {
2446 /* Mark refs_not_in_seq as unsupported. */
2447 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2448 return 1;
2449 }
2450 if (gimple_phi_num_args (phi) == 1)
2451 return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2452 gimple_phi_arg_def (phi, 0), seq,
2453 refs_not_in_seq, refs_not_supported,
2454 false, fully_visited);
2455 if (bitmap_bit_p (fully_visited,
2456 SSA_NAME_VERSION (gimple_phi_result (phi))))
2457 return 1;
2458 auto_vec<seq_entry> first_edge_seq;
2459 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2460 int eret;
2461 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2462 eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2463 gimple_phi_arg_def (phi, 0),
2464 first_edge_seq,
2465 tem_refs_not_in_seq, refs_not_supported,
2466 true, fully_visited);
2467 if (eret != 1)
2468 return -1;
2469 /* Simplify our lives by pruning the sequence of !sm_ord. */
2470 while (!first_edge_seq.is_empty ()
2471 && first_edge_seq.last ().second != sm_ord)
2472 first_edge_seq.pop ();
2473 for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2474 {
2475 tree vuse = gimple_phi_arg_def (phi, i);
2476 edge e = gimple_phi_arg_edge (phi, i);
2477 auto_vec<seq_entry> edge_seq;
2478 bitmap_and_compl (tem_refs_not_in_seq,
2479 refs_not_in_seq, refs_not_supported);
2480 /* If we've marked all refs we search for as unsupported
2481 we can stop processing and use the sequence as before
2482 the PHI. */
2483 if (bitmap_empty_p (tem_refs_not_in_seq))
2484 return 1;
2485 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2486 tem_refs_not_in_seq, refs_not_supported,
2487 true, fully_visited);
2488 if (eret != 1)
2489 return -1;
2490 /* Simplify our lives by pruning the sequence of !sm_ord. */
2491 while (!edge_seq.is_empty ()
2492 && edge_seq.last ().second != sm_ord)
2493 edge_seq.pop ();
2494 unsigned min_len = MIN(first_edge_seq.length (),
2495 edge_seq.length ());
2496 /* Incrementally merge seqs into first_edge_seq. */
2497 int first_uneq = -1;
2498 auto_vec<seq_entry, 2> extra_refs;
2499 for (unsigned int i = 0; i < min_len; ++i)
2500 {
2501 /* ??? We can more intelligently merge when we face different
2502 order by additional sinking operations in one sequence.
2503 For now we simply mark them as to be processed by the
2504 not order-preserving SM code. */
2505 if (first_edge_seq[i].first != edge_seq[i].first)
2506 {
2507 if (first_edge_seq[i].second == sm_ord)
2508 bitmap_set_bit (refs_not_supported,
2509 first_edge_seq[i].first);
2510 if (edge_seq[i].second == sm_ord)
2511 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2512 first_edge_seq[i].second = sm_other;
2513 first_edge_seq[i].from = NULL_TREE;
2514 /* Record the dropped refs for later processing. */
2515 if (first_uneq == -1)
2516 first_uneq = i;
2517 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2518 sm_other, NULL_TREE));
2519 }
2520 /* sm_other prevails. */
2521 else if (first_edge_seq[i].second != edge_seq[i].second)
2522 {
2523 /* Make sure the ref is marked as not supported. */
2524 bitmap_set_bit (refs_not_supported,
2525 first_edge_seq[i].first);
2526 first_edge_seq[i].second = sm_other;
2527 first_edge_seq[i].from = NULL_TREE;
2528 }
2529 else if (first_edge_seq[i].second == sm_other
2530 && first_edge_seq[i].from != NULL_TREE
2531 && (edge_seq[i].from == NULL_TREE
2532 || !operand_equal_p (first_edge_seq[i].from,
2533 edge_seq[i].from, 0)))
2534 first_edge_seq[i].from = NULL_TREE;
2535 }
2536 /* Any excess elements become sm_other since they are now
2537 conditionally executed. */
2538 if (first_edge_seq.length () > edge_seq.length ())
2539 {
2540 for (unsigned i = edge_seq.length ();
2541 i < first_edge_seq.length (); ++i)
2542 {
2543 if (first_edge_seq[i].second == sm_ord)
2544 bitmap_set_bit (refs_not_supported,
2545 first_edge_seq[i].first);
2546 first_edge_seq[i].second = sm_other;
2547 }
2548 }
2549 else if (edge_seq.length () > first_edge_seq.length ())
2550 {
2551 if (first_uneq == -1)
2552 first_uneq = first_edge_seq.length ();
2553 for (unsigned i = first_edge_seq.length ();
2554 i < edge_seq.length (); ++i)
2555 {
2556 if (edge_seq[i].second == sm_ord)
2557 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2558 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2559 sm_other, NULL_TREE));
2560 }
2561 }
2562 /* Put unmerged refs at first_uneq to force dependence checking
2563 on them. */
2564 if (first_uneq != -1)
2565 {
2566 /* Missing ordered_splice_at. */
2567 if ((unsigned)first_uneq == first_edge_seq.length ())
2568 first_edge_seq.safe_splice (extra_refs);
2569 else
2570 {
2571 unsigned fes_length = first_edge_seq.length ();
2572 first_edge_seq.safe_grow (fes_length
2573 + extra_refs.length ());
2574 memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2575 &first_edge_seq[first_uneq],
2576 (fes_length - first_uneq) * sizeof (seq_entry));
2577 memcpy (&first_edge_seq[first_uneq],
2578 extra_refs.address (),
2579 extra_refs.length () * sizeof (seq_entry));
2580 }
2581 }
2582 }
2583 /* Use the sequence from the first edge and push SMs down. */
2584 for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2585 {
2586 unsigned id = first_edge_seq[i].first;
2587 seq.safe_push (first_edge_seq[i]);
2588 unsigned new_idx;
2589 if ((first_edge_seq[i].second == sm_ord
2590 || (first_edge_seq[i].second == sm_other
2591 && first_edge_seq[i].from != NULL_TREE))
2592 && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2593 {
2594 if (first_edge_seq[i].second == sm_ord)
2595 bitmap_set_bit (refs_not_supported, id);
2596 /* Mark it sm_other. */
2597 seq[new_idx].second = sm_other;
2598 seq[new_idx].from = NULL_TREE;
2599 }
2600 }
2601 bitmap_set_bit (fully_visited,
2602 SSA_NAME_VERSION (gimple_phi_result (phi)));
2603 return 1;
2604 }
2605 lim_aux_data *data = get_lim_data (def);
2606 gcc_assert (data);
2607 if (data->ref == UNANALYZABLE_MEM_ID)
2608 return -1;
2609 /* Stop at memory references which we can't move. */
2610 else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node
2611 || TREE_THIS_VOLATILE
2612 (memory_accesses.refs_list[data->ref]->mem.ref))
2613 {
2614 /* Mark refs_not_in_seq as unsupported. */
2615 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2616 return 1;
2617 }
2618 /* One of the stores we want to apply SM to and we've not yet seen. */
2619 else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2620 {
2621 seq.safe_push (seq_entry (data->ref, sm_ord));
2622
2623 /* 1) push it down the queue until a SMed
2624 and not ignored ref is reached, skipping all not SMed refs
2625 and ignored refs via non-TBAA disambiguation. */
2626 unsigned new_idx;
2627 if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2628 /* If that fails but we did not fork yet continue, we'll see
2629 to re-materialize all of the stores in the sequence then.
2630 Further stores will only be pushed up to this one. */
2631 && forked)
2632 {
2633 bitmap_set_bit (refs_not_supported, data->ref);
2634 /* Mark it sm_other. */
2635 seq[new_idx].second = sm_other;
2636 }
2637
2638 /* 2) check whether we've seen all refs we want to SM and if so
2639 declare success for the active exit */
2640 if (bitmap_empty_p (refs_not_in_seq))
2641 return 1;
2642 }
2643 else
2644 /* Another store not part of the final sequence. Simply push it. */
2645 seq.safe_push (seq_entry (data->ref, sm_other,
2646 gimple_assign_rhs1 (def)));
2647
2648 vdef = gimple_vuse (def);
2649 }
2650 while (1);
2651 }
2652
2653 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2654 edges of the LOOP. */
2655
2656 static void
2657 hoist_memory_references (class loop *loop, bitmap mem_refs,
2658 const vec<edge> &exits)
2659 {
2660 im_mem_ref *ref;
2661 unsigned i;
2662 bitmap_iterator bi;
2663
2664 /* There's a special case we can use ordered re-materialization for
2665 conditionally executed stores, which is when all stores in the loop
2666 happen in the same basic-block. In that case we know we'll reach
2667 all stores and thus can simply process that BB and emit a single
2668 conditional block of ordered materializations. See PR102436. */
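/* Illustrative shape of such a loop (hypothetical):

     for (...)
       if (cond)
         {
           *p = a;
           *q = b;
         }

   Both stores live in one conditional basic-block, so a single flag can
   guard one ordered block of materializations on the exits.  */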
2669 basic_block single_store_bb = NULL;
2670 EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2671 0, i, bi)
2672 {
2673 bool fail = false;
2674 ref = memory_accesses.refs_list[i];
2675 for (auto loc : ref->accesses_in_loop)
2676 if (!gimple_vdef (loc.stmt))
2677 ;
2678 else if (!single_store_bb)
2679 {
2680 single_store_bb = gimple_bb (loc.stmt);
2681 bool conditional = false;
2682 for (edge e : exits)
2683 if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2684 {
2685 /* Conditional as seen from e. */
2686 conditional = true;
2687 break;
2688 }
2689 if (!conditional)
2690 {
2691 fail = true;
2692 break;
2693 }
2694 }
2695 else if (single_store_bb != gimple_bb (loc.stmt))
2696 {
2697 fail = true;
2698 break;
2699 }
2700 if (fail)
2701 {
2702 single_store_bb = NULL;
2703 break;
2704 }
2705 }
2706 if (single_store_bb)
2707 {
2708 /* Analyze the single block with stores. */
2709 auto_bitmap fully_visited;
2710 auto_bitmap refs_not_supported;
2711 auto_bitmap refs_not_in_seq;
2712 auto_vec<seq_entry> seq;
2713 bitmap_copy (refs_not_in_seq, mem_refs);
2714 int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2715 seq, refs_not_in_seq, refs_not_supported,
2716 false, fully_visited);
2717 if (res != 1)
2718 {
2719 /* Unhandled refs can still fail this. */
2720 bitmap_clear (mem_refs);
2721 return;
2722 }
2723
2724 /* We cannot handle sm_other since we neither remember the
2725 stored location nor the value at the point we execute them. */
2726 for (unsigned i = 0; i < seq.length (); ++i)
2727 {
2728 unsigned new_i;
2729 if (seq[i].second == sm_other
2730 && seq[i].from != NULL_TREE)
2731 seq[i].from = NULL_TREE;
2732 else if ((seq[i].second == sm_ord
2733 || (seq[i].second == sm_other
2734 && seq[i].from != NULL_TREE))
2735 && !sm_seq_push_down (seq, i, &new_i))
2736 {
2737 bitmap_set_bit (refs_not_supported, seq[new_i].first);
2738 seq[new_i].second = sm_other;
2739 seq[new_i].from = NULL_TREE;
2740 }
2741 }
2742 bitmap_and_compl_into (mem_refs, refs_not_supported);
2743 if (bitmap_empty_p (mem_refs))
2744 return;
2745
2746 /* Prune seq. */
2747 while (seq.last ().second == sm_other
2748 && seq.last ().from == NULL_TREE)
2749 seq.pop ();
2750
2751 hash_map<im_mem_ref *, sm_aux *> aux_map;
2752
2753 /* Execute SM but delay the store materialization for ordered
2754 sequences on exit. */
2755 bool first_p = true;
2756 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2757 {
2758 ref = memory_accesses.refs_list[i];
2759 execute_sm (loop, ref, aux_map, true, !first_p);
2760 first_p = false;
2761 }
2762
2763 /* Get at the single flag variable we eventually produced. */
2764 im_mem_ref *ref
2765 = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
2766 sm_aux *aux = *aux_map.get (ref);
2767
2768 /* Materialize ordered store sequences on exits. */
2769 edge e;
2770 FOR_EACH_VEC_ELT (exits, i, e)
2771 {
2772 edge append_cond_position = NULL;
2773 edge last_cond_fallthru = NULL;
2774 edge insert_e = e;
2775 /* Construct the single flag variable control flow and insert
2776 the ordered seq of stores in the then block. With
2777 -fstore-data-races we can do the stores unconditionally. */
2778 if (aux->store_flag)
2779 insert_e
2780 = single_pred_edge
2781 (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2782 aux->store_flag,
2783 loop_preheader_edge (loop),
2784 &aux->flag_bbs, append_cond_position,
2785 last_cond_fallthru));
2786 execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2787 append_cond_position, last_cond_fallthru);
2788 gsi_commit_one_edge_insert (insert_e, NULL);
2789 }
2790
2791 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2792 iter != aux_map.end (); ++iter)
2793 delete (*iter).second;
2794
2795 return;
2796 }
2797
2798 /* To address PR57359, before actually applying store-motion check
2799 the candidates found for validity with regard to reordering
2800 relative to other stores, which we have until here disambiguated using
2801 TBAA, which isn't valid for this purpose.
2802 What matters is the order of the last stores to the mem_refs
2803 with respect to the other stores of the loop at the point of the
2804 loop exits. */
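/* A minimal made-up illustration (not the PR testcase): with in-loop stores
   *p = ...; *q = ...; in that order and p possibly equal to q, sinking only
   the store to *p to the loop exit would swap the order of the two stores as
   observed after the loop, which is why the per-exit sequences computed
   below must preserve the original store order.  */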
2805
2806 /* For each exit compute the store order, pruning from mem_refs
2807 on the fly. */
2808 /* The complexity of this is at least
2809 O(number of exits * number of SM refs) but more approaching
2810 O(number of exits * number of SM refs * number of stores). */
2811 /* ??? Somehow do this in a single sweep over the loop body. */
2812 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2813 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2814 edge e;
2815 FOR_EACH_VEC_ELT (exits, i, e)
2816 {
2817 vec<seq_entry> seq;
2818 seq.create (4);
2819 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2820 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2821 if (bitmap_empty_p (refs_not_in_seq))
2822 {
2823 seq.release ();
2824 break;
2825 }
2826 auto_bitmap fully_visited;
2827 int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2828 seq, refs_not_in_seq,
2829 refs_not_supported, false,
2830 fully_visited);
2831 if (res != 1)
2832 {
2833 bitmap_copy (refs_not_supported, mem_refs);
2834 seq.release ();
2835 break;
2836 }
2837 sms.safe_push (std::make_pair (e, seq));
2838 }
2839
2840 /* Prune pruned mem_refs from earlier processed exits. */
2841 bool changed = !bitmap_empty_p (refs_not_supported);
2842 while (changed)
2843 {
2844 changed = false;
2845 std::pair<edge, vec<seq_entry> > *seq;
2846 FOR_EACH_VEC_ELT (sms, i, seq)
2847 {
2848 bool need_to_push = false;
2849 for (unsigned i = 0; i < seq->second.length (); ++i)
2850 {
2851 sm_kind kind = seq->second[i].second;
2852 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2853 break;
2854 unsigned id = seq->second[i].first;
2855 unsigned new_idx;
2856 if (kind == sm_ord
2857 && bitmap_bit_p (refs_not_supported, id))
2858 {
2859 seq->second[i].second = sm_other;
2860 gcc_assert (seq->second[i].from == NULL_TREE);
2861 need_to_push = true;
2862 }
2863 else if (need_to_push
2864 && !sm_seq_push_down (seq->second, i, &new_idx))
2865 {
2866 /* We need to push down both sm_ord and sm_other
2867 but for the latter we need to disqualify all
2868 following refs. */
2869 if (kind == sm_ord)
2870 {
2871 if (bitmap_set_bit (refs_not_supported, id))
2872 changed = true;
2873 seq->second[new_idx].second = sm_other;
2874 }
2875 else
2876 {
2877 for (unsigned j = seq->second.length () - 1;
2878 j > new_idx; --j)
2879 if (seq->second[j].second == sm_ord
2880 && bitmap_set_bit (refs_not_supported,
2881 seq->second[j].first))
2882 changed = true;
2883 seq->second.truncate (new_idx);
2884 break;
2885 }
2886 }
2887 }
2888 }
2889 }
2890 std::pair<edge, vec<seq_entry> > *seq;
2891 FOR_EACH_VEC_ELT (sms, i, seq)
2892 {
2893 /* Prune sm_other from the end. */
2894 while (!seq->second.is_empty ()
2895 && seq->second.last ().second == sm_other)
2896 seq->second.pop ();
2897 /* Prune duplicates from the start. */
2898 auto_bitmap seen (&lim_bitmap_obstack);
2899 unsigned j, k;
2900 for (j = k = 0; j < seq->second.length (); ++j)
2901 if (bitmap_set_bit (seen, seq->second[j].first))
2902 {
2903 if (k != j)
2904 seq->second[k] = seq->second[j];
2905 ++k;
2906 }
2907 seq->second.truncate (k);
2908 /* And verify. */
2909 seq_entry *e;
2910 FOR_EACH_VEC_ELT (seq->second, j, e)
2911 gcc_assert (e->second == sm_ord
2912 || (e->second == sm_other && e->from != NULL_TREE));
2913 }
2914
2915 /* Verify dependence for refs we cannot handle with the order preserving
2916 code (refs_not_supported) or prune them from mem_refs. */
2917 auto_vec<seq_entry> unord_refs;
2918 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2919 {
2920 ref = memory_accesses.refs_list[i];
2921 if (!ref_indep_loop_p (loop, ref, sm_waw))
2922 bitmap_clear_bit (mem_refs, i);
2923 /* We've now verified that the store order for ref with respect to all
2924 other stores in the loop does not matter. */
2925 else
2926 unord_refs.safe_push (seq_entry (i, sm_unord));
2927 }
2928
2929 hash_map<im_mem_ref *, sm_aux *> aux_map;
2930
2931 /* Execute SM but delay the store materialization for ordered
2932 sequences on exit. */
2933 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2934 {
2935 ref = memory_accesses.refs_list[i];
2936 execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
2937 false);
2938 }
2939
2940 /* Materialize ordered store sequences on exits. */
2941 FOR_EACH_VEC_ELT (exits, i, e)
2942 {
2943 edge append_cond_position = NULL;
2944 edge last_cond_fallthru = NULL;
2945 if (i < sms.length ())
2946 {
2947 gcc_assert (sms[i].first == e);
2948 execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2949 append_cond_position, last_cond_fallthru);
2950 sms[i].second.release ();
2951 }
2952 if (!unord_refs.is_empty ())
2953 execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2954 append_cond_position, last_cond_fallthru);
2955 /* Commit edge inserts here to preserve the order of stores
2956 when an exit exits multiple loops. */
2957 gsi_commit_one_edge_insert (e, NULL);
2958 }
2959
2960 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2961 iter != aux_map.end (); ++iter)
2962 delete (*iter).second;
2963 }
2964
2965 class ref_always_accessed
2966 {
2967 public:
2968 ref_always_accessed (class loop *loop_, bool stored_p_)
2969 : loop (loop_), stored_p (stored_p_) {}
2970 bool operator () (mem_ref_loc *loc);
2971 class loop *loop;
2972 bool stored_p;
2973 };
2974
2975 bool
2976 ref_always_accessed::operator () (mem_ref_loc *loc)
2977 {
2978 class loop *must_exec;
2979
2980 struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2981 if (!lim_data)
2982 return false;
2983
2984 /* If we require an always executed store make sure the statement
2985 is a store. */
2986 if (stored_p)
2987 {
2988 tree lhs = gimple_get_lhs (loc->stmt);
2989 if (!lhs
2990 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2991 return false;
2992 }
2993
2994 must_exec = lim_data->always_executed_in;
2995 if (!must_exec)
2996 return false;
2997
2998 if (must_exec == loop
2999 || flow_loop_nested_p (must_exec, loop))
3000 return true;
3001
3002 return false;
3003 }
3004
3005 /* Returns true if REF is always accessed in LOOP. If STORED_P is true
3006 make sure REF is always stored to in LOOP. */
3007
3008 static bool
3009 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
3010 {
3011 return for_all_locs_in_loop (loop, ref,
3012 ref_always_accessed (loop, stored_p));
3013 }
3014
3015 /* Returns true if REF1 and REF2 are independent. */
3016
3017 static bool
3018 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3019 {
3020 if (ref1 == ref2)
3021 return true;
3022
3023 if (dump_file && (dump_flags & TDF_DETAILS))
3024 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
3025 ref1->id, ref2->id);
3026
3027 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
3028 {
3029 if (dump_file && (dump_flags & TDF_DETAILS))
3030 fprintf (dump_file, "dependent.\n");
3031 return false;
3032 }
3033 else
3034 {
3035 if (dump_file && (dump_flags & TDF_DETAILS))
3036 fprintf (dump_file, "independent.\n");
3037 return true;
3038 }
3039 }
3040
3041 /* Returns true if REF is independent of all other accesses in LOOP.
3042 KIND specifies the kind of dependence to consider.
3043 lim_raw assumes REF is not stored in LOOP and disambiguates RAW
3044 dependences so if true REF can be hoisted out of LOOP
3045 sm_war disambiguates a store REF against all other loads to see
3046 whether the store can be sunk across loads out of LOOP
3047 sm_waw disambiguates a store REF against all other stores to see
3048 whether the store can be sunk across stores out of LOOP. */
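/* For example (illustrative only): hoisting a load of *p out of a loop that
   also stores to *q requires lim_raw disambiguation of p against q, while
   sinking a store to *p out of the loop additionally requires sm_war
   disambiguation against all loads and sm_waw against all other stores in
   the loop.  */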
3049
3050 static bool
3051 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
3052 {
3053 bool indep_p = true;
3054 bitmap refs_to_check;
3055
3056 if (kind == sm_war)
3057 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
3058 else
3059 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
3060
3061 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
3062 || ref->mem.ref == error_mark_node)
3063 indep_p = false;
3064 else
3065 {
3066 /* tri-state, { unknown, independent, dependent } */
3067 dep_state state = query_loop_dependence (loop, ref, kind);
3068 if (state != dep_unknown)
3069 return state == dep_independent ? true : false;
3070
3071 class loop *inner = loop->inner;
3072 while (inner)
3073 {
3074 if (!ref_indep_loop_p (inner, ref, kind))
3075 {
3076 indep_p = false;
3077 break;
3078 }
3079 inner = inner->next;
3080 }
3081
3082 if (indep_p)
3083 {
3084 unsigned i;
3085 bitmap_iterator bi;
3086 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
3087 {
3088 im_mem_ref *aref = memory_accesses.refs_list[i];
3089 if (aref->mem.ref == error_mark_node)
3090 {
3091 gimple *stmt = aref->accesses_in_loop[0].stmt;
3092 if ((kind == sm_war
3093 && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
3094 kind != sm_waw))
3095 || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
3096 kind != sm_waw))
3097 {
3098 indep_p = false;
3099 break;
3100 }
3101 }
3102 else if (!refs_independent_p (ref, aref, kind != sm_waw))
3103 {
3104 indep_p = false;
3105 break;
3106 }
3107 }
3108 }
3109 }
3110
3111 if (dump_file && (dump_flags & TDF_DETAILS))
3112 fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3113 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3114 ref->id, loop->num, indep_p ? "independent" : "dependent");
3115
3116 /* Record the computed result in the cache. */
3117 record_loop_dependence (loop, ref, kind,
3118 indep_p ? dep_independent : dep_dependent);
3119
3120 return indep_p;
3121 }
3122
3123 class ref_in_loop_hot_body
3124 {
3125 public:
3126 ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
3127 bool operator () (mem_ref_loc *loc);
3128 class loop *l;
3129 };
3130
3131 /* Check the coldest loop between loop L and the innermost loop. If there is
3132 a cold loop between L and INNER_LOOP, store motion can be performed;
3133 otherwise, with no cold loop, no store motion is done. get_coldest_out_loop
3134 also handles the case when L is INNER_LOOP. */
3135 bool
3136 ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3137 {
3138 basic_block curr_bb = gimple_bb (loc->stmt);
3139 class loop *inner_loop = curr_bb->loop_father;
3140 return get_coldest_out_loop (l, inner_loop, curr_bb);
3141 }
3142
3143
3144 /* Returns true if we can perform store motion of REF from LOOP. */
3145
3146 static bool
3147 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3148 {
3149 tree base;
3150
3151 /* Can't hoist unanalyzable refs. */
3152 if (!MEM_ANALYZABLE (ref))
3153 return false;
3154
3155 /* Can't hoist/sink aggregate copies. */
3156 if (ref->mem.ref == error_mark_node)
3157 return false;
3158
3159 /* It should be movable. */
3160 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3161 || TREE_THIS_VOLATILE (ref->mem.ref)
3162 || !for_each_index (&ref->mem.ref, may_move_till, loop))
3163 return false;
3164
3165 /* If it can throw, fail; we do not properly update EH info. */
3166 if (tree_could_throw_p (ref->mem.ref))
3167 return false;
3168
3169 /* If it can trap, it must be always executed in LOOP.
3170 Readonly memory locations may trap when storing to them, but
3171 tree_could_trap_p is a predicate for rvalues, so check that
3172 explicitly. */
3173 base = get_base_address (ref->mem.ref);
3174 if ((tree_could_trap_p (ref->mem.ref)
3175 || (DECL_P (base) && TREE_READONLY (base)))
3176 /* ??? We can at least use false here, allowing loads? We
3177 are forcing conditional stores if the ref is not always
3178 stored to later anyway. So this would only guard
3179 the load we need to emit. Thus when the ref is not
3180 loaded we can elide this completely? */
3181 && !ref_always_accessed_p (loop, ref, true))
3182 return false;
3183
3184 /* Verify all loads of ref can be hoisted. */
3185 if (ref->loaded
3186 && bitmap_bit_p (ref->loaded, loop->num)
3187 && !ref_indep_loop_p (loop, ref, lim_raw))
3188 return false;
3189
3190 /* Verify the candidate can be disambiguated against all loads,
3191 that is, we can elide all in-loop stores. Disambiguation
3192 against stores is done later when we cannot guarantee preserving
3193 the order of stores. */
3194 if (!ref_indep_loop_p (loop, ref, sm_war))
3195 return false;
3196
3197 /* Verify whether the candidate is hot for LOOP. Only do store motion if the
3198 candidate's profile count is hot. Statements in a cold BB shouldn't be
3199 moved out of their loop_father. */
3200 if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3201 return false;
3202
3203 return true;
3204 }
3205
3206 /* Marks in REFS_TO_SM the references in LOOP for which store motion should
3207 be performed. SM_EXECUTED is the set of references for which store
3208 motion was performed in one of the outer loops. */
3209
3210 static void
3211 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3212 {
3213 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3214 unsigned i;
3215 bitmap_iterator bi;
3216 im_mem_ref *ref;
3217
3218 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3219 {
3220 ref = memory_accesses.refs_list[i];
3221 if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3222 bitmap_set_bit (refs_to_sm, i);
3223 }
3224 }
3225
3226 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3227 for a store motion optimization (i.e. whether we can insert statement
3228 on its exits). */
3229
3230 static bool
3231 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3232 const vec<edge> &exits)
3233 {
3234 unsigned i;
3235 edge ex;
3236
3237 FOR_EACH_VEC_ELT (exits, i, ex)
3238 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3239 return false;
3240
3241 return true;
3242 }
3243
3244 /* Try to perform store motion for all memory references modified inside
3245 LOOP. SM_EXECUTED is the bitmap of the memory references for which
3246 store motion was executed in one of the outer loops. */
3247
3248 static void
3249 store_motion_loop (class loop *loop, bitmap sm_executed)
3250 {
3251 auto_vec<edge> exits = get_loop_exit_edges (loop);
3252 class loop *subloop;
3253 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3254
3255 if (loop_suitable_for_sm (loop, exits))
3256 {
3257 find_refs_for_sm (loop, sm_executed, sm_in_loop);
3258 if (!bitmap_empty_p (sm_in_loop))
3259 hoist_memory_references (loop, sm_in_loop, exits);
3260 }
3261
3262 bitmap_ior_into (sm_executed, sm_in_loop);
3263 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3264 store_motion_loop (subloop, sm_executed);
3265 bitmap_and_compl_into (sm_executed, sm_in_loop);
3266 BITMAP_FREE (sm_in_loop);
3267 }
3268
3269 /* Try to perform store motion for all memory references modified inside
3270 loops. */
3271
3272 static void
3273 do_store_motion (void)
3274 {
3275 class loop *loop;
3276 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3277
3278 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3279 store_motion_loop (loop, sm_executed);
3280
3281 BITMAP_FREE (sm_executed);
3282 }
3283
3284 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3285 for each such basic block bb records the outermost loop for which execution
3286 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3287 blocks that contain a nonpure call. */
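/* Small made-up illustration:

     for (...)
       {
         x = ...;     [header, always executed]
         if (c)
           y = ...;   [conditional, not always executed]
         z = ...;     [join; always executed if the walk reaches it]
       }

   Only blocks that dominate the latch and that the walk below reaches
   without hitting a nonpure call, an early exit or a possibly infinite
   subloop get ALWAYS_EXECUTED_IN set.  */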
3288
3289 static void
3290 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3291 {
3292 basic_block bb = NULL, last = NULL;
3293 edge e;
3294 class loop *inn_loop = loop;
3295
3296 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
3297 {
3298 auto_vec<basic_block, 64> worklist;
3299 worklist.reserve_exact (loop->num_nodes);
3300 worklist.quick_push (loop->header);
3301 do
3302 {
3303 edge_iterator ei;
3304 bb = worklist.pop ();
3305
3306 if (!flow_bb_inside_loop_p (inn_loop, bb))
3307 {
3308 /* When we are leaving a possibly infinite inner loop
3309 we have to stop processing. */
3310 if (!finite_loop_p (inn_loop))
3311 break;
3312 /* If the loop was finite we can continue with processing
3313 the loop we exited to. */
3314 inn_loop = bb->loop_father;
3315 }
3316
3317 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3318 last = bb;
3319
3320 if (bitmap_bit_p (contains_call, bb->index))
3321 break;
3322
3323 /* If LOOP exits from this BB stop processing. */
3324 FOR_EACH_EDGE (e, ei, bb->succs)
3325 if (!flow_bb_inside_loop_p (loop, e->dest))
3326 break;
3327 if (e)
3328 break;
3329
3330 /* A loop might be infinite (TODO use simple loop analysis
3331 to disprove this if possible). */
3332 if (bb->flags & BB_IRREDUCIBLE_LOOP)
3333 break;
3334
3335 if (bb->loop_father->header == bb)
3336 /* Record that we enter into a subloop since it might not
3337 be finite. */
3338 /* ??? Entering into a not always executed subloop makes
3339 fill_always_executed_in quadratic in loop depth since
3340 we walk those loops N times. This is not a problem
3341 in practice though, see PR102253 for a worst-case testcase. */
3342 inn_loop = bb->loop_father;
3343
3344 /* Walk the body of LOOP sorted by dominance relation. Additionally,
3345 if a basic block S dominates the latch, then only blocks dominated
3346 by S are after it.
3347 This is get_loop_body_in_dom_order using a worklist algorithm and
3348 stopping once we are no longer interested in visiting further
3349 blocks. */
3350 unsigned old_len = worklist.length ();
3351 unsigned postpone = 0;
3352 for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3353 son;
3354 son = next_dom_son (CDI_DOMINATORS, son))
3355 {
3356 if (!flow_bb_inside_loop_p (loop, son))
3357 continue;
3358 if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3359 postpone = worklist.length ();
3360 worklist.quick_push (son);
3361 }
3362 if (postpone)
3363 /* Postponing the block that dominates the latch means
3364 processing it last and thus putting it earliest in the
3365 worklist. */
3366 std::swap (worklist[old_len], worklist[postpone]);
3367 }
3368 while (!worklist.is_empty ());
3369
3370 while (1)
3371 {
3372 if (dump_enabled_p ())
3373 dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3374 last->index, loop->num);
3375 SET_ALWAYS_EXECUTED_IN (last, loop);
3376 if (last == loop->header)
3377 break;
3378 last = get_immediate_dominator (CDI_DOMINATORS, last);
3379 }
3380 }
3381
3382 for (loop = loop->inner; loop; loop = loop->next)
3383 fill_always_executed_in_1 (loop, contains_call);
3384 }
3385
3386 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3387 for each such basic block bb records the outermost loop for which execution
3388 of its header implies execution of bb. */
3389
3390 static void
3391 fill_always_executed_in (void)
3392 {
3393 basic_block bb;
3394 class loop *loop;
3395
3396 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3397 bitmap_clear (contains_call);
3398 FOR_EACH_BB_FN (bb, cfun)
3399 {
3400 gimple_stmt_iterator gsi;
3401 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3402 {
3403 if (nonpure_call_p (gsi_stmt (gsi)))
3404 break;
3405 }
3406
3407 if (!gsi_end_p (gsi))
3408 bitmap_set_bit (contains_call, bb->index);
3409 }
3410
3411 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3412 fill_always_executed_in_1 (loop, contains_call);
3413 }
3414
3415 /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
3416 to LOOP. Then recursively iterate each inner loop. */
3417
3418 void
3419 fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
3420 class loop *hotter_loop, class loop *loop)
3421 {
3422 if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3423 coldest_loop))
3424 coldest_loop = loop;
3425
3426 coldest_outermost_loop[loop->num] = coldest_loop;
3427
3428 hotter_than_inner_loop[loop->num] = NULL;
3429 class loop *outer_loop = loop_outer (loop);
3430 if (hotter_loop
3431 && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3432 hotter_loop))
3433 hotter_than_inner_loop[loop->num] = hotter_loop;
3434
3435 if (outer_loop && outer_loop != current_loops->tree_root
3436 && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3437 outer_loop))
3438 hotter_than_inner_loop[loop->num] = outer_loop;
3439
3440 if (dump_enabled_p ())
3441 {
3442 dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3443 loop->num, coldest_loop->num);
3444 if (hotter_than_inner_loop[loop->num])
3445 dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3446 hotter_than_inner_loop[loop->num]->num);
3447 else
3448 dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3449 }
3450
3451 class loop *inner_loop;
3452 for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
3453 fill_coldest_and_hotter_out_loop (coldest_loop,
3454 hotter_than_inner_loop[loop->num],
3455 inner_loop);
3456 }
3457
3458 /* Compute the global information needed by the loop invariant motion pass. */
3459
3460 static void
tree_ssa_lim_initialize(bool store_motion)3461 tree_ssa_lim_initialize (bool store_motion)
3462 {
3463 unsigned i;
3464
3465 bitmap_obstack_initialize (&lim_bitmap_obstack);
3466 gcc_obstack_init (&mem_ref_obstack);
3467 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3468
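  /* If the function contains transactions, compute which basic blocks are
     part of a transaction up front; this information is consulted later
     when deciding how memory accesses may be moved.  */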
  if (flag_tm)
    compute_transaction_bits ();

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
  if (store_motion)
    {
      memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow
	(number_of_loops (cfun));
    }

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
      if (store_motion)
	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
			   &lim_bitmap_obstack);
    }

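  /* The affine-expansion cache used when comparing memory references is
     allocated lazily; it is released in tree_ssa_lim_finalize.  */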
  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    bb_loop_postorder[loop->num] = i++;
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_loaded_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);

  coldest_outermost_loop.release ();
  hotter_than_inner_loop.release ();
}

/* Moves invariants out of loops.  Only "expensive" invariants are moved,
   i.e. those that are likely to be a win regardless of register pressure.
   Store motion is performed only if STORE_MOTION is true.  */

unsigned int
loop_invariant_motion_in_fun (function *fun, bool store_motion)
{
  unsigned int todo = 0;

  tree_ssa_lim_initialize (store_motion);

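  /* Mark SSA names that are possibly undefined on some path; the analysis
     below consults this information when deciding what can be moved.  */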
  mark_ssa_maybe_undefs ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references (store_motion);

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* Pre-compute coldest outermost loop and nearest hotter loop of
     each loop.  */
  class loop *loop;
  coldest_outermost_loop.create (number_of_loops (cfun));
  coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
  hotter_than_inner_loop.create (number_of_loops (cfun));
  hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    fill_coldest_and_hotter_out_loop (loop, NULL, loop);

  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  for (int i = 0; i < n; ++i)
    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  if (store_motion)
    do_store_motion ();

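  /* Store motion may have changed the CFG (e.g. by inserting blocks for
     conditional stores), so recompute the reverse post order before moving
     the remaining computations.  */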
  free (rpo);
  rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* Move the expressions that are expensive enough.  */
  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  free (rpo);

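  /* Commit the statements that were queued on edges (by store motion in
     particular) and bring the SSA form back into loop-closed shape if any
     updates were registered.  */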
  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (fun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  tree_ssa_lim_finalize ();

  return todo;
}

/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
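  /* When LIM runs as part of the loop pipeline, loops and SCEV are already
     initialized, so only reset SCEV at the end; otherwise set up and tear
     down the loop optimizer locally.  */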
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();
  else
    scev_reset ();
  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}