1 /* Predictive commoning.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the groups, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for which we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
168
169 It can be replaced with:
170
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
182
183 If the loop runs more than 1 iteration, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207 #include "config.h"
208 #include "system.h"
209 #include "coretypes.h"
210 #include "backend.h"
211 #include "rtl.h"
212 #include "tree.h"
213 #include "gimple.h"
214 #include "predict.h"
215 #include "tree-pass.h"
216 #include "ssa.h"
217 #include "gimple-pretty-print.h"
218 #include "alias.h"
219 #include "fold-const.h"
220 #include "cfgloop.h"
221 #include "tree-eh.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "tree-affine.h"
235 #include "builtins.h"
236
237 /* The maximum number of iterations between the considered memory
238 references. */
239
240 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
241
242 /* Data references (or phi nodes that carry data reference values across
243 loop iterations). */
244
typedef class dref_d
{
public:
  /* The reference itself.  NULL for "artificial" drefs that represent
     looparound phi nodes or combination statements instead of an actual
     memory access (see dump_dref, which distinguishes the two cases).  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered, i.e. its basic block dominates the last always-executed
     block of the loop.  */
  unsigned always_accessed : 1;
} *dref;
273
274
275 /* Type of the chain of the references. */
276
enum chain_type
{
  /* The addresses of the references in the chain are constant (loop
     invariant); candidates for load/store motion.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* There are only stores in the chain; used for intra/inter-iteration
     dead store elimination.  */
  CT_STORE_STORE,

  /* A combination of two chains (see step 4 of the algorithm described
     at the top of this file).  */
  CT_COMBINATION
};
294
295 /* Chains of data references. */
296
typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root
     (in number of loop iterations).  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* Finalizers for the eliminated stores.  */
  vec<tree> finis;

  /* gimple stmts initializing the initial variables of the chain.  */
  gimple_seq init_seq;

  /* gimple stmts finalizing the eliminated stores of the chain.  */
  gimple_seq fini_seq;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;

  /* True if this is store elimination chain and eliminated stores store
     loop invariant value into memory.  */
  unsigned inv_store_elimination : 1;
} *chain_p;
343
344
345 /* Describes the knowledge about the step of the memory references in
346 the component. */
347
enum ref_step_type
{
  /* The step is zero, i.e. the reference address is loop invariant.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero (not known at compile time).  */
  RS_ANY
};
359
360 /* Components of the data dependence graph. */
361
struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* True if all references in component are stores and we try to do
     intra/inter loop iteration dead store elimination.  */
  bool eliminate_store_p;

  /* Next component in the singly linked list of components.  */
  struct component *next;
};
377
378 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
379
380 static bitmap looparound_phis;
381
382 /* Cache used by tree_to_aff_combination_expand. */
383
384 static hash_map<tree, name_expansion *> *name_expansions;
385
386 /* Dumps data reference REF to FILE. */
387
388 extern void dump_dref (FILE *, dref);
389 void
dump_dref(FILE * file,dref ref)390 dump_dref (FILE *file, dref ref)
391 {
392 if (ref->ref)
393 {
394 fprintf (file, " ");
395 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396 fprintf (file, " (id %u%s)\n", ref->pos,
397 DR_IS_READ (ref->ref) ? "" : ", write");
398
399 fprintf (file, " offset ");
400 print_decs (ref->offset, file);
401 fprintf (file, "\n");
402
403 fprintf (file, " distance %u\n", ref->distance);
404 }
405 else
406 {
407 if (gimple_code (ref->stmt) == GIMPLE_PHI)
408 fprintf (file, " looparound ref\n");
409 else
410 fprintf (file, " combination ref\n");
411 fprintf (file, " in statement ");
412 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
413 fprintf (file, "\n");
414 fprintf (file, " distance %u\n", ref->distance);
415 }
416
417 }
418
419 /* Dumps CHAIN to FILE. */
420
421 extern void dump_chain (FILE *, chain_p);
422 void
dump_chain(FILE * file,chain_p chain)423 dump_chain (FILE *file, chain_p chain)
424 {
425 dref a;
426 const char *chain_type;
427 unsigned i;
428 tree var;
429
430 switch (chain->type)
431 {
432 case CT_INVARIANT:
433 chain_type = "Load motion";
434 break;
435
436 case CT_LOAD:
437 chain_type = "Loads-only";
438 break;
439
440 case CT_STORE_LOAD:
441 chain_type = "Store-loads";
442 break;
443
444 case CT_STORE_STORE:
445 chain_type = "Store-stores";
446 break;
447
448 case CT_COMBINATION:
449 chain_type = "Combination";
450 break;
451
452 default:
453 gcc_unreachable ();
454 }
455
456 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 chain->combined ? " (combined)" : "");
458 if (chain->type != CT_INVARIANT)
459 fprintf (file, " max distance %u%s\n", chain->length,
460 chain->has_max_use_after ? "" : ", may reuse first");
461
462 if (chain->type == CT_COMBINATION)
463 {
464 fprintf (file, " equal to %p %s %p in type ",
465 (void *) chain->ch1, op_symbol_code (chain->op),
466 (void *) chain->ch2);
467 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468 fprintf (file, "\n");
469 }
470
471 if (chain->vars.exists ())
472 {
473 fprintf (file, " vars");
474 FOR_EACH_VEC_ELT (chain->vars, i, var)
475 {
476 fprintf (file, " ");
477 print_generic_expr (file, var, TDF_SLIM);
478 }
479 fprintf (file, "\n");
480 }
481
482 if (chain->inits.exists ())
483 {
484 fprintf (file, " inits");
485 FOR_EACH_VEC_ELT (chain->inits, i, var)
486 {
487 fprintf (file, " ");
488 print_generic_expr (file, var, TDF_SLIM);
489 }
490 fprintf (file, "\n");
491 }
492
493 fprintf (file, " references:\n");
494 FOR_EACH_VEC_ELT (chain->refs, i, a)
495 dump_dref (file, a);
496
497 fprintf (file, "\n");
498 }
499
500 /* Dumps CHAINS to FILE. */
501
502 extern void dump_chains (FILE *, vec<chain_p> );
503 void
dump_chains(FILE * file,vec<chain_p> chains)504 dump_chains (FILE *file, vec<chain_p> chains)
505 {
506 chain_p chain;
507 unsigned i;
508
509 FOR_EACH_VEC_ELT (chains, i, chain)
510 dump_chain (file, chain);
511 }
512
513 /* Dumps COMP to FILE. */
514
515 extern void dump_component (FILE *, struct component *);
516 void
dump_component(FILE * file,struct component * comp)517 dump_component (FILE *file, struct component *comp)
518 {
519 dref a;
520 unsigned i;
521
522 fprintf (file, "Component%s:\n",
523 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
524 FOR_EACH_VEC_ELT (comp->refs, i, a)
525 dump_dref (file, a);
526 fprintf (file, "\n");
527 }
528
529 /* Dumps COMPS to FILE. */
530
531 extern void dump_components (FILE *, struct component *);
532 void
dump_components(FILE * file,struct component * comps)533 dump_components (FILE *file, struct component *comps)
534 {
535 struct component *comp;
536
537 for (comp = comps; comp; comp = comp->next)
538 dump_component (file, comp);
539 }
540
541 /* Frees a chain CHAIN. */
542
543 static void
release_chain(chain_p chain)544 release_chain (chain_p chain)
545 {
546 dref ref;
547 unsigned i;
548
549 if (chain == NULL)
550 return;
551
552 FOR_EACH_VEC_ELT (chain->refs, i, ref)
553 free (ref);
554
555 chain->refs.release ();
556 chain->vars.release ();
557 chain->inits.release ();
558 if (chain->init_seq)
559 gimple_seq_discard (chain->init_seq);
560
561 chain->finis.release ();
562 if (chain->fini_seq)
563 gimple_seq_discard (chain->fini_seq);
564
565 free (chain);
566 }
567
568 /* Frees CHAINS. */
569
570 static void
release_chains(vec<chain_p> chains)571 release_chains (vec<chain_p> chains)
572 {
573 unsigned i;
574 chain_p chain;
575
576 FOR_EACH_VEC_ELT (chains, i, chain)
577 release_chain (chain);
578 chains.release ();
579 }
580
581 /* Frees a component COMP. */
582
static void
release_component (struct component *comp)
{
  /* Release the vector of references (the drefs themselves are owned
     elsewhere -- callers free them before calling this) and the component
     structure itself.  */
  comp->refs.release ();
  free (comp);
}
589
590 /* Frees list of components COMPS. */
591
592 static void
release_components(struct component * comps)593 release_components (struct component *comps)
594 {
595 struct component *act, *next;
596
597 for (act = comps; act; act = next)
598 {
599 next = act->next;
600 release_component (act);
601 }
602 }
603
604 /* Finds a root of tree given by FATHERS containing A, and performs path
605 shortening. */
606
static unsigned
component_of (unsigned fathers[], unsigned a)
{
  /* Walk up to the root of the union-find tree containing A.  */
  unsigned root = a;
  while (fathers[root] != root)
    root = fathers[root];

  /* Path compression: make every node on the path point directly
     at ROOT, so later lookups are cheap.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
623
624 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
625 components, A and B are components to merge. */
626
627 static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)628 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629 {
630 unsigned ca = component_of (fathers, a);
631 unsigned cb = component_of (fathers, b);
632
633 if (ca == cb)
634 return;
635
636 if (sizes[ca] < sizes[cb])
637 {
638 sizes[cb] += sizes[ca];
639 fathers[ca] = cb;
640 }
641 else
642 {
643 sizes[ca] += sizes[cb];
644 fathers[cb] = ca;
645 }
646 }
647
648 /* Returns true if A is a reference that is suitable for predictive commoning
649 in the innermost loop that contains it. REF_STEP is set according to the
650 step of the reference A. */
651
652 static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)653 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654 {
655 tree ref = DR_REF (a), step = DR_STEP (a);
656
657 if (!step
658 || TREE_THIS_VOLATILE (ref)
659 || !is_gimple_reg_type (TREE_TYPE (ref))
660 || tree_could_throw_p (ref))
661 return false;
662
663 if (integer_zerop (step))
664 *ref_step = RS_INVARIANT;
665 else if (integer_nonzerop (step))
666 *ref_step = RS_NONZERO;
667 else
668 *ref_step = RS_ANY;
669
670 return true;
671 }
672
673 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
674
675 static void
aff_combination_dr_offset(struct data_reference * dr,aff_tree * offset)676 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677 {
678 tree type = TREE_TYPE (DR_OFFSET (dr));
679 aff_tree delta;
680
681 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
682 &name_expansions);
683 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
684 aff_combination_add (offset, &delta);
685 }
686
687 /* Determines number of iterations of the innermost enclosing loop before B
688 refers to exactly the same location as A and stores it to OFF. If A and
689 B do not have the same step, they never meet, or anything else fails,
690 returns false, otherwise returns true. Both A and B are assumed to
691 satisfy suitable_reference_p. */
692
693 static bool
determine_offset(struct data_reference * a,struct data_reference * b,poly_widest_int * off)694 determine_offset (struct data_reference *a, struct data_reference *b,
695 poly_widest_int *off)
696 {
697 aff_tree diff, baseb, step;
698 tree typea, typeb;
699
700 /* Check that both the references access the location in the same type. */
701 typea = TREE_TYPE (DR_REF (a));
702 typeb = TREE_TYPE (DR_REF (b));
703 if (!useless_type_conversion_p (typeb, typea))
704 return false;
705
706 /* Check whether the base address and the step of both references is the
707 same. */
708 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710 return false;
711
712 if (integer_zerop (DR_STEP (a)))
713 {
714 /* If the references have loop invariant address, check that they access
715 exactly the same location. */
716 *off = 0;
717 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719 }
720
721 /* Compare the offsets of the addresses, and check whether the difference
722 is a multiple of step. */
723 aff_combination_dr_offset (a, &diff);
724 aff_combination_dr_offset (b, &baseb);
725 aff_combination_scale (&baseb, -1);
726 aff_combination_add (&diff, &baseb);
727
728 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
729 &step, &name_expansions);
730 return aff_combination_constant_multiple_p (&diff, &step, off);
731 }
732
733 /* Returns the last basic block in LOOP for that we are sure that
734 it is executed whenever the loop is entered. */
735
736 static basic_block
last_always_executed_block(class loop * loop)737 last_always_executed_block (class loop *loop)
738 {
739 unsigned i;
740 vec<edge> exits = get_loop_exit_edges (loop);
741 edge ex;
742 basic_block last = loop->latch;
743
744 FOR_EACH_VEC_ELT (exits, i, ex)
745 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
746 exits.release ();
747
748 return last;
749 }
750
751 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
752
static struct component *
split_data_refs_to_components (class loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  /* Union-find structure over the data references; entry N (one past the
     last dataref) is the designated "bad" component.  */
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  /* Don't do store elimination if loop has multiple exit edges.  */
  bool eliminate_store_p = single_exit (loop) != NULL;
  basic_block last_always_executed = last_always_executed_block (loop);
  /* Components for which store-store elimination must be disabled.  */
  auto_bitmap no_store_store_comps;

  /* Initialize the union-find structure: each reference starts in its
     own singleton component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      /* Remember the index of the reference in its AUX field so the
	 dependence walk below can map a dataref back to its component.  */
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Move all references unsuitable for predictive commoning into the
     bad component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge components according to the dependence relations.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      poly_widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);

      /* Don't do store elimination if there is any unknown dependence for
	 any store data reference.  */
      if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
	      || DDR_NUM_DIST_VECTS (ddr) == 0))
	eliminate_store_p = false;

      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in it's component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    {
	      bitmap_set_bit (no_store_store_comps, ib);
	      continue;
	    }
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      bitmap_set_bit (no_store_store_comps, ib);
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    {
	      bitmap_set_bit (no_store_store_comps, ia);
	      continue;
	    }
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      bitmap_set_bit (no_store_store_comps, ia);
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}
      /* Two writes with an unsuitable dependence poison both components.  */
      else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
	       && ia != bad && ib != bad
	       && !determine_offset (dra, drb, &dummy_off))
	{
	  merge_comps (comp_father, comp_size, bad, ia);
	  merge_comps (comp_father, comp_size, bad, ib);
	  continue;
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  if (eliminate_store_p)
    {
      tree niters = number_of_latch_executions (loop);

      /* Don't do store elimination if niters info is unknown because stores
	 in the last iteration can't be eliminated and we need to recover it
	 after loop.  */
      eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
    }

  /* Materialize the union-find components as struct component lists,
     skipping everything that ended up in the bad component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comp->eliminate_store_p = eliminate_store_p;
	  comps[ca] = comp;
	}

      dataref = XCNEW (class dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	= dominated_by_p (CDI_DOMINATORS, last_always_executed,
			  gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  if (eliminate_store_p)
    {
      /* Disable store-store elimination for components flagged during
	 the dependence walk above.  */
      bitmap_iterator bi;
      EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
	{
	  ca = component_of (comp_father, ia);
	  if (ca != bad)
	    comps[ca]->eliminate_store_p = false;
	}
    }

  /* Chain the non-empty components into the result list.  */
  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
947
948 /* Returns true if the component COMP satisfies the conditions
949 described in 2) at the beginning of this file. LOOP is the current
950 loop. */
951
static bool
suitable_component_p (class loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  /* Condition 2b): every reference must be executed exactly once per
     iteration, and the references must be ordered by dominance (BP tracks
     the previous block; the assert checks the ordering invariant
     established when the component was built).  */
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      if (!just_once_each_iteration_p (loop, ba))
	return false;

      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  /* Condition 2a): determine the common step from the first reference;
     all members of a component were already checked to be suitable.  */
  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  /* Condition 2c): each reference must have a compile-time constant
     iteration distance from the first one.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      /* Polynomial offsets are no use, since we need to know the
	 gap between iteration numbers at compile time.  */
      poly_widest_int offset;
      if (!determine_offset (first->ref, a->ref, &offset)
	  || !offset.is_constant (&a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
1002
1003 /* Check the conditions on references inside each of components COMPS,
1004 and remove the unsuitable components from the list. The new list
1005 of components is returned. The conditions are described in 2) at
1006 the beginning of this file. LOOP is the current loop. */
1007
1008 static struct component *
filter_suitable_components(class loop * loop,struct component * comps)1009 filter_suitable_components (class loop *loop, struct component *comps)
1010 {
1011 struct component **comp, *act;
1012
1013 for (comp = &comps; *comp; )
1014 {
1015 act = *comp;
1016 if (suitable_component_p (loop, act))
1017 comp = &act->next;
1018 else
1019 {
1020 dref ref;
1021 unsigned i;
1022
1023 *comp = act->next;
1024 FOR_EACH_VEC_ELT (act->refs, i, ref)
1025 free (ref);
1026 release_component (act);
1027 }
1028 }
1029
1030 return comps;
1031 }
1032
1033 /* Compares two drefs A and B by their offset and position. Callback for
1034 qsort. */
1035
1036 static int
order_drefs(const void * a,const void * b)1037 order_drefs (const void *a, const void *b)
1038 {
1039 const dref *const da = (const dref *) a;
1040 const dref *const db = (const dref *) b;
1041 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1042
1043 if (offcmp != 0)
1044 return offcmp;
1045
1046 return (*da)->pos - (*db)->pos;
1047 }
1048
1049 /* Compares two drefs A and B by their position. Callback for qsort. */
1050
1051 static int
order_drefs_by_pos(const void * a,const void * b)1052 order_drefs_by_pos (const void *a, const void *b)
1053 {
1054 const dref *const da = (const dref *) a;
1055 const dref *const db = (const dref *) b;
1056
1057 return (*da)->pos - (*db)->pos;
1058 }
1059
1060 /* Returns root of the CHAIN. */
1061
1062 static inline dref
get_chain_root(chain_p chain)1063 get_chain_root (chain_p chain)
1064 {
1065 return chain->refs[0];
1066 }
1067
1068 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1069 exist. */
1070
1071 static inline dref
get_chain_last_write_at(chain_p chain,unsigned distance)1072 get_chain_last_write_at (chain_p chain, unsigned distance)
1073 {
1074 for (unsigned i = chain->refs.length (); i > 0; i--)
1075 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1076 && distance == chain->refs[i - 1]->distance)
1077 return chain->refs[i - 1];
1078
1079 return NULL;
1080 }
1081
1082 /* Given CHAIN, returns the last write ref with the same distance before load
1083 at index LOAD_IDX, or NULL if it doesn't exist. */
1084
1085 static inline dref
get_chain_last_write_before_load(chain_p chain,unsigned load_idx)1086 get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1087 {
1088 gcc_assert (load_idx < chain->refs.length ());
1089
1090 unsigned distance = chain->refs[load_idx]->distance;
1091
1092 for (unsigned i = load_idx; i > 0; i--)
1093 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1094 && distance == chain->refs[i - 1]->distance)
1095 return chain->refs[i - 1];
1096
1097 return NULL;
1098 }
1099
/* Adds REF to the chain CHAIN.  REF's distance (in loop iterations) is
   derived from the difference of its offset from the root's offset; the
   chain's length and has_max_use_after flag are updated accordingly.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* References are added in order of increasing offset, so REF may not
     precede the root, and the distance must fit an unsigned HWI.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* A new maximum distance invalidates any previous has_max_use_after:
     no earlier use can be at the (new) maximum distance.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
  if (DR_IS_WRITE (ref->ref))
    chain->type = CT_STORE_STORE;

  /* Don't set the flag for store-store chain since there is no use.  */
  if (chain->type != CT_STORE_STORE
      && ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
1133
1134 /* Returns the chain for invariant component COMP. */
1135
1136 static chain_p
make_invariant_chain(struct component * comp)1137 make_invariant_chain (struct component *comp)
1138 {
1139 chain_p chain = XCNEW (struct chain);
1140 unsigned i;
1141 dref ref;
1142
1143 chain->type = CT_INVARIANT;
1144
1145 chain->all_always_accessed = true;
1146
1147 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1148 {
1149 chain->refs.safe_push (ref);
1150 chain->all_always_accessed &= ref->always_accessed;
1151 }
1152
1153 chain->inits = vNULL;
1154 chain->finis = vNULL;
1155
1156 return chain;
1157 }
1158
1159 /* Make a new chain of type TYPE rooted at REF. */
1160
1161 static chain_p
make_rooted_chain(dref ref,enum chain_type type)1162 make_rooted_chain (dref ref, enum chain_type type)
1163 {
1164 chain_p chain = XCNEW (struct chain);
1165
1166 chain->type = type;
1167 chain->refs.safe_push (ref);
1168 chain->all_always_accessed = ref->always_accessed;
1169 ref->distance = 0;
1170
1171 chain->inits = vNULL;
1172 chain->finis = vNULL;
1173
1174 return chain;
1175 }
1176
1177 /* Returns true if CHAIN is not trivial. */
1178
1179 static bool
nontrivial_chain_p(chain_p chain)1180 nontrivial_chain_p (chain_p chain)
1181 {
1182 return chain != NULL && chain->refs.length () > 1;
1183 }
1184
1185 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1186 is no such name. */
1187
1188 static tree
name_for_ref(dref ref)1189 name_for_ref (dref ref)
1190 {
1191 tree name;
1192
1193 if (is_gimple_assign (ref->stmt))
1194 {
1195 if (!ref->ref || DR_IS_READ (ref->ref))
1196 name = gimple_assign_lhs (ref->stmt);
1197 else
1198 name = gimple_assign_rhs1 (ref->stmt);
1199 }
1200 else
1201 name = PHI_RESULT (ref->stmt);
1202
1203 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1204 }
1205
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop), i.e. if REF accesses the same
   location that ROOT accesses DISTANCE iterations before the loop starts.  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  poly_widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration: compute DIFF = offset(ROOT) - offset(REF)
     as an affine combination ...  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  /* ... and check that DIFF is a constant multiple of the root's step.  */
  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* The multiple must be exactly DISTANCE iterations.  */
  if (maybe_ne (off, distance))
    return false;

  return true;
}
1247
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (class loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Determine the ssa name carrying the value of REF inside the loop:
     lhs of a load, stored rhs of a store, or the phi result.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a header phi whose latch argument is NAME.  Also record the
     virtual operand live on the preheader edge; it is needed below to
     verify the initializer's location is not clobbered in between.  */
  tree entry_vuse = NULL_TREE;
  gphi *phi = NULL;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      gphi *p = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (p, latch) == name)
	phi = p;
      else if (virtual_operand_p (gimple_phi_result (p)))
	entry_vuse = PHI_ARG_DEF_FROM_EDGE (p, loop_preheader_edge (loop));
      if (phi && entry_vuse)
	break;
    }
  if (!phi || !entry_vuse)
    return NULL;

  /* The initial value must be an ssa name defined by an assignment that
     loads from a memory reference or a declaration.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
			     init_stmt))
    return NULL;

  /* The looparound phi carries the value one iteration further than REF,
     hence DISTANCE + 1.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  /* Make sure nothing clobbers the location we re-use the initial value
     from.  Walk the virtual use-def chain from the loop entry back to the
     initializer; give up on a phi, on exceeding the alias-query budget,
     or on any possibly-clobbering statement.  */
  if (entry_vuse != gimple_vuse (init_stmt))
    {
      ao_ref ref;
      ao_ref_init (&ref, init_ref);
      unsigned limit = param_sccvn_max_alias_queries_per_access;
      tree vdef = entry_vuse;
      do
	{
	  gimple *def = SSA_NAME_DEF_STMT (vdef);
	  if (limit-- == 0 || gimple_code (def) == GIMPLE_PHI)
	    return NULL;
	  if (stmt_may_clobber_ref_p_1 (def, &ref))
	    /* When the stmt is an assign to init_ref we could in theory
	       use its RHS for the initial value of the looparound PHI
	       we replace in prepare_initializers_chain, but we have
	       no convenient place to store this info at the moment.  */
	    return NULL;
	  vdef = gimple_vuse (def);
	}
      while (vdef != gimple_vuse (init_stmt));
    }

  return phi;
}
1340
1341 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1342
1343 static void
insert_looparound_copy(chain_p chain,dref ref,gphi * phi)1344 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1345 {
1346 dref nw = XCNEW (class dref_d), aref;
1347 unsigned i;
1348
1349 nw->stmt = phi;
1350 nw->distance = ref->distance + 1;
1351 nw->always_accessed = 1;
1352
1353 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1354 if (aref->distance >= nw->distance)
1355 break;
1356 chain->refs.safe_insert (i, nw);
1357
1358 if (nw->distance > chain->length)
1359 {
1360 chain->length = nw->distance;
1361 chain->has_max_use_after = false;
1362 }
1363 }
1364
1365 /* For references in CHAIN that are copied around the LOOP (created previously
1366 by PRE, or by user), add the results of such copies to the chain. This
1367 enables us to remove the copies by unrolling, and may need less registers
1368 (also, it may allow us to combine chains together). */
1369
1370 static void
add_looparound_copies(class loop * loop,chain_p chain)1371 add_looparound_copies (class loop *loop, chain_p chain)
1372 {
1373 unsigned i;
1374 dref ref, root = get_chain_root (chain);
1375 gphi *phi;
1376
1377 if (chain->type == CT_STORE_STORE)
1378 return;
1379
1380 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1381 {
1382 phi = find_looparound_phi (loop, ref, root);
1383 if (!phi)
1384 continue;
1385
1386 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1387 insert_looparound_copy (chain, ref, phi);
1388 }
1389 }
1390
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (class loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;
  enum chain_type type;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Trivial component: nothing to common, just free the single ref.  */
  if (comp->refs.length () <= 1)
    {
      if (comp->refs.length () == 1)
	{
	  free (comp->refs[0]);
	  comp->refs.truncate (0);
	}
      return;
    }

  /* Sort by offset, then by position within the loop body.  */
  comp->refs.qsort (order_drefs);

  /* For Store-Store chain, we only support load if it is dominated by a
     store statement in the same iteration of loop.  */
  if (comp->eliminate_store_p)
    for (a = NULL, i = 0; i < comp->refs.length (); i++)
      {
	if (DR_IS_WRITE (comp->refs[i]->ref))
	  a = comp->refs[i];
	else if (a == NULL || a->offset != comp->refs[i]->offset)
	  {
	    /* If there is load that is not dominated by a store in the
	       same iteration of loop, clear the flag so no Store-Store
	       chain is generated for this component.  */
	    comp->eliminate_store_p = false;
	    break;
	  }
      }

  /* Determine roots and create chains for components.  */
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain when there is none yet, when the reference cannot
	 join the current one (a write into a CT_LOAD chain, or any write if
	 store elimination is disallowed), or when the distance from the
	 current root reaches MAX_DISTANCE.  */
      if (!chain
	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  /* Finish the previous chain: keep it only if nontrivial.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);

	  /* Determine type of the chain.  If the root reference is a load,
	     this can only be a CT_LOAD chain; other chains are initialized
	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
	     new reference is added.  */
	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
	  chain = make_rooted_chain (a, type);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Finish the last open chain the same way.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1481
1482 /* Find roots of the values and determine distances in components COMPS, and
1483 separates the references to CHAINS. LOOP is the current loop. */
1484
1485 static void
determine_roots(class loop * loop,struct component * comps,vec<chain_p> * chains)1486 determine_roots (class loop *loop,
1487 struct component *comps, vec<chain_p> *chains)
1488 {
1489 struct component *comp;
1490
1491 for (comp = comps; comp; comp = comp->next)
1492 determine_roots_comp (loop, comp, chains);
1493 }
1494
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* A phi can only be read from, never assigned to.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  /* OLD is a memory reference; copy the stored value.  A clobber
	     has no usable value, so use the default def instead.  */
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1585
/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
   of the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter,
		  gimple_seq *stmts, tree niters = NULL_TREE)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  tree ref = DR_REF (dr);
  enum tree_code ref_code = ERROR_MARK;
  tree ref_type = NULL_TREE;
  tree ref_op1 = NULL_TREE;
  tree ref_op2 = NULL_TREE;
  tree new_offset;

  /* Fold ITER steps into the offset: into the constant part COFF if the
     product is constant, otherwise into the variable part OFF.  */
  if (iter != 0)
    {
      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
      if (TREE_CODE (new_offset) == INTEGER_CST)
	coff = size_binop (PLUS_EXPR, coff, new_offset);
      else
	off = size_binop (PLUS_EXPR, off, new_offset);
    }

  /* Likewise fold NITERS steps in.
     NOTE(review): this branch tests NITERS for constness while the ITER
     branch above tests the product NEW_OFFSET; presumably DR_STEP is
     always constant when NITERS is — confirm, since a non-constant COFF
     would be invalid as a MEM_REF offset below.  */
  if (niters != NULL_TREE)
    {
      niters = fold_convert (ssizetype, niters);
      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
      if (TREE_CODE (niters) == INTEGER_CST)
	coff = size_binop (PLUS_EXPR, coff, new_offset);
      else
	off = size_binop (PLUS_EXPR, off, new_offset);
    }

  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     if the bitfield object also starts at a byte-boundary we can simply
     replicate the COMPONENT_REF, but we have to subtract the component's
     byte-offset from the MEM_REF address first.
     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      unsigned HOST_WIDE_INT boff;
      tree field = TREE_OPERAND (ref, 1);
      tree offset = component_ref_field_offset (ref);
      ref_type = TREE_TYPE (ref);
      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
      /* This can occur in Ada.  See the comment in get_bit_range.  */
      if (boff % BITS_PER_UNIT != 0
	  || !tree_fits_uhwi_p (offset))
	{
	  ref_code = BIT_FIELD_REF;
	  ref_op1 = DECL_SIZE (field);
	  ref_op2 = bitsize_zero_node;
	}
      else
	{
	  /* Byte-aligned bitfield: rebuild the COMPONENT_REF on top of a
	     MEM_REF whose address is adjusted back by the field's byte
	     offset.  */
	  boff >>= LOG2_BITS_PER_UNIT;
	  boff += tree_to_uhwi (offset);
	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
	  ref_code = COMPONENT_REF;
	  ref_op1 = field;
	  ref_op2 = TREE_OPERAND (ref, 2);
	  ref = TREE_OPERAND (ref, 0);
	}
    }
  /* Build the final access: a MEM_REF at base + variable offset, carrying
     the constant offset (and alias info) in its second operand.  */
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
  tree type = build_aligned_type (TREE_TYPE (ref),
				  get_object_alignment (ref));
  ref = build2 (MEM_REF, type, addr, alias_ptr);
  if (ref_type)
    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
  return ref;
}
1666
1667 /* Get the initialization expression for the INDEX-th temporary variable
1668 of CHAIN. */
1669
1670 static tree
get_init_expr(chain_p chain,unsigned index)1671 get_init_expr (chain_p chain, unsigned index)
1672 {
1673 if (chain->type == CT_COMBINATION)
1674 {
1675 tree e1 = get_init_expr (chain->ch1, index);
1676 tree e2 = get_init_expr (chain->ch2, index);
1677
1678 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1679 }
1680 else
1681 return chain->inits[index];
1682 }
1683
1684 /* Returns a new temporary variable used for the I-th variable carrying
1685 value of REF. The variable's uid is marked in TMP_VARS. */
1686
1687 static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)1688 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1689 {
1690 tree type = TREE_TYPE (ref);
1691 /* We never access the components of the temporary variable in predictive
1692 commoning. */
1693 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1694 bitmap_set_bit (tmp_vars, DECL_UID (var));
1695 return var;
1696 }
1697
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables; when no use is past the maximum distance, the
     last slot can reuse the first variable instead of a fresh one.  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* For each distance, initialize the variable on the preheader edge and
     create a header phi propagating the next variable around the loop.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1751
/* For inter-iteration store elimination CHAIN in LOOP, returns true if
   all stores to be eliminated store loop invariant values into memory.
   In this case, we can use these invariant values directly after LOOP.  */

static bool
is_inv_store_elimination_chain (class loop *loop, chain_p chain)
{
  if (chain->length == 0 || chain->type != CT_STORE_STORE)
    return false;

  gcc_assert (!chain->has_max_use_after);

  /* If loop iterates for unknown times or fewer times than chain->length,
     we still need to setup root variable and propagate it with PHI node.  */
  tree niters = number_of_latch_executions (loop);
  if (TREE_CODE (niters) != INTEGER_CST
      || wi::leu_p (wi::to_wide (niters), chain->length))
    return false;

  /* Check stores in chain for elimination if they only store loop invariant
     values.  */
  for (unsigned i = 0; i < chain->length; i++)
    {
      dref a = get_chain_last_write_at (chain, i);
      if (a == NULL)
	continue;

      /* The store must be a plain single assignment of a non-clobber.  */
      gimple *def_stmt, *stmt = a->stmt;
      if (!gimple_assign_single_p (stmt))
	return false;

      tree val = gimple_assign_rhs1 (stmt);
      if (TREE_CLOBBER_P (val))
	return false;

      /* Constants are trivially invariant.  */
      if (CONSTANT_CLASS_P (val))
	continue;

      if (TREE_CODE (val) != SSA_NAME)
	return false;

      /* An ssa name is invariant iff it is defined outside the loop
	 (a nop def means a default definition, also outside).  */
      def_stmt = SSA_NAME_DEF_STMT (val);
      if (gimple_nop_p (def_stmt))
	continue;

      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	return false;
    }
  return true;
}
1802
/* Creates root variables for store elimination CHAIN in which stores for
   elimination only store loop invariant values.  In this case, we neither
   need to load root variables before loop nor propagate it with PHI nodes.  */

static void
initialize_root_vars_store_elim_1 (chain_p chain)
{
  tree var;
  unsigned i, n = chain->length;

  chain->vars.create (n);
  chain->vars.safe_grow_cleared (n);

  /* Initialize root value for eliminated stores at each distance.  */
  for (i = 0; i < n; i++)
    {
      dref a = get_chain_last_write_at (chain, i);
      if (a == NULL)
	continue;

      var = gimple_assign_rhs1 (a->stmt);
      chain->vars[a->distance] = var;
    }

  /* We don't propagate values with PHI nodes, so manually propagate value
     to bubble positions (distances with no store of their own inherit the
     value from the previous distance that has one).  */
  var = chain->vars[0];
  for (i = 1; i < n; i++)
    {
      if (chain->vars[i] != NULL_TREE)
	{
	  var = chain->vars[i];
	  continue;
	}
      chain->vars[i] = var;
    }

  /* Revert the vector so that vars[0] corresponds to the largest
     distance.  */
  for (i = 0; i < n / 2; i++)
    std::swap (chain->vars[i], chain->vars[n - i - 1]);
}
1844
/* Creates root variables for store elimination CHAIN in which stores for
   elimination store loop variant values.  In this case, we may need to
   load root variables before LOOP and propagate it with PHI nodes.  Uids
   of the newly created root variables are marked in TMP_VARS.  */

static void
initialize_root_vars_store_elim_2 (class loop *loop,
				   chain_p chain, bitmap tmp_vars)
{
  unsigned i, n = chain->length;
  tree ref, init, var, next, val, phi_result;
  gimple *stmt;
  gimple_seq stmts;

  chain->vars.create (n);

  ref = DR_REF (get_chain_root (chain)->ref);
  for (i = 0; i < n; i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Root values are either rhs operand of stores to be eliminated, or
     loaded from memory before loop.  */
  auto_vec<tree> vtemps;
  vtemps.safe_grow_cleared (n);
  for (i = 0; i < n; i++)
    {
      init = get_init_expr (chain, i);
      if (init == NULL_TREE)
	{
	  /* Root value is rhs operand of the store to be eliminated if
	     it isn't loaded from memory before loop.  */
	  dref a = get_chain_last_write_at (chain, i);
	  val = gimple_assign_rhs1 (a->stmt);
	  if (TREE_CLOBBER_P (val))
	    {
	      /* A clobber has no meaningful value; substitute the default
		 definition in both the root value and the store itself.  */
	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
	      gimple_assign_set_rhs1 (a->stmt, val);
	    }

	  vtemps[n - i - 1] = val;
	}
      else
	{
	  edge latch = loop_latch_edge (loop);
	  edge entry = loop_preheader_edge (loop);

	  /* Root value is loaded from memory before loop, we also need
	     to add PHI nodes to propagate the value across iterations.  */
	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (entry, stmts);

	  next = chain->vars[n - i];
	  phi_result = copy_ssa_name (next);
	  gphi *phi = create_phi_node (phi_result, loop->header);
	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
	  vtemps[n - i - 1] = phi_result;
	}
    }

  /* Find the insertion position: after the last (by position) reference
     of the chain.  */
  dref last = get_chain_root (chain);
  for (i = 0; i < chain->refs.length (); i++)
    {
      if (chain->refs[i]->pos > last->pos)
	last = chain->refs[i];
    }

  gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);

  /* Insert statements copying root value to root variable.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      val = vtemps[i];
      stmt = gimple_build_assign (var, val);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
}
1931
1932 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1933 (CHAIN->length - 1) iterations. */
1934
1935 static void
finalize_eliminated_stores(class loop * loop,chain_p chain)1936 finalize_eliminated_stores (class loop *loop, chain_p chain)
1937 {
1938 unsigned i, n = chain->length;
1939
1940 for (i = 0; i < n; i++)
1941 {
1942 tree var = chain->vars[i];
1943 tree fini = chain->finis[n - i - 1];
1944 gimple *stmt = gimple_build_assign (fini, var);
1945
1946 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1947 }
1948
1949 if (chain->fini_seq)
1950 {
1951 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1952 chain->fini_seq = NULL;
1953 }
1954 }
1955
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (class loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* One variable suffices for a read-only location; a written one needs a
     second ssa name to close the phi cycle around the loop.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* Propagate the value around the loop with a phi node.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Read-only: a single initialization on the preheader edge.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
2006
2007
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* Replace each reference with its variable.  vars[0] is the phi result
     (rewritten to a fresh ssa name after each store); vars[1] is the name
     carried around the latch.  After the last store, subsequent reads use
     vars[1] (RIDX switches from 0 to 1).  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      /* A read is replaced by a use of the variable; a write initializes
	 the variable from the stored value.  */
      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}
2054
2055 /* Returns the single statement in that NAME is used, excepting
2056 the looparound phi nodes contained in one of the chains. If there is no
2057 such statement, or more statements, NULL is returned. */
2058
2059 static gimple *
single_nonlooparound_use(tree name)2060 single_nonlooparound_use (tree name)
2061 {
2062 use_operand_p use;
2063 imm_use_iterator it;
2064 gimple *stmt, *ret = NULL;
2065
2066 FOR_EACH_IMM_USE_FAST (use, it, name)
2067 {
2068 stmt = USE_STMT (use);
2069
2070 if (gimple_code (stmt) == GIMPLE_PHI)
2071 {
2072 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2073 could not be processed anyway, so just fail for them. */
2074 if (bitmap_bit_p (looparound_phis,
2075 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2076 continue;
2077
2078 return NULL;
2079 }
2080 else if (is_gimple_debug (stmt))
2081 continue;
2082 else if (ret != NULL)
2083 return NULL;
2084 else
2085 ret = stmt;
2086 }
2087
2088 return ret;
2089 }
2090
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Remove the phi first, then continue with the copy chain rooted at
	 its single non-looparound use (if that use is a plain ssa copy).  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of single-use ssa copies, removing each statement
     as we go.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      if (TREE_CODE (name) == SSA_NAME)
	{
	  next = single_nonlooparound_use (name);
	  reset_debug_uses (stmt);
	}
      else
	{
	  /* This is a store to be eliminated.  */
	  gcc_assert (gimple_vdef (stmt) != NULL);
	  next = NULL;
	}

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      /* Stop when the next use is not a plain copy of NAME.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
2148
/* Perform the predictive commoning optimization for a chain CHAIN.
   Uids of the newly created temporary variables are marked in TMP_VARS.
   Combined chains are handled later; store-store chains are transformed
   by eliminating dead stores; other chains get root variables and have
   their references replaced by those variables.  */

static void
execute_pred_commoning_chain (class loop *loop, chain_p chain,
			      bitmap tmp_vars)
{
  unsigned i;
  dref a;
  tree var;
  bool in_lhs;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
	 compute the values of the expression (except for the root one).
	 We delay this until after all chains are processed.  */
    }
  else if (chain->type == CT_STORE_STORE)
    {
      if (chain->length > 0)
	{
	  if (chain->inv_store_elimination)
	    {
	      /* If dead stores in this chain only store loop invariant
		 values, we can simply record the invariant value and use
		 it directly after loop.  */
	      initialize_root_vars_store_elim_1 (chain);
	    }
	  else
	    {
	      /* If dead stores in this chain store loop variant values,
		 we need to set up the variables by loading from memory
		 before loop and propagating it with PHI nodes.  */
	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
	    }

	  /* For inter-iteration store elimination chain, stores at each
	     distance in loop's last (chain->length - 1) iterations can't
	     be eliminated, because there is no following killing store.
	     We need to generate these stores after loop.  */
	  finalize_eliminated_stores (loop, chain);
	}

      bool last_store_p = true;
      /* Iterate backwards so that the last (program-order) store is seen
	 first and kept.  */
      for (i = chain->refs.length (); i > 0; i--)
	{
	  a = chain->refs[i - 1];
	  /* Preserve the last store of the chain.  Eliminate other stores
	     which are killed by the last one.  */
	  if (DR_IS_WRITE (a->ref))
	    {
	      if (last_store_p)
		last_store_p = false;
	      else
		remove_stmt (a->stmt);

	      continue;
	    }

	  /* Any load in Store-Store chain must be dominated by a previous
	     store, we replace the load reference with rhs of the store.  */
	  dref b = get_chain_last_write_before_load (chain, i - 1);
	  gcc_assert (b != NULL);
	  var = gimple_assign_rhs1 (b->stmt);
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value.  */
      initialize_root_vars (loop, chain, tmp_vars);
      a = get_chain_root (chain);
      /* The root reference defines the value in store-load and combination
	 chains, so the variable replaces its lhs there.  */
      in_lhs = (chain->type == CT_STORE_LOAD
		|| chain->type == CT_COMBINATION);
      replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);

      /* Replace the uses of the original references by these variables.  */
      for (i = 1; chain->refs.iterate (i, &a); i++)
	{
	  /* A reference at distance D reads the value stored D iterations
	     ago, held in the corresponding root variable.  */
	  var = chain->vars[chain->length - a->distance];
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
}
2234
2235 /* Determines the unroll factor necessary to remove as many temporary variable
2236 copies as possible. CHAINS is the list of chains that will be
2237 optimized. */
2238
2239 static unsigned
determine_unroll_factor(vec<chain_p> chains)2240 determine_unroll_factor (vec<chain_p> chains)
2241 {
2242 chain_p chain;
2243 unsigned factor = 1, af, nfactor, i;
2244 unsigned max = param_max_unroll_times;
2245
2246 FOR_EACH_VEC_ELT (chains, i, chain)
2247 {
2248 if (chain->type == CT_INVARIANT)
2249 continue;
2250 /* For now we can't handle unrolling when eliminating stores. */
2251 else if (chain->type == CT_STORE_STORE)
2252 return 1;
2253
2254 if (chain->combined)
2255 {
2256 /* For combined chains, we can't handle unrolling if we replace
2257 looparound PHIs. */
2258 dref a;
2259 unsigned j;
2260 for (j = 1; chain->refs.iterate (j, &a); j++)
2261 if (gimple_code (a->stmt) == GIMPLE_PHI)
2262 return 1;
2263 continue;
2264 }
2265
2266 /* The best unroll factor for this chain is equal to the number of
2267 temporary variables that we create for it. */
2268 af = chain->length;
2269 if (chain->has_max_use_after)
2270 af++;
2271
2272 nfactor = factor * af / gcd (factor, af);
2273 if (nfactor <= max)
2274 factor = nfactor;
2275 }
2276
2277 return factor;
2278 }
2279
2280 /* Perform the predictive commoning optimization for CHAINS.
2281 Uids of the newly created temporary variables are marked in TMP_VARS. */
2282
2283 static void
execute_pred_commoning(class loop * loop,vec<chain_p> chains,bitmap tmp_vars)2284 execute_pred_commoning (class loop *loop, vec<chain_p> chains,
2285 bitmap tmp_vars)
2286 {
2287 chain_p chain;
2288 unsigned i;
2289
2290 FOR_EACH_VEC_ELT (chains, i, chain)
2291 {
2292 if (chain->type == CT_INVARIANT)
2293 execute_load_motion (loop, chain, tmp_vars);
2294 else
2295 execute_pred_commoning_chain (loop, chain, tmp_vars);
2296 }
2297
2298 FOR_EACH_VEC_ELT (chains, i, chain)
2299 {
2300 if (chain->type == CT_INVARIANT)
2301 ;
2302 else if (chain->combined)
2303 {
2304 /* For combined chains, just remove the statements that are used to
2305 compute the values of the expression (except for the root one). */
2306 dref a;
2307 unsigned j;
2308 for (j = 1; chain->refs.iterate (j, &a); j++)
2309 remove_stmt (a->stmt);
2310 }
2311 }
2312
2313 update_ssa (TODO_update_ssa_only_virtuals);
2314 }
2315
2316 /* For each reference in CHAINS, if its defining statement is
2317 phi node, record the ssa name that is defined by it. */
2318
2319 static void
replace_phis_by_defined_names(vec<chain_p> chains)2320 replace_phis_by_defined_names (vec<chain_p> chains)
2321 {
2322 chain_p chain;
2323 dref a;
2324 unsigned i, j;
2325
2326 FOR_EACH_VEC_ELT (chains, i, chain)
2327 FOR_EACH_VEC_ELT (chain->refs, j, a)
2328 {
2329 if (gimple_code (a->stmt) == GIMPLE_PHI)
2330 {
2331 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2332 a->stmt = NULL;
2333 }
2334 }
2335 }
2336
2337 /* For each reference in CHAINS, if name_defined_by_phi is not
2338 NULL, use it to set the stmt field. */
2339
2340 static void
replace_names_by_phis(vec<chain_p> chains)2341 replace_names_by_phis (vec<chain_p> chains)
2342 {
2343 chain_p chain;
2344 dref a;
2345 unsigned i, j;
2346
2347 FOR_EACH_VEC_ELT (chains, i, chain)
2348 FOR_EACH_VEC_ELT (chain->refs, j, a)
2349 if (a->stmt == NULL)
2350 {
2351 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2352 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2353 a->name_defined_by_phi = NULL_TREE;
2354 }
2355 }
2356
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;	/* The chains to be transformed.  */
  bitmap tmp_vars;	/* Uids of the created temporary variables.  */
};
2365
2366 static void
execute_pred_commoning_cbck(class loop * loop,void * data)2367 execute_pred_commoning_cbck (class loop *loop, void *data)
2368 {
2369 struct epcc_data *const dta = (struct epcc_data *) data;
2370
2371 /* Restore phi nodes that were replaced by ssa names before
2372 tree_transform_and_unroll_loop (see detailed description in
2373 tree_predictive_commoning_loop). */
2374 replace_names_by_phis (dta->chains);
2375 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2376 }
2377
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (class loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  /* Follow the chain: find a phi inside the loop that uses NAME, rebase
     its result, and repeat from there until no such phi exists.  */
  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      /* Terminate the immediate-use iteration properly.  */
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
2409
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  /* Look at each phi in the loop header whose result is based on one of
     the temporary variables created by this pass.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  /* Such intermediate phis sit in single-predecessor blocks
	     created by the unrolling, so they have exactly one arg.  */
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
2453
2454 /* Returns true if CHAIN is suitable to be combined. */
2455
2456 static bool
chain_can_be_combined_p(chain_p chain)2457 chain_can_be_combined_p (chain_p chain)
2458 {
2459 return (!chain->combined
2460 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2461 }
2462
/* Returns the modify statement that uses NAME.  Skips over assignment
   statements, NAME is replaced with the actual name used in the returned
   statement.  Returns NULL if there is no single use, the use is not a
   binary assignment, or the copy chain is broken.  */

static gimple *
find_use_stmt (tree *name)
{
  gimple *stmt;
  tree rhs, lhs;

  /* Skip over assignments.  */
  while (1)
    {
      stmt = single_nonlooparound_use (*name);
      if (!stmt)
	return NULL;

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return NULL;

      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return NULL;

      if (gimple_assign_copy_p (stmt))
	{
	  /* A plain copy: continue the search from its lhs, keeping
	     *NAME up to date for the caller.  */
	  rhs = gimple_assign_rhs1 (stmt);
	  if (rhs != *name)
	    return NULL;

	  *name = lhs;
	}
      else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	       == GIMPLE_BINARY_RHS)
	return stmt;
      else
	return NULL;
    }
}
2502
2503 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2504
2505 static bool
may_reassociate_p(tree type,enum tree_code code)2506 may_reassociate_p (tree type, enum tree_code code)
2507 {
2508 if (FLOAT_TYPE_P (type)
2509 && !flag_unsafe_math_optimizations)
2510 return false;
2511
2512 return (commutative_tree_code (code)
2513 && associative_tree_code (code));
2514 }
2515
/* If the operation used in STMT is associative and commutative, go through the
   tree of the same operations and returns its root.  Distance to the root
   is stored in DISTANCE (which may be NULL if the caller does not need it).
   Returns NULL if the operation cannot be reassociated.  */

static gimple *
find_associative_operation_root (gimple *stmt, unsigned *distance)
{
  tree lhs;
  gimple *next;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  unsigned dist = 0;

  if (!may_reassociate_p (type, code))
    return NULL;

  /* Climb the chain of single uses as long as the same operation CODE
     is applied, counting the steps.  */
  while (1)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (lhs) == SSA_NAME);

      next = find_use_stmt (&lhs);
      if (!next
	  || gimple_assign_rhs_code (next) != code)
	break;

      stmt = next;
      dist++;
    }

  if (distance)
    *distance = dist;
  return stmt;
}
2550
2551 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2552 is no such statement, returns NULL_TREE. In case the operation used on
2553 NAME1 and NAME2 is associative and commutative, returns the root of the
2554 tree formed by this operation instead of the statement that uses NAME1 or
2555 NAME2. */
2556
2557 static gimple *
find_common_use_stmt(tree * name1,tree * name2)2558 find_common_use_stmt (tree *name1, tree *name2)
2559 {
2560 gimple *stmt1, *stmt2;
2561
2562 stmt1 = find_use_stmt (name1);
2563 if (!stmt1)
2564 return NULL;
2565
2566 stmt2 = find_use_stmt (name2);
2567 if (!stmt2)
2568 return NULL;
2569
2570 if (stmt1 == stmt2)
2571 return stmt1;
2572
2573 stmt1 = find_associative_operation_root (stmt1, NULL);
2574 if (!stmt1)
2575 return NULL;
2576 stmt2 = find_associative_operation_root (stmt2, NULL);
2577 if (!stmt2)
2578 return NULL;
2579
2580 return (stmt1 == stmt2 ? stmt1 : NULL);
2581 }
2582
/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set these values instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
		   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple *stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
	 is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  /* Record the actual operation, operand order and result type of the
     combining statement.  */
  acode = gimple_assign_rhs_code (stmt);
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  if (*code == ERROR_MARK)
    {
      /* First pair of references seen: remember the combination shape
	 so the remaining pairs can be checked against it.  */
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
2627
2628 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2629 an assignment of the remaining operand. */
2630
2631 static void
remove_name_from_operation(gimple * stmt,tree op)2632 remove_name_from_operation (gimple *stmt, tree op)
2633 {
2634 tree other_op;
2635 gimple_stmt_iterator si;
2636
2637 gcc_assert (is_gimple_assign (stmt));
2638
2639 if (gimple_assign_rhs1 (stmt) == op)
2640 other_op = gimple_assign_rhs2 (stmt);
2641 else
2642 other_op = gimple_assign_rhs1 (stmt);
2643
2644 si = gsi_for_stmt (stmt);
2645 gimple_assign_set_rhs_from_tree (&si, other_op);
2646
2647 /* We should not have reallocated STMT. */
2648 gcc_assert (gsi_stmt (si) == stmt);
2649
2650 update_stmt (stmt);
2651 }
2652
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
   are combined in a single statement, and returns this statement.  */

static gimple *
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  /* The caller guarantees both names feed the same reassociable tree.  */
  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First walk the deeper side up until both are at the same depth ...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ... then walk both up in lockstep until they meet.  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
				  gimple_assign_rhs1 (s1),
				  gimple_assign_rhs2 (s1));

  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2735
2736 /* Returns the statement that combines references R1 and R2. In case R1
2737 and R2 are not used in the same statement, but they are used with an
2738 associative and commutative operation in the same expression, reassociate
2739 the expression so that they are used in the same statement. */
2740
2741 static gimple *
stmt_combining_refs(dref r1,dref r2)2742 stmt_combining_refs (dref r1, dref r2)
2743 {
2744 gimple *stmt1, *stmt2;
2745 tree name1 = name_for_ref (r1);
2746 tree name2 = name_for_ref (r2);
2747
2748 stmt1 = find_use_stmt (&name1);
2749 stmt2 = find_use_stmt (&name2);
2750 if (stmt1 == stmt2)
2751 return stmt1;
2752
2753 return reassociate_to_the_same_stmt (name1, name2);
2754 }
2755
/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
   description of the new chain is returned, otherwise we return NULL.  */

static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  tree rslt_type = NULL_TREE;

  /* Chains can only be combined with a distinct chain of the same shape.  */
  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  /* Check pairwise that the corresponding references are combined with
     one and the same operation; the first pair determines OP, SWAP and
     RSLT_TYPE (see combinable_refs_p).  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
	return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
	return NULL;
    }

  /* For a non-commutative operation used as R2 OP R1, normalize so that
     CH1 always provides the first operand.  */
  if (swap)
    std::swap (ch1, ch2);

  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  /* Build the references of the combined chain from the statements that
     combine the corresponding pairs.  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (class dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
2812
2813 /* Recursively update position information of all offspring chains to ROOT
2814 chain's position information. */
2815
2816 static void
update_pos_for_combined_chains(chain_p root)2817 update_pos_for_combined_chains (chain_p root)
2818 {
2819 chain_p ch1 = root->ch1, ch2 = root->ch2;
2820 dref ref, ref1, ref2;
2821 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2822 && ch1->refs.iterate (j, &ref1)
2823 && ch2->refs.iterate (j, &ref2)); ++j)
2824 ref1->pos = ref2->pos = ref->pos;
2825
2826 if (ch1->type == CT_COMBINATION)
2827 update_pos_for_combined_chains (ch1);
2828 if (ch2->type == CT_COMBINATION)
2829 update_pos_for_combined_chains (ch2);
2830 }
2831
2832 /* Returns true if statement S1 dominates statement S2. */
2833
2834 static bool
pcom_stmt_dominates_stmt_p(gimple * s1,gimple * s2)2835 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2836 {
2837 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2838
2839 if (!bb1 || s1 == s2)
2840 return true;
2841
2842 if (bb1 == bb2)
2843 return gimple_uid (s1) < gimple_uid (s2);
2844
2845 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2846 }
2847
/* Try to combine the CHAINS in LOOP.  Newly created combination chains
   are appended to CHAINS; references of combined chains are re-sorted
   in dominance order afterwards.  */

static void
try_combine_chains (class loop *loop, vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;
  bool combined_p = false;

  /* Seed the worklist with all combinable chains.  */
  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  /* Repeatedly try to combine a worklist chain with any other chain;
     a successful combination is itself pushed for further combining.  */
  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
	continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
	{
	  if (!chain_can_be_combined_p (ch2))
	    continue;

	  cch = combine_chains (ch1, ch2);
	  if (cch)
	    {
	      worklist.safe_push (cch);
	      chains->safe_push (cch);
	      combined_p = true;
	      break;
	    }
	}
    }
  if (!combined_p)
    return;

  /* Setup UID for all statements in dominance order.  */
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
  free (bbs);

  /* Re-association in combined chains may generate statements different to
     order of references of the original chain.  We need to keep references
     of combined chain in dominance order so that all uses will be inserted
     after definitions.  Note:
       A) This is necessary for all combined chains.
       B) This is only necessary for ZERO distance references because other
	  references inherit value from loop carried PHIs.

     We first update position information for all combined chains.  */
  dref ref;
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      /* Only process topmost combination chains (those not themselves
	 combined into another chain); they propagate positions down.  */
      if (ch1->type != CT_COMBINATION || ch1->combined)
	continue;

      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	ref->pos = gimple_uid (ref->stmt);

      update_pos_for_combined_chains (ch1);
    }
  /* Then sort references according to newly updated position information.  */
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION && !ch1->combined)
	continue;

      /* Find the first reference with non-ZERO distance.  */
      if (ch1->length == 0)
	j = ch1->refs.length();
      else
	{
	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	    if (ref->distance != 0)
	      break;
	}

      /* Sort all ZERO distance references by position.  */
      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);

      if (ch1->combined)
	continue;

      /* For ZERO length chain, has_max_use_after must be true since root
	 combined stmt must dominates others.  */
      if (ch1->length == 0)
	{
	  ch1->has_max_use_after = true;
	  continue;
	}
      /* Check if there is use at max distance after root for combined chains
	 and set flag accordingly.  */
      ch1->has_max_use_after = false;
      gimple *root_stmt = get_chain_root (ch1)->stmt;
      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
	{
	  if (ref->distance == ch1->length
	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
	    {
	      ch1->has_max_use_after = true;
	      break;
	    }
	}
    }
}
2955
/* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
   if this is impossible because one of these initializers may trap, true
   otherwise.  */

static bool
prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;

  /* For now we can't eliminate stores if some of them are conditional
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  /* Nothing to initialize for intra-iteration store elimination.  */
  if (n == 0 && chain->type == CT_STORE_STORE)
    return true;

  /* For store elimination chain, there is nothing to initialize if stores
     to be eliminated only store loop invariant values into memory.  */
  if (chain->type == CT_STORE_STORE
      && is_inv_store_elimination_chain (loop, chain))
    {
      chain->inv_store_elimination = true;
      return true;
    }

  chain->inits.create (n);
  chain->inits.safe_grow_cleared (n);

  /* For store elimination chain like below:

     for (i = 0; i < len; i++)
       {
	 a[i] = 1;
	 // a[i + 1] = ...
	 a[i + 2] = 3;
       }

     store to a[i + 1] is missed in loop body, it acts like bubbles.  The
     content of a[i + 1] remain the same if the loop iterates fewer times
     than chain->length.  We need to set up root variables for such stores
     by loading from memory before loop.  Note we only need to load bubble
     elements because loop body is guaranteed to be executed at least once
     after loop's preheader edge.  */
  auto_vec<bool> bubbles;
  bubbles.safe_grow_cleared (n + 1);
  /* Mark the distances that are actually covered by a store.  */
  for (i = 0; i < chain->refs.length (); i++)
    bubbles[chain->refs[i]->distance] = true;

  struct data_reference *dr = get_chain_root (chain)->ref;
  /* Generate a pre-loop load for every distance without a store.  */
  for (i = 0; i < n; i++)
    {
      if (bubbles[i])
	continue;

      gimple_seq stmts = NULL;

      tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
3023
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (class loop *loop, chain_p chain)
{
  /* Invariant chains need a single initializer; otherwise one per
     carried distance.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Store-store chains have their own initializer logic.  */
  if (chain->type == CT_STORE_STORE)
    return prepare_initializers_chain_store_elim (loop, chain);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  /* Generate loads before the loop for the remaining slots.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      if (chain->inits[i] != NULL_TREE)
	continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      /* If the reference is not always executed, a pre-loop load of it
	 may trap even though the loop itself would not; give up then.  */
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	{
	  gimple_seq_discard (stmts);
	  return false;
	}

      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
3079
3080 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3081 be used because the initializers might trap. */
3082
3083 static void
prepare_initializers(class loop * loop,vec<chain_p> chains)3084 prepare_initializers (class loop *loop, vec<chain_p> chains)
3085 {
3086 chain_p chain;
3087 unsigned i;
3088
3089 for (i = 0; i < chains.length (); )
3090 {
3091 chain = chains[i];
3092 if (prepare_initializers_chain (loop, chain))
3093 i++;
3094 else
3095 {
3096 release_chain (chain);
3097 chains.unordered_remove (i);
3098 }
3099 }
3100 }
3101
/* Generates finalizer memory references for CHAIN in LOOP.  Returns true
   if finalizer code for CHAIN can be generated, otherwise false.  */

static bool
prepare_finalizers_chain (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree fini, niters = number_of_latch_executions (loop);

  /* For now we can't eliminate stores if some of them are conditional
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  chain->finis.create (n);
  for (i = 0; i < n; i++)
    chain->finis.quick_push (NULL_TREE);

  /* We never use looparound phi node for store elimination chains.  */

  /* Find the finalizers for the variables, and check that they cannot
     trap.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;
      gcc_assert (chain->finis[i] == NULL_TREE);

      /* Gimplify the niters expression once, so later references built
	 at iteration NITERS can reuse the resulting SSA name.  */
      if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
	{
	  niters = unshare_expr (niters);
	  niters = force_gimple_operand (niters, &stmts, true, NULL);
	  if (stmts)
	    {
	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
	      stmts = NULL;
	    }
	}
      fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);

      chain->finis[i] = fini;
    }

  return true;
}
3149
3150 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3151 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3152
3153 static bool
prepare_finalizers(class loop * loop,vec<chain_p> chains)3154 prepare_finalizers (class loop *loop, vec<chain_p> chains)
3155 {
3156 chain_p chain;
3157 unsigned i;
3158 bool loop_closed_ssa = false;
3159
3160 for (i = 0; i < chains.length ();)
3161 {
3162 chain = chains[i];
3163
3164 /* Finalizer is only necessary for inter-iteration store elimination
3165 chains. */
3166 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3167 {
3168 i++;
3169 continue;
3170 }
3171
3172 if (prepare_finalizers_chain (loop, chain))
3173 {
3174 i++;
3175 /* Be conservative, assume loop closed ssa form is corrupted
3176 by store-store chain. Though it's not always the case if
3177 eliminated stores only store loop invariant values into
3178 memory. */
3179 loop_closed_ssa = true;
3180 }
3181 else
3182 {
3183 release_chain (chain);
3184 chains.unordered_remove (i);
3185 }
3186 }
3187 return loop_closed_ssa;
3188 }
3189
3190 /* Insert all initializing gimple stmts into loop's entry edge. */
3191
3192 static void
insert_init_seqs(class loop * loop,vec<chain_p> chains)3193 insert_init_seqs (class loop *loop, vec<chain_p> chains)
3194 {
3195 unsigned i;
3196 edge entry = loop_preheader_edge (loop);
3197
3198 for (i = 0; i < chains.length (); ++i)
3199 if (chains[i]->init_seq)
3200 {
3201 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3202 chains[i]->init_seq = NULL;
3203 }
3204 }
3205
3206 /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3207 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3208 form was corrupted. */
3209
static unsigned
tree_predictive_commoning_loop (class loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  class tree_niter_desc desc;
  bool unroll = false, loop_closed_ssa = false;
  edge exit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n", loop->num);

  /* Nothing for predictive commoning if loop only iterates 1 time.  */
  if (get_max_loop_iterations_int (loop) == 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");

      return 0;
    }

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      /* Bail out, releasing what was allocated so far.  */
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  auto_bitmap tmp_vars;
  /* NOTE: looparound_phis is assigned, not declared, here — presumably a
     file-scope bitmap shared with other routines in this file; it is
     released at the END label below.  */
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);
  /* Store-store chains may break loop closed ssa form; remember that so
     the caller can signal it via bit 1<<1 of the return value.  */
  loop_closed_ssa = prepare_finalizers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (loop, &chains);

  insert_init_seqs (loop, chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

  /* Common cleanup, reached both on success and on the no-chains bailout.  */
end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  /* Bit 1<<0: loop was unrolled; bit 1<<1: loop closed ssa possibly
     corrupted (see the function's header comment).  */
  return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
}
3348
3349 /* Runs predictive commoning. */
3350
3351 unsigned
tree_predictive_commoning(void)3352 tree_predictive_commoning (void)
3353 {
3354 class loop *loop;
3355 unsigned ret = 0, changed = 0;
3356
3357 initialize_original_copy_tables ();
3358 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3359 if (optimize_loop_for_speed_p (loop))
3360 {
3361 changed |= tree_predictive_commoning_loop (loop);
3362 }
3363 free_original_copy_tables ();
3364
3365 if (changed > 0)
3366 {
3367 scev_reset ();
3368
3369 if (changed > 1)
3370 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3371
3372 ret = TODO_cleanup_cfg;
3373 }
3374
3375 return ret;
3376 }
3377
3378 /* Predictive commoning Pass. */
3379
3380 static unsigned
run_tree_predictive_commoning(struct function * fun)3381 run_tree_predictive_commoning (struct function *fun)
3382 {
3383 if (number_of_loops (fun) <= 1)
3384 return 0;
3385
3386 return tree_predictive_commoning ();
3387 }
3388
namespace {

/* Static descriptor of the predictive commoning pass, consumed by the
   pass manager.  */
const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* The predictive commoning pass object, gated on
   flag_predictive_commoning.  */
class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when predictive commoning is enabled.  */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace
3421
3422 gimple_opt_pass *
make_pass_predcom(gcc::context * ctxt)3423 make_pass_predcom (gcc::context *ctxt)
3424 {
3425 return new pass_predcom (ctxt);
3426 }
3427
3428
3429