xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-predcom.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Predictive commoning.
2    Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
22    as generalized strength reduction -- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
52    2) Inside each of the group, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
130       (N + 1) for each root reference (N for references for that we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
155           x = x + i;
156           b[10] = x;
157        }
158 
159    Apart from predictive commoning on Load-Load and Store-Load chains, we
160    also support Store-Store chains -- stores killed by other store can be
161    eliminated.  Given below example:
162 
163      for (i = 0; i < n; i++)
164        {
165 	 a[i] = 1;
166 	 a[i+2] = 2;
167        }
168 
169    It can be replaced with:
170 
171      t0 = a[0];
172      t1 = a[1];
173      for (i = 0; i < n; i++)
174        {
175 	 a[i] = 1;
176 	 t2 = 2;
177 	 t0 = t1;
178 	 t1 = t2;
179        }
180      a[n] = t0;
181      a[n+1] = t1;
182 
183    If the loop runs more than 1 iterations, it can be further simplified into:
184 
185      for (i = 0; i < n; i++)
186        {
187 	 a[i] = 1;
188        }
189      a[n] = 2;
190      a[n+1] = 2;
191 
192    The interesting part is this can be viewed either as general store motion
193    or general dead store elimination in either intra/inter-iterations way.
194 
195    With trivial effort, we also support load inside Store-Store chains if the
196    load is dominated by a store statement in the same iteration of loop.  You
197    can see this as a restricted Store-Mixed-Load-Store chain.
198 
199    TODO: For now, we don't support store-store chains in multi-exit loops.  We
200    force to not unroll in case of store-store chain even if other chains might
201    ask for unroll.
202 
203    Predictive commoning can be generalized for arbitrary computations (not
204    just memory loads), and also nontrivial transfer functions (e.g., replacing
205    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
206 
207 #include "config.h"
208 #include "system.h"
209 #include "coretypes.h"
210 #include "backend.h"
211 #include "rtl.h"
212 #include "tree.h"
213 #include "gimple.h"
214 #include "predict.h"
215 #include "tree-pass.h"
216 #include "ssa.h"
217 #include "gimple-pretty-print.h"
218 #include "alias.h"
219 #include "fold-const.h"
220 #include "cfgloop.h"
221 #include "tree-eh.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "tree-affine.h"
235 #include "builtins.h"
236 
237 /* The maximum number of iterations between the considered memory
238    references; a smaller limit is used on targets with fewer than 16
   available registers, to bound the register pressure the pass adds.  */
239 
240 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
241 
242 /* Data references (or phi nodes that carry data reference values across
243    loop iterations).  */
244 
245 typedef class dref_d
246 {
247 public:
248   /* The reference itself.  NULL for looparound phi nodes and for the
     artificial references that represent combinations (see dump_dref).  */
249   struct data_reference *ref;
250 
251   /* The statement in which the reference appears.  */
252   gimple *stmt;
253 
254   /* In case that STMT is a phi node, this field is set to the SSA name
255      defined by it in replace_phis_by_defined_names (in order to avoid
256      pointing to phi node that got reallocated in the meantime).  */
257   tree name_defined_by_phi;
258 
259   /* Distance of the reference from the root of the chain (in number of
260      iterations of the loop).  */
261   unsigned distance;
262 
263   /* Number of iterations offset from the first reference in the component.  */
264   widest_int offset;
265 
266   /* Number of the reference in a component, in dominance ordering.  */
267   unsigned pos;
268 
269   /* True if the memory reference is always accessed when the loop is
270      entered.  */
271   unsigned always_accessed : 1;
272 } *dref;
273 
274 
275 /* Type of the chain of the references.  */
276 
277 enum chain_type
278 {
279   /* The addresses of the references in the chain are loop invariant;
     such chains are handled by load motion.  */
280   CT_INVARIANT,
281 
282   /* There are only loads in the chain.  */
283   CT_LOAD,
284 
285   /* Root of the chain is store, the rest are loads.  */
286   CT_STORE_LOAD,
287 
288   /* There are only stores in the chain (store elimination).  */
289   CT_STORE_STORE,
290 
291   /* A combination of two chains (see OP, CH1 and CH2 in struct chain).  */
292   CT_COMBINATION
293 };
294 
295 /* Chains of data references.  */
296 
297 typedef struct chain
298 {
299   /* Type of the chain.  */
300   enum chain_type type;
301 
302   /* For combination chains, the operator and the two chains that are
303      combined, and the type of the result.  */
304   enum tree_code op;
305   tree rslt_type;
306   struct chain *ch1, *ch2;
307 
308   /* The references in the chain.  */
309   vec<dref> refs;
310 
311   /* The maximum distance of the reference in the chain from the root.  */
312   unsigned length;
313 
314   /* The variables used to copy the value throughout iterations.  */
315   vec<tree> vars;
316 
317   /* Initializers for the variables.  */
318   vec<tree> inits;
319 
320   /* Finalizers for the eliminated stores.  */
321   vec<tree> finis;
322 
323   /* gimple stmts initializing the initial variables of the chain.  */
324   gimple_seq init_seq;
325 
326   /* gimple stmts finalizing the eliminated stores of the chain.  */
327   gimple_seq fini_seq;
328 
329   /* True if there is a use of a variable with the maximal distance
330      that comes after the root in the loop.  */
331   unsigned has_max_use_after : 1;
332 
333   /* True if all the memory references in the chain are always accessed.  */
334   unsigned all_always_accessed : 1;
335 
336   /* True if this chain was combined together with some other chain.  */
337   unsigned combined : 1;
338 
339   /* True if this is a store elimination chain and the eliminated stores
340      store a loop invariant value into memory.  */
341   unsigned inv_store_elimination : 1;
342 } *chain_p;
343 
344 
345 /* Describes the knowledge about the step of the memory references in
346    the component.  */
347 
348 enum ref_step_type
349 {
350   /* The step is known to be zero, i.e. the address is loop invariant.  */
351   RS_INVARIANT,
352 
353   /* The step is known to be nonzero.  */
354   RS_NONZERO,
355 
356   /* The step may or may not be nonzero (not provable either way at
     compile time).  */
357   RS_ANY
358 };
359 
360 /* Components of the data dependence graph.  */
361 
362 struct component
363 {
364   /* The references in the component; suitable_component_p verifies that
     they are ordered by dominance.  */
365   vec<dref> refs;
366 
367   /* What we know about the step of the references in the component.  */
368   enum ref_step_type comp_step;
369 
370   /* True if all references in component are stores and we try to do
371      intra/inter loop iteration dead store elimination.  */
372   bool eliminate_store_p;
373 
374   /* Next component in the list.  */
375   struct component *next;
376 };
377 
378 /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
379 
380 static bitmap looparound_phis;
381 
382 /* Cache of SSA name expansions shared by all calls to
   tree_to_aff_combination_expand in this file.  */
383 
384 static hash_map<tree, name_expansion *> *name_expansions;
385 
386 /* Dumps data reference REF to FILE.  */
387 
388 extern void dump_dref (FILE *, dref);
389 void
dump_dref(FILE * file,dref ref)390 dump_dref (FILE *file, dref ref)
391 {
392   if (ref->ref)
393     {
394       fprintf (file, "    ");
395       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396       fprintf (file, " (id %u%s)\n", ref->pos,
397 	       DR_IS_READ (ref->ref) ? "" : ", write");
398 
399       fprintf (file, "      offset ");
400       print_decs (ref->offset, file);
401       fprintf (file, "\n");
402 
403       fprintf (file, "      distance %u\n", ref->distance);
404     }
405   else
406     {
407       if (gimple_code (ref->stmt) == GIMPLE_PHI)
408 	fprintf (file, "    looparound ref\n");
409       else
410 	fprintf (file, "    combination ref\n");
411       fprintf (file, "      in statement ");
412       print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
413       fprintf (file, "\n");
414       fprintf (file, "      distance %u\n", ref->distance);
415     }
416 
417 }
418 
419 /* Dumps CHAIN to FILE.  */
420 
421 extern void dump_chain (FILE *, chain_p);
422 void
dump_chain(FILE * file,chain_p chain)423 dump_chain (FILE *file, chain_p chain)
424 {
425   dref a;
426   const char *chain_type;
427   unsigned i;
428   tree var;
429 
430   switch (chain->type)
431     {
432     case CT_INVARIANT:
433       chain_type = "Load motion";
434       break;
435 
436     case CT_LOAD:
437       chain_type = "Loads-only";
438       break;
439 
440     case CT_STORE_LOAD:
441       chain_type = "Store-loads";
442       break;
443 
444     case CT_STORE_STORE:
445       chain_type = "Store-stores";
446       break;
447 
448     case CT_COMBINATION:
449       chain_type = "Combination";
450       break;
451 
452     default:
453       gcc_unreachable ();
454     }
455 
456   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 	   chain->combined ? " (combined)" : "");
458   if (chain->type != CT_INVARIANT)
459     fprintf (file, "  max distance %u%s\n", chain->length,
460 	     chain->has_max_use_after ? "" : ", may reuse first");
461 
462   if (chain->type == CT_COMBINATION)
463     {
464       fprintf (file, "  equal to %p %s %p in type ",
465 	       (void *) chain->ch1, op_symbol_code (chain->op),
466 	       (void *) chain->ch2);
467       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468       fprintf (file, "\n");
469     }
470 
471   if (chain->vars.exists ())
472     {
473       fprintf (file, "  vars");
474       FOR_EACH_VEC_ELT (chain->vars, i, var)
475 	{
476 	  fprintf (file, " ");
477 	  print_generic_expr (file, var, TDF_SLIM);
478 	}
479       fprintf (file, "\n");
480     }
481 
482   if (chain->inits.exists ())
483     {
484       fprintf (file, "  inits");
485       FOR_EACH_VEC_ELT (chain->inits, i, var)
486 	{
487 	  fprintf (file, " ");
488 	  print_generic_expr (file, var, TDF_SLIM);
489 	}
490       fprintf (file, "\n");
491     }
492 
493   fprintf (file, "  references:\n");
494   FOR_EACH_VEC_ELT (chain->refs, i, a)
495     dump_dref (file, a);
496 
497   fprintf (file, "\n");
498 }
499 
500 /* Dumps CHAINS to FILE.  */
501 
502 extern void dump_chains (FILE *, vec<chain_p> );
503 void
dump_chains(FILE * file,vec<chain_p> chains)504 dump_chains (FILE *file, vec<chain_p> chains)
505 {
506   chain_p chain;
507   unsigned i;
508 
509   FOR_EACH_VEC_ELT (chains, i, chain)
510     dump_chain (file, chain);
511 }
512 
513 /* Dumps COMP to FILE.  */
514 
515 extern void dump_component (FILE *, struct component *);
516 void
dump_component(FILE * file,struct component * comp)517 dump_component (FILE *file, struct component *comp)
518 {
519   dref a;
520   unsigned i;
521 
522   fprintf (file, "Component%s:\n",
523 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
524   FOR_EACH_VEC_ELT (comp->refs, i, a)
525     dump_dref (file, a);
526   fprintf (file, "\n");
527 }
528 
529 /* Dumps COMPS to FILE.  */
530 
531 extern void dump_components (FILE *, struct component *);
532 void
dump_components(FILE * file,struct component * comps)533 dump_components (FILE *file, struct component *comps)
534 {
535   struct component *comp;
536 
537   for (comp = comps; comp; comp = comp->next)
538     dump_component (file, comp);
539 }
540 
541 /* Frees a chain CHAIN.  */
542 
543 static void
release_chain(chain_p chain)544 release_chain (chain_p chain)
545 {
546   dref ref;
547   unsigned i;
548 
549   if (chain == NULL)
550     return;
551 
552   FOR_EACH_VEC_ELT (chain->refs, i, ref)
553     free (ref);
554 
555   chain->refs.release ();
556   chain->vars.release ();
557   chain->inits.release ();
558   if (chain->init_seq)
559     gimple_seq_discard (chain->init_seq);
560 
561   chain->finis.release ();
562   if (chain->fini_seq)
563     gimple_seq_discard (chain->fini_seq);
564 
565   free (chain);
566 }
567 
568 /* Frees CHAINS.  */
569 
570 static void
release_chains(vec<chain_p> chains)571 release_chains (vec<chain_p> chains)
572 {
573   unsigned i;
574   chain_p chain;
575 
576   FOR_EACH_VEC_ELT (chains, i, chain)
577     release_chain (chain);
578   chains.release ();
579 }
580 
581 /* Frees a component COMP.  */
582 
583 static void
release_component(struct component * comp)584 release_component (struct component *comp)
585 {
586   comp->refs.release ();
587   free (comp);
588 }
589 
590 /* Frees list of components COMPS.  */
591 
592 static void
release_components(struct component * comps)593 release_components (struct component *comps)
594 {
595   struct component *act, *next;
596 
597   for (act = comps; act; act = next)
598     {
599       next = act->next;
600       release_component (act);
601     }
602 }
603 
/* Finds the root of the union-find tree given by FATHERS that contains A,
   and performs path shortening (every node on the walked path is made to
   point directly at the root).  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  /* Locate the representative of A's component.  */
  unsigned root = a;
  while (fathers[root] != root)
    root = fathers[root];

  /* Compress the path from A to the root.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
623 
624 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
625    components, A and B are components to merge.  */
626 
627 static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)628 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629 {
630   unsigned ca = component_of (fathers, a);
631   unsigned cb = component_of (fathers, b);
632 
633   if (ca == cb)
634     return;
635 
636   if (sizes[ca] < sizes[cb])
637     {
638       sizes[cb] += sizes[ca];
639       fathers[ca] = cb;
640     }
641   else
642     {
643       sizes[ca] += sizes[cb];
644       fathers[cb] = ca;
645     }
646 }
647 
648 /* Returns true if A is a reference that is suitable for predictive commoning
649    in the innermost loop that contains it.  REF_STEP is set according to the
650    step of the reference A.  */
651 
652 static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)653 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654 {
655   tree ref = DR_REF (a), step = DR_STEP (a);
656 
657   if (!step
658       || TREE_THIS_VOLATILE (ref)
659       || !is_gimple_reg_type (TREE_TYPE (ref))
660       || tree_could_throw_p (ref))
661     return false;
662 
663   if (integer_zerop (step))
664     *ref_step = RS_INVARIANT;
665   else if (integer_nonzerop (step))
666     *ref_step = RS_NONZERO;
667   else
668     *ref_step = RS_ANY;
669 
670   return true;
671 }
672 
673 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET, i.e. the complete
   offset of DR from its base address, as an affine combination.  */
674 
675 static void
aff_combination_dr_offset(struct data_reference * dr,aff_tree * offset)676 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677 {
678   tree type = TREE_TYPE (DR_OFFSET (dr));
679   aff_tree delta;
680 
   /* Expand the symbolic part of the offset, reusing the global
      NAME_EXPANSIONS cache.  */
681   tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
682 				  &name_expansions);
   /* Add the constant part, DR_INIT.  */
683   aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
684   aff_combination_add (offset, &delta);
685 }
686 
687 /* Determines number of iterations of the innermost enclosing loop before B
688    refers to exactly the same location as A and stores it to OFF.  If A and
689    B do not have the same step, they never meet, or anything else fails,
690    returns false, otherwise returns true.  Both A and B are assumed to
691    satisfy suitable_reference_p.  */
692 
693 static bool
determine_offset(struct data_reference * a,struct data_reference * b,poly_widest_int * off)694 determine_offset (struct data_reference *a, struct data_reference *b,
695 		  poly_widest_int *off)
696 {
697   aff_tree diff, baseb, step;
698   tree typea, typeb;
699 
700   /* Check that both the references access the location in the same type.  */
701   typea = TREE_TYPE (DR_REF (a));
702   typeb = TREE_TYPE (DR_REF (b));
703   if (!useless_type_conversion_p (typeb, typea))
704     return false;
705 
706   /* Check whether the base address and the step of both references is the
707      same.  */
708   if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709       || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710     return false;
711 
712   if (integer_zerop (DR_STEP (a)))
713     {
714       /* If the references have loop invariant address, check that they access
715 	 exactly the same location.  */
716       *off = 0;
717       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719     }
720 
721   /* Compare the offsets of the addresses, and check whether the difference
722      is a multiple of step.  DIFF is set to offset (A) - offset (B) here.  */
723   aff_combination_dr_offset (a, &diff);
724   aff_combination_dr_offset (b, &baseb);
725   aff_combination_scale (&baseb, -1);
726   aff_combination_add (&diff, &baseb);
727 
   /* On success OFF satisfies DIFF == OFF * step, i.e. the iteration
      distance described in the header comment.  */
728   tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
729 				  &step, &name_expansions);
730   return aff_combination_constant_multiple_p (&diff, &step, off);
731 }
732 
733 /* Returns the last basic block in LOOP for that we are sure that
734    it is executed whenever the loop is entered.  */
735 
736 static basic_block
last_always_executed_block(class loop * loop)737 last_always_executed_block (class loop *loop)
738 {
739   unsigned i;
740   vec<edge> exits = get_loop_exit_edges (loop);
741   edge ex;
742   basic_block last = loop->latch;
743 
744   FOR_EACH_VEC_ELT (exits, i, ex)
745     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
746   exits.release ();
747 
748   return last;
749 }
750 
751 /* Splits dependence graph on DATAREFS described by DEPENDS to components,
   using a union-find forest over the indices of DATAREFS.  Returns a
   linked list of components, or NULL if some data reference prevents the
   transformation (a memory-clobbering call or asm).  */
752 
753 static struct component *
split_data_refs_to_components(class loop * loop,vec<data_reference_p> datarefs,vec<ddr_p> depends)754 split_data_refs_to_components (class loop *loop,
755 			       vec<data_reference_p> datarefs,
756 			       vec<ddr_p> depends)
757 {
758   unsigned i, n = datarefs.length ();
759   unsigned ca, ia, ib, bad;
760   unsigned *comp_father = XNEWVEC (unsigned, n + 1);
761   unsigned *comp_size = XNEWVEC (unsigned, n + 1);
762   struct component **comps;
763   struct data_reference *dr, *dra, *drb;
764   struct data_dependence_relation *ddr;
765   struct component *comp_list = NULL, *comp;
766   dref dataref;
767   /* Don't do store elimination if loop has multiple exit edges.  */
768   bool eliminate_store_p = single_exit (loop) != NULL;
769   basic_block last_always_executed = last_always_executed_block (loop);
770   auto_bitmap no_store_store_comps;
771 
   /* Initialize a singleton union-find component per data reference;
      element N is reserved as the "bad" component.  */
772   FOR_EACH_VEC_ELT (datarefs, i, dr)
773     {
774       if (!DR_REF (dr))
775 	{
776 	  /* A fake reference for call or asm_expr that may clobber memory;
777 	     just fail.  */
778 	  goto end;
779 	}
780       /* predcom pass isn't prepared to handle calls with data references.  */
781       if (is_gimple_call (DR_STMT (dr)))
782 	goto end;
783       dr->aux = (void *) (size_t) i;
784       comp_father[i] = i;
785       comp_size[i] = 1;
786     }
787 
788   /* A component reserved for the "bad" data references.  */
789   comp_father[n] = n;
790   comp_size[n] = 1;
791 
   /* Move references unsuitable for predictive commoning into the bad
      component.  */
792   FOR_EACH_VEC_ELT (datarefs, i, dr)
793     {
794       enum ref_step_type dummy;
795 
796       if (!suitable_reference_p (dr, &dummy))
797 	{
798 	  ia = (unsigned) (size_t) dr->aux;
799 	  merge_comps (comp_father, comp_size, n, ia);
800 	}
801     }
802 
   /* Merge components according to the dependences.  */
803   FOR_EACH_VEC_ELT (depends, i, ddr)
804     {
805       poly_widest_int dummy_off;
806 
807       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
808 	continue;
809 
810       dra = DDR_A (ddr);
811       drb = DDR_B (ddr);
812 
813       /* Don't do store elimination if there is any unknown dependence for
814 	 any store data reference.  */
815       if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
816 	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
817 	      || DDR_NUM_DIST_VECTS (ddr) == 0))
818 	eliminate_store_p = false;
819 
820       ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
821       ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
822       if (ia == ib)
823 	continue;
824 
825       bad = component_of (comp_father, n);
826 
827       /* If both A and B are reads, we may ignore unsuitable dependences.  */
828       if (DR_IS_READ (dra) && DR_IS_READ (drb))
829 	{
830 	  if (ia == bad || ib == bad
831 	      || !determine_offset (dra, drb, &dummy_off))
832 	    continue;
833 	}
834       /* If A is read and B write or vice versa and there is unsuitable
835 	 dependence, instead of merging both components into a component
836 	 that will certainly not pass suitable_component_p, just put the
837 	 read into bad component, perhaps at least the write together with
838 	 all the other data refs in its component will be optimizable.  */
839       else if (DR_IS_READ (dra) && ib != bad)
840 	{
841 	  if (ia == bad)
842 	    {
843 	      bitmap_set_bit (no_store_store_comps, ib);
844 	      continue;
845 	    }
846 	  else if (!determine_offset (dra, drb, &dummy_off))
847 	    {
848 	      bitmap_set_bit (no_store_store_comps, ib);
849 	      merge_comps (comp_father, comp_size, bad, ia);
850 	      continue;
851 	    }
852 	}
853       else if (DR_IS_READ (drb) && ia != bad)
854 	{
855 	  if (ib == bad)
856 	    {
857 	      bitmap_set_bit (no_store_store_comps, ia);
858 	      continue;
859 	    }
860 	  else if (!determine_offset (dra, drb, &dummy_off))
861 	    {
862 	      bitmap_set_bit (no_store_store_comps, ia);
863 	      merge_comps (comp_father, comp_size, bad, ib);
864 	      continue;
865 	    }
866 	}
       /* Two stores with an unsuitable dependence poison both components.  */
867       else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
868 	       && ia != bad && ib != bad
869 	       && !determine_offset (dra, drb, &dummy_off))
870 	{
871 	  merge_comps (comp_father, comp_size, bad, ia);
872 	  merge_comps (comp_father, comp_size, bad, ib);
873 	  continue;
874 	}
875 
876       merge_comps (comp_father, comp_size, ia, ib);
877     }
878 
879   if (eliminate_store_p)
880     {
881       tree niters = number_of_latch_executions (loop);
882 
883       /* Don't do store elimination if niters info is unknown because stores
884 	 in the last iteration can't be eliminated and we need to recover it
885 	 after loop.  */
886       eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
887     }
888 
   /* Materialize the components from the union-find forest, skipping
      everything that ended up in the bad component.  */
889   comps = XCNEWVEC (struct component *, n);
890   bad = component_of (comp_father, n);
891   FOR_EACH_VEC_ELT (datarefs, i, dr)
892     {
893       ia = (unsigned) (size_t) dr->aux;
894       ca = component_of (comp_father, ia);
895       if (ca == bad)
896 	continue;
897 
898       comp = comps[ca];
899       if (!comp)
900 	{
901 	  comp = XCNEW (struct component);
902 	  comp->refs.create (comp_size[ca]);
903 	  comp->eliminate_store_p = eliminate_store_p;
904 	  comps[ca] = comp;
905 	}
906 
907       dataref = XCNEW (class dref_d);
908       dataref->ref = dr;
909       dataref->stmt = DR_STMT (dr);
910       dataref->offset = 0;
911       dataref->distance = 0;
912 
913       dataref->always_accessed
914 	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
915 				gimple_bb (dataref->stmt));
916       dataref->pos = comp->refs.length ();
917       comp->refs.quick_push (dataref);
918     }
919 
   /* Disable store elimination for the components recorded in
      NO_STORE_STORE_COMPS above.  */
920   if (eliminate_store_p)
921     {
922       bitmap_iterator bi;
923       EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
924 	{
925 	  ca = component_of (comp_father, ia);
926 	  if (ca != bad)
927 	    comps[ca]->eliminate_store_p = false;
928 	}
929     }
930 
   /* Link all created components into a list.  */
931   for (i = 0; i < n; i++)
932     {
933       comp = comps[i];
934       if (comp)
935 	{
936 	  comp->next = comp_list;
937 	  comp_list = comp;
938 	}
939     }
940   free (comps);
941 
942 end:
943   free (comp_father);
944   free (comp_size);
945   return comp_list;
946 }
947 
948 /* Returns true if the component COMP satisfies the conditions
949    described in 2) at the beginning of this file.  LOOP is the current
950    loop.  */
951 
952 static bool
suitable_component_p(class loop * loop,struct component * comp)953 suitable_component_p (class loop *loop, struct component *comp)
954 {
955   unsigned i;
956   dref a, first;
957   basic_block ba, bp = loop->header;
958   bool ok, has_write = false;
959 
   /* Condition 2b): each reference must be executed exactly once per
      iteration, and the references must be ordered by dominance.  */
960   FOR_EACH_VEC_ELT (comp->refs, i, a)
961     {
962       ba = gimple_bb (a->stmt);
963 
964       if (!just_once_each_iteration_p (loop, ba))
965 	return false;
966 
967       gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
968       bp = ba;
969 
970       if (DR_IS_WRITE (a->ref))
971 	has_write = true;
972     }
973 
   /* The references were filtered by suitable_reference_p when the
      components were built, so the first one must still pass; this also
      computes the step classification for the whole component.  */
974   first = comp->refs[0];
975   ok = suitable_reference_p (first->ref, &comp->comp_step);
976   gcc_assert (ok);
977   first->offset = 0;
978 
   /* Conditions 2a) and 2c): every other reference must be at a known,
      compile-time constant iteration distance from the first one.  */
979   for (i = 1; comp->refs.iterate (i, &a); i++)
980     {
981       /* Polynomial offsets are no use, since we need to know the
982 	 gap between iteration numbers at compile time.  */
983       poly_widest_int offset;
984       if (!determine_offset (first->ref, a->ref, &offset)
985 	  || !offset.is_constant (&a->offset))
986 	return false;
987 
988       enum ref_step_type a_step;
989       gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
990 			   && a_step == comp->comp_step);
991     }
992 
993   /* If there is a write inside the component, we must know whether the
994      step is nonzero or not -- we would not otherwise be able to recognize
995      whether the value accessed by reads comes from the OFFSET-th iteration
996      or the previous one.  */
997   if (has_write && comp->comp_step == RS_ANY)
998     return false;
999 
1000   return true;
1001 }
1002 
1003 /* Check the conditions on references inside each of components COMPS,
1004    and remove the unsuitable components from the list.  The new list
1005    of components is returned.  The conditions are described in 2) at
1006    the beginning of this file.  LOOP is the current loop.  */
1007 
1008 static struct component *
filter_suitable_components(class loop * loop,struct component * comps)1009 filter_suitable_components (class loop *loop, struct component *comps)
1010 {
1011   struct component **comp, *act;
1012 
1013   for (comp = &comps; *comp; )
1014     {
1015       act = *comp;
1016       if (suitable_component_p (loop, act))
1017 	comp = &act->next;
1018       else
1019 	{
1020 	  dref ref;
1021 	  unsigned i;
1022 
1023 	  *comp = act->next;
1024 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
1025 	    free (ref);
1026 	  release_component (act);
1027 	}
1028     }
1029 
1030   return comps;
1031 }
1032 
1033 /* Compares two drefs A and B by their offset and position.  Callback for
1034    qsort.  */
1035 
1036 static int
order_drefs(const void * a,const void * b)1037 order_drefs (const void *a, const void *b)
1038 {
1039   const dref *const da = (const dref *) a;
1040   const dref *const db = (const dref *) b;
1041   int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1042 
1043   if (offcmp != 0)
1044     return offcmp;
1045 
1046   return (*da)->pos - (*db)->pos;
1047 }
1048 
1049 /* Compares two drefs A and B by their position.  Callback for qsort.  */
1050 
1051 static int
order_drefs_by_pos(const void * a,const void * b)1052 order_drefs_by_pos (const void *a, const void *b)
1053 {
1054   const dref *const da = (const dref *) a;
1055   const dref *const db = (const dref *) b;
1056 
1057   return (*da)->pos - (*db)->pos;
1058 }
1059 
1060 /* Returns root of the CHAIN.  */
1061 
1062 static inline dref
get_chain_root(chain_p chain)1063 get_chain_root (chain_p chain)
1064 {
1065   return chain->refs[0];
1066 }
1067 
1068 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1069    exist.  */
1070 
1071 static inline dref
get_chain_last_write_at(chain_p chain,unsigned distance)1072 get_chain_last_write_at (chain_p chain, unsigned distance)
1073 {
1074   for (unsigned i = chain->refs.length (); i > 0; i--)
1075     if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1076 	&& distance == chain->refs[i - 1]->distance)
1077       return chain->refs[i - 1];
1078 
1079   return NULL;
1080 }
1081 
1082 /* Given CHAIN, returns the last write ref with the same distance before load
1083    at index LOAD_IDX, or NULL if it doesn't exist.  */
1084 
1085 static inline dref
get_chain_last_write_before_load(chain_p chain,unsigned load_idx)1086 get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1087 {
1088   gcc_assert (load_idx < chain->refs.length ());
1089 
1090   unsigned distance = chain->refs[load_idx]->distance;
1091 
1092   for (unsigned i = load_idx; i > 0; i--)
1093     if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1094 	&& distance == chain->refs[i - 1]->distance)
1095       return chain->refs[i - 1];
1096 
1097   return NULL;
1098 }
1099 
/* Adds REF to the chain CHAIN, computing its distance from the root
   and updating the chain's length, type and use-after bookkeeping.
   REF's offset must not be smaller than the root's.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* The distance is the offset difference to the root and must fit in
     an unsigned HOST_WIDE_INT.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* A new maximum distance extends the chain and invalidates any
     previously recorded use at the old maximum.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
  if (DR_IS_WRITE (ref->ref))
    chain->type = CT_STORE_STORE;

  /* Don't set the flag for store-store chain since there is no use.  */
  if (chain->type != CT_STORE_STORE
      && ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
1133 
1134 /* Returns the chain for invariant component COMP.  */
1135 
1136 static chain_p
make_invariant_chain(struct component * comp)1137 make_invariant_chain (struct component *comp)
1138 {
1139   chain_p chain = XCNEW (struct chain);
1140   unsigned i;
1141   dref ref;
1142 
1143   chain->type = CT_INVARIANT;
1144 
1145   chain->all_always_accessed = true;
1146 
1147   FOR_EACH_VEC_ELT (comp->refs, i, ref)
1148     {
1149       chain->refs.safe_push (ref);
1150       chain->all_always_accessed &= ref->always_accessed;
1151     }
1152 
1153   chain->inits = vNULL;
1154   chain->finis = vNULL;
1155 
1156   return chain;
1157 }
1158 
1159 /* Make a new chain of type TYPE rooted at REF.  */
1160 
1161 static chain_p
make_rooted_chain(dref ref,enum chain_type type)1162 make_rooted_chain (dref ref, enum chain_type type)
1163 {
1164   chain_p chain = XCNEW (struct chain);
1165 
1166   chain->type = type;
1167   chain->refs.safe_push (ref);
1168   chain->all_always_accessed = ref->always_accessed;
1169   ref->distance = 0;
1170 
1171   chain->inits = vNULL;
1172   chain->finis = vNULL;
1173 
1174   return chain;
1175 }
1176 
1177 /* Returns true if CHAIN is not trivial.  */
1178 
1179 static bool
nontrivial_chain_p(chain_p chain)1180 nontrivial_chain_p (chain_p chain)
1181 {
1182   return chain != NULL && chain->refs.length () > 1;
1183 }
1184 
1185 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1186    is no such name.  */
1187 
1188 static tree
name_for_ref(dref ref)1189 name_for_ref (dref ref)
1190 {
1191   tree name;
1192 
1193   if (is_gimple_assign (ref->stmt))
1194     {
1195       if (!ref->ref || DR_IS_READ (ref->ref))
1196 	name = gimple_assign_lhs (ref->stmt);
1197       else
1198 	name = gimple_assign_rhs1 (ref->stmt);
1199     }
1200   else
1201     name = PHI_RESULT (ref->stmt);
1202 
1203   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1204 }
1205 
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  poly_widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  /* Compute DIFF = offset(ROOT) - offset(REF) as an affine
     combination.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  /* The difference must be a compile-time constant multiple OFF of the
     root's step ...  */
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* ... and that multiple must be exactly DISTANCE iterations.  */
  if (maybe_ne (off, distance))
    return false;

  return true;
}
1247 
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (class loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Get the ssa name carrying the value of REF inside the loop.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Scan the header phis for two things at once: a phi whose latch
     argument is NAME (the looparound copy), and the virtual operand
     live on the preheader edge (needed for the clobber walk below).  */
  tree entry_vuse = NULL_TREE;
  gphi *phi = NULL;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      gphi *p = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (p, latch) == name)
	phi = p;
      else if (virtual_operand_p (gimple_phi_result (p)))
	entry_vuse = PHI_ARG_DEF_FROM_EDGE (p, loop_preheader_edge (loop));
      if (phi && entry_vuse)
	break;
    }
  if (!phi || !entry_vuse)
    return NULL;

  /* The initial value of the phi must be defined by a plain assignment
     loading from a memory reference or a declaration.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
			     init_stmt))
    return NULL;

  /* The location loaded before the loop must equal REF's location one
     iteration earlier (hence distance + 1).  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  /* Make sure nothing clobbers the location we re-use the initial value
     from.  */
  if (entry_vuse != gimple_vuse (init_stmt))
    {
      ao_ref ref;
      ao_ref_init (&ref, init_ref);
      unsigned limit = param_sccvn_max_alias_queries_per_access;
      tree vdef = entry_vuse;
      /* Walk the virtual use-def chain from the loop entry back to the
	 initializer; give up conservatively after LIMIT alias queries
	 or when reaching a phi node.  */
      do
	{
	  gimple *def = SSA_NAME_DEF_STMT (vdef);
	  if (limit-- == 0 || gimple_code (def) == GIMPLE_PHI)
	    return NULL;
	  if (stmt_may_clobber_ref_p_1 (def, &ref))
	    /* When the stmt is an assign to init_ref we could in theory
	       use its RHS for the initial value of the looparound PHI
	       we replace in prepare_initializers_chain, but we have
	       no convenient place to store this info at the moment.  */
	    return NULL;
	  vdef = gimple_vuse (def);
	}
      while (vdef != gimple_vuse (init_stmt));
    }

  return phi;
}
1340 
1341 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1342 
1343 static void
insert_looparound_copy(chain_p chain,dref ref,gphi * phi)1344 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1345 {
1346   dref nw = XCNEW (class dref_d), aref;
1347   unsigned i;
1348 
1349   nw->stmt = phi;
1350   nw->distance = ref->distance + 1;
1351   nw->always_accessed = 1;
1352 
1353   FOR_EACH_VEC_ELT (chain->refs, i, aref)
1354     if (aref->distance >= nw->distance)
1355       break;
1356   chain->refs.safe_insert (i, nw);
1357 
1358   if (nw->distance > chain->length)
1359     {
1360       chain->length = nw->distance;
1361       chain->has_max_use_after = false;
1362     }
1363 }
1364 
1365 /* For references in CHAIN that are copied around the LOOP (created previously
1366    by PRE, or by user), add the results of such copies to the chain.  This
1367    enables us to remove the copies by unrolling, and may need less registers
1368    (also, it may allow us to combine chains together).  */
1369 
1370 static void
add_looparound_copies(class loop * loop,chain_p chain)1371 add_looparound_copies (class loop *loop, chain_p chain)
1372 {
1373   unsigned i;
1374   dref ref, root = get_chain_root (chain);
1375   gphi *phi;
1376 
1377   if (chain->type == CT_STORE_STORE)
1378     return;
1379 
1380   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1381     {
1382       phi = find_looparound_phi (loop, ref, root);
1383       if (!phi)
1384 	continue;
1385 
1386       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1387       insert_looparound_copy (chain, ref, phi);
1388     }
1389 }
1390 
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (class loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;
  enum chain_type type;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Trivial component.  */
  if (comp->refs.length () <= 1)
    {
      if (comp->refs.length () == 1)
	{
	  /* A single reference cannot be commoned with anything;
	     release it.  */
	  free (comp->refs[0]);
	  comp->refs.truncate (0);
	}
      return;
    }

  comp->refs.qsort (order_drefs);

  /* For Store-Store chain, we only support load if it is dominated by a
     store statement in the same iteration of loop.  */
  if (comp->eliminate_store_p)
    for (a = NULL, i = 0; i < comp->refs.length (); i++)
      {
	if (DR_IS_WRITE (comp->refs[i]->ref))
	  a = comp->refs[i];
	else if (a == NULL || a->offset != comp->refs[i]->offset)
	  {
	    /* If there is load that is not dominated by a store in the
	       same iteration of loop, clear the flag so no Store-Store
	       chain is generated for this component.  */
	    comp->eliminate_store_p = false;
	    break;
	  }
      }

  /* Determine roots and create chains for components.  */
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain if there is none yet, if the current one
	 cannot accept a write, or if the offset gap to the current
	 root reaches MAX_DISTANCE.  */
      if (!chain
	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  /* Flush the finished chain; trivial chains are released.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);

	  /* Determine type of the chain.  If the root reference is a load,
	     this can only be a CT_LOAD chain; other chains are initialized
	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
	     new reference is added.  */
	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
	  chain = make_rooted_chain (a, type);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Flush the last chain.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1481 
1482 /* Find roots of the values and determine distances in components COMPS, and
1483    separates the references to CHAINS.  LOOP is the current loop.  */
1484 
1485 static void
determine_roots(class loop * loop,struct component * comps,vec<chain_p> * chains)1486 determine_roots (class loop *loop,
1487 		 struct component *comps, vec<chain_p> *chains)
1488 {
1489   struct component *comp;
1490 
1491   for (comp = comps; comp; comp = comp->next)
1492     determine_roots_comp (loop, comp, chains);
1493 }
1494 
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Phi results can only be replaced by a use of NEW_TREE.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      /* The iterator may point to a new statement after the rhs
	 replacement; refetch before updating.  */
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  if (TREE_CLOBBER_P (val))
	    /* A clobber cannot be copied; use the default definition of
	       NEW_TREE's variable instead.  */
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1585 
/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
   of the loop it was analyzed in.  ITER is a compile-time constant;
   NITERS, if non-NULL, is a tree value (NITERS defaults to NULL_TREE,
   in which case only ITER is applied).  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter,
		  gimple_seq *stmts, tree niters = NULL_TREE)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  tree ref = DR_REF (dr);
  enum tree_code ref_code = ERROR_MARK;
  tree ref_type = NULL_TREE;
  tree ref_op1 = NULL_TREE;
  tree ref_op2 = NULL_TREE;
  tree new_offset;

  if (iter != 0)
    {
      /* Advance by ITER steps; fold constant adjustments into the
	 constant offset COFF, variable ones into OFF.  */
      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
      if (TREE_CODE (new_offset) == INTEGER_CST)
	coff = size_binop (PLUS_EXPR, coff, new_offset);
      else
	off = size_binop (PLUS_EXPR, off, new_offset);
    }

  if (niters != NULL_TREE)
    {
      niters = fold_convert (ssizetype, niters);
      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
      /* NOTE(review): unlike the ITER case above, this tests NITERS for
	 constantness rather than NEW_OFFSET; it appears to rely on
	 DR_STEP being constant whenever NITERS is -- confirm.  */
      if (TREE_CODE (niters) == INTEGER_CST)
	coff = size_binop (PLUS_EXPR, coff, new_offset);
      else
	off = size_binop (PLUS_EXPR, off, new_offset);
    }

  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     if the bitfield object also starts at a byte-boundary we can simply
     replicate the COMPONENT_REF, but we have to subtract the component's
     byte-offset from the MEM_REF address first.
     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      unsigned HOST_WIDE_INT boff;
      tree field = TREE_OPERAND (ref, 1);
      tree offset = component_ref_field_offset (ref);
      ref_type = TREE_TYPE (ref);
      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
      /* This can occur in Ada.  See the comment in get_bit_range.  */
      if (boff % BITS_PER_UNIT != 0
	  || !tree_fits_uhwi_p (offset))
	{
	  ref_code = BIT_FIELD_REF;
	  ref_op1 = DECL_SIZE (field);
	  ref_op2 = bitsize_zero_node;
	}
      else
	{
	  /* Byte-aligned bitfield: rebuild the COMPONENT_REF, with the
	     component's byte offset subtracted from the address.  */
	  boff >>= LOG2_BITS_PER_UNIT;
	  boff += tree_to_uhwi (offset);
	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
	  ref_code = COMPONENT_REF;
	  ref_op1 = field;
	  ref_op2 = TREE_OPERAND (ref, 2);
	  ref = TREE_OPERAND (ref, 0);
	}
    }
  /* Build the MEM_REF at the computed address; COFF doubles as the
     constant offset and the alias pointer operand.  */
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
  tree type = build_aligned_type (TREE_TYPE (ref),
				  get_object_alignment (ref));
  ref = build2 (MEM_REF, type, addr, alias_ptr);
  if (ref_type)
    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
  return ref;
}
1666 
1667 /* Get the initialization expression for the INDEX-th temporary variable
1668    of CHAIN.  */
1669 
1670 static tree
get_init_expr(chain_p chain,unsigned index)1671 get_init_expr (chain_p chain, unsigned index)
1672 {
1673   if (chain->type == CT_COMBINATION)
1674     {
1675       tree e1 = get_init_expr (chain->ch1, index);
1676       tree e2 = get_init_expr (chain->ch2, index);
1677 
1678       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1679     }
1680   else
1681     return chain->inits[index];
1682 }
1683 
1684 /* Returns a new temporary variable used for the I-th variable carrying
1685    value of REF.  The variable's uid is marked in TMP_VARS.  */
1686 
1687 static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)1688 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1689 {
1690   tree type = TREE_TYPE (ref);
1691   /* We never access the components of the temporary variable in predictive
1692      commoning.  */
1693   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1694   bitmap_set_bit (tmp_vars, DECL_UID (var));
1695   return var;
1696 }
1697 
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables; when there is no use after the maximum
     distance, the last slot reuses the first variable instead of
     getting one of its own.  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* For each variable, create a header phi propagating the value of the
     next variable around the loop, initialized on the preheader edge
     from the chain's initializer expression.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1751 
/* For inter-iteration store elimination CHAIN in LOOP, returns true if
   all stores to be eliminated store loop invariant values into memory.
   In this case, we can use these invariant values directly after LOOP.  */

static bool
is_inv_store_elimination_chain (class loop *loop, chain_p chain)
{
  /* Only store-store chains of nonzero length qualify.  */
  if (chain->length == 0 || chain->type != CT_STORE_STORE)
    return false;

  gcc_assert (!chain->has_max_use_after);

  /* If loop iterates for unknown times or fewer times than chain->length,
     we still need to setup root variable and propagate it with PHI node.  */
  tree niters = number_of_latch_executions (loop);
  if (TREE_CODE (niters) != INTEGER_CST
      || wi::leu_p (wi::to_wide (niters), chain->length))
    return false;

  /* Check stores in chain for elimination if they only store loop invariant
     values.  */
  for (unsigned i = 0; i < chain->length; i++)
    {
      dref a = get_chain_last_write_at (chain, i);
      if (a == NULL)
	continue;

      gimple *def_stmt, *stmt = a->stmt;
      if (!gimple_assign_single_p (stmt))
	return false;

      tree val = gimple_assign_rhs1 (stmt);
      if (TREE_CLOBBER_P (val))
	return false;

      /* Constants are trivially loop invariant.  */
      if (CONSTANT_CLASS_P (val))
	return false;

      if (TREE_CODE (val) != SSA_NAME)
	return false;

      def_stmt = SSA_NAME_DEF_STMT (val);
      /* Default definitions are defined outside any loop.  */
      if (gimple_nop_p (def_stmt))
	continue;

      /* An ssa name defined inside LOOP is not invariant.  */
      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	return false;
    }
  return true;
}
1802 
/* Creates root variables for store elimination CHAIN in which stores for
   elimination only store loop invariant values.  In this case, we neither
   need to load root variables before loop nor propagate it with PHI nodes.  */

static void
initialize_root_vars_store_elim_1 (chain_p chain)
{
  tree var;
  unsigned i, n = chain->length;

  chain->vars.create (n);
  chain->vars.safe_grow_cleared (n);

  /* Initialize root value for eliminated stores at each distance.  */
  for (i = 0; i < n; i++)
    {
      dref a = get_chain_last_write_at (chain, i);
      if (a == NULL)
	continue;

      /* The stored (invariant) rhs becomes the root value directly.  */
      var = gimple_assign_rhs1 (a->stmt);
      chain->vars[a->distance] = var;
    }

  /* We don't propagate values with PHI nodes, so manually propagate value
     to bubble positions.  */
  var = chain->vars[0];
  for (i = 1; i < n; i++)
    {
      if (chain->vars[i] != NULL_TREE)
	{
	  var = chain->vars[i];
	  continue;
	}
      /* Fill the gap with the nearest preceding value.  */
      chain->vars[i] = var;
    }

  /* Reverse the vector so that vars[0] corresponds to the largest
     distance.  */
  for (i = 0; i < n / 2; i++)
    std::swap (chain->vars[i], chain->vars[n - i - 1]);
}
1844 
/* Creates root variables for store elimination CHAIN in which stores for
   elimination store loop variant values.  In this case, we may need to
   load root variables before LOOP and propagate it with PHI nodes.  Uids
   of the newly created root variables are marked in TMP_VARS.  */

static void
initialize_root_vars_store_elim_2 (class loop *loop,
				   chain_p chain, bitmap tmp_vars)
{
  unsigned i, n = chain->length;
  tree ref, init, var, next, val, phi_result;
  gimple *stmt;
  gimple_seq stmts;

  chain->vars.create (n);

  /* Create one root variable per distance, as ssa names.  */
  ref = DR_REF (get_chain_root (chain)->ref);
  for (i = 0; i < n; i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Root values are either rhs operand of stores to be eliminated, or
     loaded from memory before loop.  */
  auto_vec<tree> vtemps;
  vtemps.safe_grow_cleared (n);
  for (i = 0; i < n; i++)
    {
      init = get_init_expr (chain, i);
      if (init == NULL_TREE)
	{
	  /* Root value is rhs operand of the store to be eliminated if
	     it isn't loaded from memory before loop.  */
	  dref a = get_chain_last_write_at (chain, i);
	  val = gimple_assign_rhs1 (a->stmt);
	  if (TREE_CLOBBER_P (val))
	    {
	      /* A clobber cannot be copied; replace it with the default
		 definition of the root variable.  */
	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
	      gimple_assign_set_rhs1 (a->stmt, val);
	    }

	  /* VTEMPS is filled in reverse distance order.  */
	  vtemps[n - i - 1] = val;
	}
      else
	{
	  edge latch = loop_latch_edge (loop);
	  edge entry = loop_preheader_edge (loop);

	  /* Root value is loaded from memory before loop, we also need
	     to add PHI nodes to propagate the value across iterations.  */
	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (entry, stmts);

	  next = chain->vars[n - i];
	  phi_result = copy_ssa_name (next);
	  gphi *phi = create_phi_node (phi_result, loop->header);
	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
	  vtemps[n - i - 1] = phi_result;
	}
    }

  /* Find the insertion position: after the last (by position) reference
     of the chain.  */
  dref last = get_chain_root (chain);
  for (i = 0; i < chain->refs.length (); i++)
    {
      if (chain->refs[i]->pos > last->pos)
	last = chain->refs[i];
    }

  gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);

  /* Insert statements copying root value to root variable.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      val = vtemps[i];
      stmt = gimple_build_assign (var, val);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
}
1931 
1932 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1933    (CHAIN->length - 1) iterations.  */
1934 
1935 static void
finalize_eliminated_stores(class loop * loop,chain_p chain)1936 finalize_eliminated_stores (class loop *loop, chain_p chain)
1937 {
1938   unsigned i, n = chain->length;
1939 
1940   for (i = 0; i < n; i++)
1941     {
      /* Emit FINI = VAR for each distance; VARS and FINIS are indexed in
	 opposite orders, hence the mirrored index on FINIS.  */
1942       tree var = chain->vars[i];
1943       tree fini = chain->finis[n - i - 1];
1944       gimple *stmt = gimple_build_assign (fini, var);
1945 
1946       gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1947     }
1948 
1949   if (chain->fini_seq)
1950     {
      /* Materialize the compensating stores on the loop's single exit edge;
	 the accumulated sequence is consumed, so clear it afterwards.  */
1951       gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1952       chain->fini_seq = NULL;
1953     }
1954 }
1955 
1956 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1957    initialization on entry to LOOP if necessary.  The ssa name for the variable
1958    is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
1959    around the loop is created.  Uid of the newly created temporary variable
1960    is marked in TMP_VARS.  INITS is the list containing the (single)
1961    initializer.  */
1962 
1963 static void
initialize_root_vars_lm(class loop * loop,dref root,bool written,vec<tree> * vars,vec<tree> inits,bitmap tmp_vars)1964 initialize_root_vars_lm (class loop *loop, dref root, bool written,
1965 			 vec<tree> *vars, vec<tree> inits,
1966 			 bitmap tmp_vars)
1967 {
1968   unsigned i;
1969   tree ref = DR_REF (root->ref), init, var, next;
1970   gimple_seq stmts;
1971   gphi *phi;
1972   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1973 
1974   /* Find the initializer for the variable.  NOTE(review): no trap check
1975      is performed here -- presumably the initializer was already validated
     when INITS was prepared; confirm in the caller.  */
1976   init = inits[0];
1977 
      /* One SSA name suffices for a read-only invariant; a written one also
	 needs a second name to carry the value around the latch.  Both names
	 are based on the same temporary variable decl.  */
1978   vars->create (written ? 2 : 1);
1979   var = predcom_tmp_var (ref, 0, tmp_vars);
1980   vars->quick_push (var);
1981   if (written)
1982     vars->quick_push ((*vars)[0]);
1983 
1984   FOR_EACH_VEC_ELT (*vars, i, var)
1985     (*vars)[i] = make_ssa_name (var);
1986 
1987   var = (*vars)[0];
1988 
      /* Gimplify the initializer on the preheader edge so it is computed
	 exactly once before the loop.  */
1989   init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1990   if (stmts)
1991     gsi_insert_seq_on_edge_immediate (entry, stmts);
1992 
1993   if (written)
1994     {
1995       next = (*vars)[1];
1996       phi = create_phi_node (var, loop->header);
1997       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1998       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1999     }
2000   else
2001     {
2002       gassign *init_stmt = gimple_build_assign (var, init);
2003       gsi_insert_on_edge_immediate (entry, init_stmt);
2004     }
2005 }
2006 
2007 
2008 /* Execute load motion for references in chain CHAIN.  Uids of the newly
2009    created temporary variables are marked in TMP_VARS.  */
2010 
2011 static void
execute_load_motion(class loop * loop,chain_p chain,bitmap tmp_vars)2012 execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
2013 {
2014   auto_vec<tree> vars;
2015   dref a;
2016   unsigned n_writes = 0, ridx, i;
2017   tree var;
2018 
2019   gcc_assert (chain->type == CT_INVARIANT);
2020   gcc_assert (!chain->combined);
2021   FOR_EACH_VEC_ELT (chain->refs, i, a)
2022     if (DR_IS_WRITE (a->ref))
2023       n_writes++;
2024 
2025   /* If there are no reads in the loop, there is nothing to do.  */
2026   if (n_writes == chain->refs.length ())
2027     return;
2028 
2029   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2030 			   &vars, chain->inits, tmp_vars);
2031 
      /* RIDX selects which variable replaces the references: vars[0] while
	 writes remain, vars[1] (the looparound PHI result is dead by then)
	 after the last write has been processed.  */
2032   ridx = 0;
2033   FOR_EACH_VEC_ELT (chain->refs, i, a)
2034     {
2035       bool is_read = DR_IS_READ (a->ref);
2036 
2037       if (DR_IS_WRITE (a->ref))
2038 	{
	  /* Each write (except the last) gets a fresh SSA name based on the
	     same variable, so later reads see the most recent value.  */
2039 	  n_writes--;
2040 	  if (n_writes)
2041 	    {
2042 	      var = vars[0];
2043 	      var = make_ssa_name (SSA_NAME_VAR (var));
2044 	      vars[0] = var;
2045 	    }
2046 	  else
2047 	    ridx = 1;
2048 	}
2049 
2050       replace_ref_with (a->stmt, vars[ridx],
2051 			!is_read, !is_read);
2052     }
2053 }
2054 
2055 /* Returns the single statement in that NAME is used, excepting
2056    the looparound phi nodes contained in one of the chains.  If there is no
2057    such statement, or more statements, NULL is returned.  */
2058 
2059 static gimple *
single_nonlooparound_use(tree name)2060 single_nonlooparound_use (tree name)
2061 {
2062   use_operand_p use;
2063   imm_use_iterator it;
2064   gimple *stmt, *ret = NULL;
2065 
2066   FOR_EACH_IMM_USE_FAST (use, it, name)
2067     {
2068       stmt = USE_STMT (use);
2069 
2070       if (gimple_code (stmt) == GIMPLE_PHI)
2071 	{
2072 	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
2073 	     could not be processed anyway, so just fail for them.  */
2074 	  if (bitmap_bit_p (looparound_phis,
2075 			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
2076 	    continue;
2077 
2078 	  return NULL;
2079 	}
2080       else if (is_gimple_debug (stmt))
	/* Debug statements do not count as real uses.  */
2081 	continue;
2082       else if (ret != NULL)
	/* A second non-debug, non-looparound use -- not single.  */
2083 	return NULL;
2084       else
2085 	ret = stmt;
2086     }
2087 
2088   return ret;
2089 }
2090 
2091 /* Remove statement STMT, as well as the chain of assignments in that it is
2092    used.  */
2093 
2094 static void
remove_stmt(gimple * stmt)2095 remove_stmt (gimple *stmt)
2096 {
2097   tree name;
2098   gimple *next;
2099   gimple_stmt_iterator psi;
2100 
2101   if (gimple_code (stmt) == GIMPLE_PHI)
2102     {
      /* Record the single use of the PHI result before the node (and the
	 use record) disappears, then remove the PHI itself.  */
2103       name = PHI_RESULT (stmt);
2104       next = single_nonlooparound_use (name);
2105       reset_debug_uses (stmt);
2106       psi = gsi_for_stmt (stmt);
2107       remove_phi_node (&psi, true);
2108 
      /* Continue only into a plain SSA-name copy of NAME; anything else
	 is a real use we must keep.  */
2109       if (!next
2110 	  || !gimple_assign_ssa_name_copy_p (next)
2111 	  || gimple_assign_rhs1 (next) != name)
2112 	return;
2113 
2114       stmt = next;
2115     }
2116 
      /* Walk and delete the chain of single-use copies rooted at STMT.  */
2117   while (1)
2118     {
2119       gimple_stmt_iterator bsi;
2120 
2121       bsi = gsi_for_stmt (stmt);
2122 
2123       name = gimple_assign_lhs (stmt);
2124       if (TREE_CODE (name) == SSA_NAME)
2125 	{
2126 	  next = single_nonlooparound_use (name);
2127 	  reset_debug_uses (stmt);
2128 	}
2129       else
2130 	{
2131 	  /* This is a store to be eliminated.  */
2132 	  gcc_assert (gimple_vdef (stmt) != NULL);
2133 	  next = NULL;
2134 	}
2135 
      /* Detach virtual operands before physically removing the statement
	 and releasing its definitions.  */
2136       unlink_stmt_vdef (stmt);
2137       gsi_remove (&bsi, true);
2138       release_defs (stmt);
2139 
2140       if (!next
2141 	  || !gimple_assign_ssa_name_copy_p (next)
2142 	  || gimple_assign_rhs1 (next) != name)
2143 	return;
2144 
2145       stmt = next;
2146     }
2147 }
2148 
2149 /* Perform the predictive commoning optimization for a chain CHAIN.
2150    Uids of the newly created temporary variables are marked in TMP_VARS.*/
2151 
2152 static void
execute_pred_commoning_chain(class loop * loop,chain_p chain,bitmap tmp_vars)2153 execute_pred_commoning_chain (class loop *loop, chain_p chain,
2154 			      bitmap tmp_vars)
2155 {
2156   unsigned i;
2157   dref a;
2158   tree var;
2159   bool in_lhs;
2160 
2161   if (chain->combined)
2162     {
2163       /* For combined chains, just remove the statements that are used to
2164 	 compute the values of the expression (except for the root one).
2165 	 We delay this until after all chains are processed.  */
2166     }
2167   else if (chain->type == CT_STORE_STORE)
2168     {
2169       if (chain->length > 0)
2170 	{
2171 	  if (chain->inv_store_elimination)
2172 	    {
2173 	      /* If dead stores in this chain only store loop invariant
2174 		 values, we can simply record the invariant value and use
2175 		 it directly after loop.  */
2176 	      initialize_root_vars_store_elim_1 (chain);
2177 	    }
2178 	  else
2179 	    {
2180 	      /* If dead stores in this chain store loop variant values,
2181 		 we need to set up the variables by loading from memory
2182 		 before loop and propagating it with PHI nodes.  */
2183 	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2184 	    }
2185 
2186 	  /* For inter-iteration store elimination chain, stores at each
2187 	     distance in loop's last (chain->length - 1) iterations can't
2188 	     be eliminated, because there is no following killing store.
2189 	     We need to generate these stores after loop.  */
2190 	  finalize_eliminated_stores (loop, chain);
2191 	}
2192 
      /* Walk the references backwards so the chain's last store is visited
	 first and preserved; all earlier stores it kills are removed.  */
2193       bool last_store_p = true;
2194       for (i = chain->refs.length (); i > 0; i--)
2195 	{
2196 	  a = chain->refs[i - 1];
2197 	  /* Preserve the last store of the chain.  Eliminate other stores
2198 	     which are killed by the last one.  */
2199 	  if (DR_IS_WRITE (a->ref))
2200 	    {
2201 	      if (last_store_p)
2202 		last_store_p = false;
2203 	      else
2204 		remove_stmt (a->stmt);
2205 
2206 	      continue;
2207 	    }
2208 
2209 	  /* Any load in Store-Store chain must be dominated by a previous
2210 	     store, we replace the load reference with rhs of the store.  */
2211 	  dref b = get_chain_last_write_before_load (chain, i - 1);
2212 	  gcc_assert (b != NULL);
2213 	  var = gimple_assign_rhs1 (b->stmt);
2214 	  replace_ref_with (a->stmt, var, false, false);
2215 	}
2216     }
2217   else
2218     {
2219       /* For non-combined chains, set up the variables that hold its value.  */
2220       initialize_root_vars (loop, chain, tmp_vars);
2221       a = get_chain_root (chain);
2222       in_lhs = (chain->type == CT_STORE_LOAD
2223 		|| chain->type == CT_COMBINATION);
2224       replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2225 
2226       /* Replace the uses of the original references by these variables.  */
2227       for (i = 1; chain->refs.iterate (i, &a); i++)
2228 	{
	  /* A reference at distance D reads the value produced D iterations
	     ago, held in vars[length - D].  */
2229 	  var = chain->vars[chain->length - a->distance];
2230 	  replace_ref_with (a->stmt, var, false, false);
2231 	}
2232     }
2233 }
2234 
2235 /* Determines the unroll factor necessary to remove as many temporary variable
2236    copies as possible.  CHAINS is the list of chains that will be
2237    optimized.  */
2238 
2239 static unsigned
determine_unroll_factor(vec<chain_p> chains)2240 determine_unroll_factor (vec<chain_p> chains)
2241 {
2242   chain_p chain;
2243   unsigned factor = 1, af, nfactor, i;
2244   unsigned max = param_max_unroll_times;
2245 
2246   FOR_EACH_VEC_ELT (chains, i, chain)
2247     {
2248       if (chain->type == CT_INVARIANT)
2249 	continue;
2250       /* For now we can't handle unrolling when eliminating stores.  */
2251       else if (chain->type == CT_STORE_STORE)
2252 	return 1;
2253 
2254       if (chain->combined)
2255 	{
2256 	  /* For combined chains, we can't handle unrolling if we replace
2257 	     looparound PHIs.  */
2258 	  dref a;
2259 	  unsigned j;
2260 	  for (j = 1; chain->refs.iterate (j, &a); j++)
2261 	    if (gimple_code (a->stmt) == GIMPLE_PHI)
2262 	      return 1;
2263 	  continue;
2264 	}
2265 
2266       /* The best unroll factor for this chain is equal to the number of
2267 	 temporary variables that we create for it.  */
2268       af = chain->length;
2269       if (chain->has_max_use_after)
2270 	af++;
2271 
      /* Combine with the factors chosen so far: NFACTOR = lcm (FACTOR, AF).
	 Only accept it while it stays within the unroll-times limit.  */
2272       nfactor = factor * af / gcd (factor, af);
2273       if (nfactor <= max)
2274 	factor = nfactor;
2275     }
2276 
2277   return factor;
2278 }
2279 
2280 /* Perform the predictive commoning optimization for CHAINS.
2281    Uids of the newly created temporary variables are marked in TMP_VARS.  */
2282 
2283 static void
execute_pred_commoning(class loop * loop,vec<chain_p> chains,bitmap tmp_vars)2284 execute_pred_commoning (class loop *loop, vec<chain_p> chains,
2285 			bitmap tmp_vars)
2286 {
2287   chain_p chain;
2288   unsigned i;
2289 
      /* First pass: transform every chain (load motion for invariant chains,
	 predictive commoning for the rest).  */
2290   FOR_EACH_VEC_ELT (chains, i, chain)
2291     {
2292       if (chain->type == CT_INVARIANT)
2293 	execute_load_motion (loop, chain, tmp_vars);
2294       else
2295 	execute_pred_commoning_chain (loop, chain, tmp_vars);
2296     }
2297 
      /* Second pass: only now remove the statements feeding combined chains,
	 so that earlier transformations could still look at them.  */
2298   FOR_EACH_VEC_ELT (chains, i, chain)
2299     {
2300       if (chain->type == CT_INVARIANT)
2301 	;
2302       else if (chain->combined)
2303 	{
2304 	  /* For combined chains, just remove the statements that are used to
2305 	     compute the values of the expression (except for the root one).  */
2306 	  dref a;
2307 	  unsigned j;
2308 	  for (j = 1; chain->refs.iterate (j, &a); j++)
2309 	    remove_stmt (a->stmt);
2310 	}
2311     }
2312 
2313   update_ssa (TODO_update_ssa_only_virtuals);
2314 }
2315 
2316 /* For each reference in CHAINS, if its defining statement is
2317    phi node, record the ssa name that is defined by it.  */
2318 
2319 static void
replace_phis_by_defined_names(vec<chain_p> chains)2320 replace_phis_by_defined_names (vec<chain_p> chains)
2321 {
2322   chain_p chain;
2323   dref a;
2324   unsigned i, j;
2325 
2326   FOR_EACH_VEC_ELT (chains, i, chain)
2327     FOR_EACH_VEC_ELT (chain->refs, j, a)
2328       {
2329 	if (gimple_code (a->stmt) == GIMPLE_PHI)
2330 	  {
	    /* Stash the PHI result so replace_names_by_phis can recover the
	       (possibly reallocated) PHI node later; clear the stmt to mark
	       the reference as stashed.  */
2331 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
2332 	    a->stmt = NULL;
2333 	  }
2334       }
2335 }
2336 
2337 /* For each reference in CHAINS, if name_defined_by_phi is not
2338    NULL, use it to set the stmt field.  */
2339 
2340 static void
replace_names_by_phis(vec<chain_p> chains)2341 replace_names_by_phis (vec<chain_p> chains)
2342 {
2343   chain_p chain;
2344   dref a;
2345   unsigned i, j;
2346 
2347   FOR_EACH_VEC_ELT (chains, i, chain)
2348     FOR_EACH_VEC_ELT (chain->refs, j, a)
      /* A NULL stmt marks a reference stashed by replace_phis_by_defined_names;
	 recover the PHI through the defining statement of the saved name.  */
2349       if (a->stmt == NULL)
2350 	{
2351 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2352 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2353 	  a->name_defined_by_phi = NULL_TREE;
2354 	}
2355 }
2356 
2357 /* Wrapper over execute_pred_commoning, to pass it as a callback
2358    to tree_transform_and_unroll_loop.  */
2359 
2360 struct epcc_data
2361 {
  /* Chains to be transformed by execute_pred_commoning.  */
2362   vec<chain_p> chains;
  /* Uids of the temporary variables created by the transformation.  */
2363   bitmap tmp_vars;
2364 };
2365 
2366 static void
execute_pred_commoning_cbck(class loop * loop,void * data)2367 execute_pred_commoning_cbck (class loop *loop, void *data)
2368 {
2369   struct epcc_data *const dta = (struct epcc_data *) data;
2370 
2371   /* Restore phi nodes that were replaced by ssa names before
2372      tree_transform_and_unroll_loop (see detailed description in
2373      tree_predictive_commoning_loop).  */
2374   replace_names_by_phis (dta->chains);
2375   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2376 }
2377 
2378 /* Base NAME and all the names in the chain of phi nodes that use it
2379    on variable VAR.  The phi nodes are recognized by being in the copies of
2380    the header of the LOOP.  */
2381 
2382 static void
base_names_in_chain_on(class loop * loop,tree name,tree var)2383 base_names_in_chain_on (class loop *loop, tree name, tree var)
2384 {
2385   gimple *stmt, *phi;
2386   imm_use_iterator iter;
2387 
2388   replace_ssa_name_symbol (name, var);
2389 
      /* Follow the du-chain through PHI nodes inside LOOP, rebasing each
	 PHI result on VAR, until no further in-loop PHI use is found.  */
2390   while (1)
2391     {
2392       phi = NULL;
2393       FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2394 	{
2395 	  if (gimple_code (stmt) == GIMPLE_PHI
2396 	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2397 	    {
2398 	      phi = stmt;
2399 	      BREAK_FROM_IMM_USE_STMT (iter);
2400 	    }
2401 	}
2402       if (!phi)
2403 	return;
2404 
2405       name = PHI_RESULT (phi);
2406       replace_ssa_name_symbol (name, var);
2407     }
2408 }
2409 
2410 /* Given an unrolled LOOP after predictive commoning, remove the
2411    register copies arising from phi nodes by changing the base
2412    variables of SSA names.  TMP_VARS is the set of the temporary variables
2413    for those we want to perform this.  */
2414 
2415 static void
eliminate_temp_copies(class loop * loop,bitmap tmp_vars)2416 eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
2417 {
2418   edge e;
2419   gphi *phi;
2420   gimple *stmt;
2421   tree name, use, var;
2422   gphi_iterator psi;
2423 
2424   e = loop_latch_edge (loop);
2425   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2426     {
      /* Only process header PHIs whose result is based on one of the
	 temporaries created by predictive commoning.  */
2427       phi = psi.phi ();
2428       name = PHI_RESULT (phi);
2429       var = SSA_NAME_VAR (name);
2430       if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2431 	continue;
2432       use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2433       gcc_assert (TREE_CODE (use) == SSA_NAME);
2434 
2435       /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
2436       stmt = SSA_NAME_DEF_STMT (use);
2437       while (gimple_code (stmt) == GIMPLE_PHI
2438 	     /* In case we could not unroll the loop enough to eliminate
2439 		all copies, we may reach the loop header before the defining
2440 		statement (in that case, some register copies will be present
2441 		in loop latch in the final code, corresponding to the newly
2442 		created looparound phi nodes).  */
2443 	     && gimple_bb (stmt) != loop->header)
2444 	{
2445 	  gcc_assert (single_pred_p (gimple_bb (stmt)));
2446 	  use = PHI_ARG_DEF (stmt, 0);
2447 	  stmt = SSA_NAME_DEF_STMT (use);
2448 	}
2449 
2450       base_names_in_chain_on (loop, use, var);
2451     }
2452 }
2453 
2454 /* Returns true if CHAIN is suitable to be combined.  */
2455 
2456 static bool
chain_can_be_combined_p(chain_p chain)2457 chain_can_be_combined_p (chain_p chain)
2458 {
2459   return (!chain->combined
2460 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2461 }
2462 
2463 /* Returns the modify statement that uses NAME.  Skips over assignment
2464    statements, NAME is replaced with the actual name used in the returned
2465    statement.  */
2466 
2467 static gimple *
find_use_stmt(tree * name)2468 find_use_stmt (tree *name)
2469 {
2470   gimple *stmt;
2471   tree rhs, lhs;
2472 
2473   /* Skip over assignments.  */
2474   while (1)
2475     {
2476       stmt = single_nonlooparound_use (*name);
2477       if (!stmt)
2478 	return NULL;
2479 
2480       if (gimple_code (stmt) != GIMPLE_ASSIGN)
2481 	return NULL;
2482 
2483       lhs = gimple_assign_lhs (stmt);
2484       if (TREE_CODE (lhs) != SSA_NAME)
2485 	return NULL;
2486 
2487       if (gimple_assign_copy_p (stmt))
2488 	{
	  /* A plain copy -- follow it, updating *NAME to the copy's lhs.  */
2489 	  rhs = gimple_assign_rhs1 (stmt);
2490 	  if (rhs != *name)
2491 	    return NULL;
2492 
2493 	  *name = lhs;
2494 	}
2495       else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2496 	       == GIMPLE_BINARY_RHS)
	/* Found the binary operation that consumes *NAME.  */
2497 	return stmt;
2498       else
2499 	return NULL;
2500     }
2501 }
2502 
2503 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2504 
2505 static bool
may_reassociate_p(tree type,enum tree_code code)2506 may_reassociate_p (tree type, enum tree_code code)
2507 {
2508   if (FLOAT_TYPE_P (type)
2509       && !flag_unsafe_math_optimizations)
2510     return false;
2511 
2512   return (commutative_tree_code (code)
2513 	  && associative_tree_code (code));
2514 }
2515 
2516 /* If the operation used in STMT is associative and commutative, go through the
2517    tree of the same operations and returns its root.  Distance to the root
2518    is stored in DISTANCE.  */
2519 
2520 static gimple *
find_associative_operation_root(gimple * stmt,unsigned * distance)2521 find_associative_operation_root (gimple *stmt, unsigned *distance)
2522 {
2523   tree lhs;
2524   gimple *next;
2525   enum tree_code code = gimple_assign_rhs_code (stmt);
2526   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2527   unsigned dist = 0;
2528 
2529   if (!may_reassociate_p (type, code))
2530     return NULL;
2531 
      /* Climb the chain of uses as long as each use is the same operation;
	 DIST counts the number of steps taken.  */
2532   while (1)
2533     {
2534       lhs = gimple_assign_lhs (stmt);
2535       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2536 
2537       next = find_use_stmt (&lhs);
2538       if (!next
2539 	  || gimple_assign_rhs_code (next) != code)
2540 	break;
2541 
2542       stmt = next;
2543       dist++;
2544     }
2545 
      /* DISTANCE is optional output -- callers pass NULL when they only
	 need the root.  */
2546   if (distance)
2547     *distance = dist;
2548   return stmt;
2549 }
2550 
2551 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2552    is no such statement, returns NULL_TREE.  In case the operation used on
2553    NAME1 and NAME2 is associative and commutative, returns the root of the
2554    tree formed by this operation instead of the statement that uses NAME1 or
2555    NAME2.  */
2556 
2557 static gimple *
find_common_use_stmt(tree * name1,tree * name2)2558 find_common_use_stmt (tree *name1, tree *name2)
2559 {
2560   gimple *stmt1, *stmt2;
2561 
2562   stmt1 = find_use_stmt (name1);
2563   if (!stmt1)
2564     return NULL;
2565 
2566   stmt2 = find_use_stmt (name2);
2567   if (!stmt2)
2568     return NULL;
2569 
      /* Direct hit: both names feed the same binary statement.  */
2570   if (stmt1 == stmt2)
2571     return stmt1;
2572 
      /* Otherwise the names may still meet at the root of a reassociable
	 expression tree; compare the roots.  */
2573   stmt1 = find_associative_operation_root (stmt1, NULL);
2574   if (!stmt1)
2575     return NULL;
2576   stmt2 = find_associative_operation_root (stmt2, NULL);
2577   if (!stmt2)
2578     return NULL;
2579 
2580   return (stmt1 == stmt2 ? stmt1 : NULL);
2581 }
2582 
2583 /* Checks whether R1 and R2 are combined together using CODE, with the result
2584    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2585    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2586 
2587 static bool
combinable_refs_p(dref r1,dref r2,enum tree_code * code,bool * swap,tree * rslt_type)2588 combinable_refs_p (dref r1, dref r2,
2589 		   enum tree_code *code, bool *swap, tree *rslt_type)
2590 {
2591   enum tree_code acode;
2592   bool aswap;
2593   tree atype;
2594   tree name1, name2;
2595   gimple *stmt;
2596 
2597   name1 = name_for_ref (r1);
2598   name2 = name_for_ref (r2);
2599   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2600 
2601   stmt = find_common_use_stmt (&name1, &name2);
2602 
2603   if (!stmt
2604       /* A simple post-dominance check - make sure the combination
2605          is executed under the same condition as the references.  */
2606       || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2607 	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2608     return false;
2609 
      /* SWAP only matters for non-commutative codes, where operand order
	 in the combining statement is significant.  */
2610   acode = gimple_assign_rhs_code (stmt);
2611   aswap = (!commutative_tree_code (acode)
2612 	   && gimple_assign_rhs1 (stmt) != name1);
2613   atype = TREE_TYPE (gimple_assign_lhs (stmt));
2614 
2615   if (*code == ERROR_MARK)
2616     {
      /* First pair examined: record the operation for later pairs to
	 match against.  */
2617       *code = acode;
2618       *swap = aswap;
2619       *rslt_type = atype;
2620       return true;
2621     }
2622 
2623   return (*code == acode
2624 	  && *swap == aswap
2625 	  && *rslt_type == atype);
2626 }
2627 
2628 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2629    an assignment of the remaining operand.  */
2630 
2631 static void
remove_name_from_operation(gimple * stmt,tree op)2632 remove_name_from_operation (gimple *stmt, tree op)
2633 {
2634   tree other_op;
2635   gimple_stmt_iterator si;
2636 
2637   gcc_assert (is_gimple_assign (stmt));
2638 
      /* Pick whichever operand of the binary rhs is not OP.  */
2639   if (gimple_assign_rhs1 (stmt) == op)
2640     other_op = gimple_assign_rhs2 (stmt);
2641   else
2642     other_op = gimple_assign_rhs1 (stmt);
2643 
2644   si = gsi_for_stmt (stmt);
2645   gimple_assign_set_rhs_from_tree (&si, other_op);
2646 
2647   /* We should not have reallocated STMT.  */
2648   gcc_assert (gsi_stmt (si) == stmt);
2649 
2650   update_stmt (stmt);
2651 }
2652 
2653 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2654    are combined in a single statement, and returns this statement.  */
2655 
2656 static gimple *
reassociate_to_the_same_stmt(tree name1,tree name2)2657 reassociate_to_the_same_stmt (tree name1, tree name2)
2658 {
2659   gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2660   gassign *new_stmt, *tmp_stmt;
2661   tree new_name, tmp_name, var, r1, r2;
2662   unsigned dist1, dist2;
2663   enum tree_code code;
2664   tree type = TREE_TYPE (name1);
2665   gimple_stmt_iterator bsi;
2666 
2667   stmt1 = find_use_stmt (&name1);
2668   stmt2 = find_use_stmt (&name2);
2669   root1 = find_associative_operation_root (stmt1, &dist1);
2670   root2 = find_associative_operation_root (stmt2, &dist2);
2671   code = gimple_assign_rhs_code (stmt1);
2672 
2673   gcc_assert (root1 && root2 && root1 == root2
2674 	      && code == gimple_assign_rhs_code (stmt2));
2675 
2676   /* Find the root of the nearest expression in that both NAME1 and NAME2
2677      are used.  */
2678   r1 = name1;
2679   s1 = stmt1;
2680   r2 = name2;
2681   s2 = stmt2;
2682 
      /* First equalize the distances to the common root, then advance both
	 chains in lock-step until they meet at S1 == S2.  */
2683   while (dist1 > dist2)
2684     {
2685       s1 = find_use_stmt (&r1);
2686       r1 = gimple_assign_lhs (s1);
2687       dist1--;
2688     }
2689   while (dist2 > dist1)
2690     {
2691       s2 = find_use_stmt (&r2);
2692       r2 = gimple_assign_lhs (s2);
2693       dist2--;
2694     }
2695 
2696   while (s1 != s2)
2697     {
2698       s1 = find_use_stmt (&r1);
2699       r1 = gimple_assign_lhs (s1);
2700       s2 = find_use_stmt (&r2);
2701       r2 = gimple_assign_lhs (s2);
2702     }
2703 
2704   /* Remove NAME1 and NAME2 from the statements in that they are used
2705      currently.  */
2706   remove_name_from_operation (stmt1, name1);
2707   remove_name_from_operation (stmt2, name2);
2708 
2709   /* Insert the new statement combining NAME1 and NAME2 before S1, and
2710      combine it with the rhs of S1.  */
2711   var = create_tmp_reg (type, "predreastmp");
2712   new_name = make_ssa_name (var);
2713   new_stmt = gimple_build_assign (new_name, code, name1, name2);
2714 
2715   var = create_tmp_reg (type, "predreastmp");
2716   tmp_name = make_ssa_name (var);
2717 
2718   /* Rhs of S1 may now be either a binary expression with operation
2719      CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2720      so that name1 or name2 was removed from it).  */
2721   tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2722 				  gimple_assign_rhs1 (s1),
2723 				  gimple_assign_rhs2 (s1));
2724 
      /* S1 becomes NEW_NAME CODE TMP_NAME; its former rhs lives on in
	 TMP_STMT, inserted before it together with NEW_STMT.  */
2725   bsi = gsi_for_stmt (s1);
2726   gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2727   s1 = gsi_stmt (bsi);
2728   update_stmt (s1);
2729 
2730   gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2731   gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2732 
2733   return new_stmt;
2734 }
2735 
2736 /* Returns the statement that combines references R1 and R2.  In case R1
2737    and R2 are not used in the same statement, but they are used with an
2738    associative and commutative operation in the same expression, reassociate
2739    the expression so that they are used in the same statement.  */
2740 
2741 static gimple *
stmt_combining_refs(dref r1,dref r2)2742 stmt_combining_refs (dref r1, dref r2)
2743 {
2744   gimple *stmt1, *stmt2;
2745   tree name1 = name_for_ref (r1);
2746   tree name2 = name_for_ref (r2);
2747 
2748   stmt1 = find_use_stmt (&name1);
2749   stmt2 = find_use_stmt (&name2);
2750   if (stmt1 == stmt2)
2751     return stmt1;
2752 
      /* The names meet only deeper in a reassociable expression tree;
	 rewrite it so they are combined by a single statement.  */
2753   return reassociate_to_the_same_stmt (name1, name2);
2754 }
2755 
2756 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2757    description of the new chain is returned, otherwise we return NULL.  */
2758 
2759 static chain_p
combine_chains(chain_p ch1,chain_p ch2)2760 combine_chains (chain_p ch1, chain_p ch2)
2761 {
2762   dref r1, r2, nw;
2763   enum tree_code op = ERROR_MARK;
2764   bool swap = false;
2765   chain_p new_chain;
2766   unsigned i;
2767   tree rslt_type = NULL_TREE;
2768 
      /* The chains must be distinct and structurally parallel: same length
	 and same number of references.  */
2769   if (ch1 == ch2)
2770     return NULL;
2771   if (ch1->length != ch2->length)
2772     return NULL;
2773 
2774   if (ch1->refs.length () != ch2->refs.length ())
2775     return NULL;
2776 
      /* Every pair of corresponding references must be at the same distance
	 and combined by one consistent operation (OP/SWAP/RSLT_TYPE are
	 filled in from the first pair, then checked against the rest).  */
2777   for (i = 0; (ch1->refs.iterate (i, &r1)
2778 	       && ch2->refs.iterate (i, &r2)); i++)
2779     {
2780       if (r1->distance != r2->distance)
2781 	return NULL;
2782 
2783       if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2784 	return NULL;
2785     }
2786 
      /* For a non-commutative OP used as R2 OP R1, swap the chains so the
	 recorded order matches the operand order.  */
2787   if (swap)
2788     std::swap (ch1, ch2);
2789 
2790   new_chain = XCNEW (struct chain);
2791   new_chain->type = CT_COMBINATION;
2792   new_chain->op = op;
2793   new_chain->ch1 = ch1;
2794   new_chain->ch2 = ch2;
2795   new_chain->rslt_type = rslt_type;
2796   new_chain->length = ch1->length;
2797 
2798   for (i = 0; (ch1->refs.iterate (i, &r1)
2799 	       && ch2->refs.iterate (i, &r2)); i++)
2800     {
2801       nw = XCNEW (class dref_d);
2802       nw->stmt = stmt_combining_refs (r1, r2);
2803       nw->distance = r1->distance;
2804 
2805       new_chain->refs.safe_push (nw);
2806     }
2807 
2808   ch1->combined = true;
2809   ch2->combined = true;
2810   return new_chain;
2811 }
2812 
2813 /* Recursively update position information of all offspring chains to ROOT
2814    chain's position information.  */
2815 
2816 static void
update_pos_for_combined_chains(chain_p root)2817 update_pos_for_combined_chains (chain_p root)
2818 {
2819   chain_p ch1 = root->ch1, ch2 = root->ch2;
2820   dref ref, ref1, ref2;
      /* Copy ROOT's position to the corresponding reference in each child
	 chain, then recurse into children that are themselves combined.  */
2821   for (unsigned j = 0; (root->refs.iterate (j, &ref)
2822 			&& ch1->refs.iterate (j, &ref1)
2823 			&& ch2->refs.iterate (j, &ref2)); ++j)
2824     ref1->pos = ref2->pos = ref->pos;
2825 
2826   if (ch1->type == CT_COMBINATION)
2827     update_pos_for_combined_chains (ch1);
2828   if (ch2->type == CT_COMBINATION)
2829     update_pos_for_combined_chains (ch2);
2830 }
2831 
2832 /* Returns true if statement S1 dominates statement S2.  */
2833 
2834 static bool
pcom_stmt_dominates_stmt_p(gimple * s1,gimple * s2)2835 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2836 {
2837   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2838 
2839   if (!bb1 || s1 == s2)
2840     return true;
2841 
2842   if (bb1 == bb2)
2843     return gimple_uid (s1) < gimple_uid (s2);
2844 
2845   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2846 }
2847 
2848 /* Try to combine the CHAINS in LOOP.  */
2849 
2850 static void
try_combine_chains(class loop * loop,vec<chain_p> * chains)2851 try_combine_chains (class loop *loop, vec<chain_p> *chains)
2852 {
2853   unsigned i, j;
2854   chain_p ch1, ch2, cch;
2855   auto_vec<chain_p> worklist;
2856   bool combined_p = false;
2857 
2858   FOR_EACH_VEC_ELT (*chains, i, ch1)
2859     if (chain_can_be_combined_p (ch1))
2860       worklist.safe_push (ch1);
2861 
      /* Greedy pairwise combination: each successful combination is pushed
	 back so it may combine further with remaining chains.  */
2862   while (!worklist.is_empty ())
2863     {
2864       ch1 = worklist.pop ();
2865       if (!chain_can_be_combined_p (ch1))
2866 	continue;
2867 
2868       FOR_EACH_VEC_ELT (*chains, j, ch2)
2869 	{
2870 	  if (!chain_can_be_combined_p (ch2))
2871 	    continue;
2872 
2873 	  cch = combine_chains (ch1, ch2);
2874 	  if (cch)
2875 	    {
2876 	      worklist.safe_push (cch);
2877 	      chains->safe_push (cch);
2878 	      combined_p = true;
2879 	      break;
2880 	    }
2881 	}
2882     }
2883   if (!combined_p)
2884     return;
2885 
2886   /* Setup UID for all statements in dominance order.  */
2887   basic_block *bbs = get_loop_body_in_dom_order (loop);
2888   renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2889   free (bbs);
2890 
2891   /* Re-association in combined chains may generate statements different to
2892      order of references of the original chain.  We need to keep references
2893      of combined chain in dominance order so that all uses will be inserted
2894      after definitions.  Note:
2895        A) This is necessary for all combined chains.
2896        B) This is only necessary for ZERO distance references because other
2897 	  references inherit value from loop carried PHIs.
2898 
2899      We first update position information for all combined chains.  */
2900   dref ref;
2901   for (i = 0; chains->iterate (i, &ch1); ++i)
2902     {
      /* Only top-level combinations (not themselves combined into a larger
	 chain) start the recursive position propagation.  */
2903       if (ch1->type != CT_COMBINATION || ch1->combined)
2904 	continue;
2905 
2906       for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2907 	ref->pos = gimple_uid (ref->stmt);
2908 
2909       update_pos_for_combined_chains (ch1);
2910     }
2911   /* Then sort references according to newly updated position information.  */
2912   for (i = 0; chains->iterate (i, &ch1); ++i)
2913     {
2914       if (ch1->type != CT_COMBINATION && !ch1->combined)
2915 	continue;
2916 
2917       /* Find the first reference with non-ZERO distance.  */
2918       if (ch1->length == 0)
2919 	j = ch1->refs.length();
2920       else
2921 	{
2922 	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2923 	    if (ref->distance != 0)
2924 	      break;
2925 	}
2926 
2927       /* Sort all ZERO distance references by position.  */
2928       qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2929 
2930       if (ch1->combined)
2931 	continue;
2932 
2933       /* For ZERO length chain, has_max_use_after must be true since root
2934 	 combined stmt must dominates others.  */
2935       if (ch1->length == 0)
2936 	{
2937 	  ch1->has_max_use_after = true;
2938 	  continue;
2939 	}
2940       /* Check if there is use at max distance after root for combined chains
2941 	 and set flag accordingly.  */
2942       ch1->has_max_use_after = false;
2943       gimple *root_stmt = get_chain_root (ch1)->stmt;
2944       for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2945 	{
2946 	  if (ref->distance == ch1->length
2947 	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2948 	    {
2949 	      ch1->has_max_use_after = true;
2950 	      break;
2951 	    }
2952 	}
2953     }
2954 }
2955 
/* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
   if this is impossible because one of these initializers may trap, true
   otherwise.  */

static bool
prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;

  /* For now we can't eliminate stores if some of them are conditionally
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  /* Nothing to initialize for intra-iteration store elimination.  */
  if (n == 0 && chain->type == CT_STORE_STORE)
    return true;

  /* For store elimination chain, there is nothing to initialize if stores
     to be eliminated only store loop invariant values into memory.  */
  if (chain->type == CT_STORE_STORE
      && is_inv_store_elimination_chain (loop, chain))
    {
      chain->inv_store_elimination = true;
      return true;
    }

  chain->inits.create (n);
  chain->inits.safe_grow_cleared (n);

  /* For store elimination chain like below:

     for (i = 0; i < len; i++)
       {
	 a[i] = 1;
	 // a[i + 1] = ...
	 a[i + 2] = 3;
       }

     store to a[i + 1] is missed in loop body, it acts like bubbles.  The
     content of a[i + 1] remains the same if the loop iterates fewer times
     than chain->length.  We need to set up root variables for such stores
     by loading from memory before loop.  Note we only need to load bubble
     elements because loop body is guaranteed to be executed at least once
     after loop's preheader edge.  */
  auto_vec<bool> bubbles;
  bubbles.safe_grow_cleared (n + 1);
  /* Mark the distances actually covered by stores; unmarked slots are the
     bubbles that need a load before the loop.  */
  for (i = 0; i < chain->refs.length (); i++)
    bubbles[chain->refs[i]->distance] = true;

  struct data_reference *dr = get_chain_root (chain)->ref;
  for (i = 0; i < n; i++)
    {
      if (bubbles[i])
	continue;

      gimple_seq stmts = NULL;

      tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
3023 
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (class loop *loop, chain_p chain)
{
  /* An invariant chain needs a single initializer, other chains need one
     per distance they cover.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Store elimination chains are handled by a dedicated routine.  */
  if (chain->type == CT_STORE_STORE)
    return prepare_initializers_chain_store_elim (loop, chain);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      /* Skip slots already filled from looparound phi nodes.  */
      if (chain->inits[i] != NULL_TREE)
	continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	{
	  /* If the reference is not accessed on every iteration, hoisting
	     the load before the loop could introduce a trap the original
	     program would not have; give up.  */
	  gimple_seq_discard (stmts);
	  return false;
	}

      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
3079 
3080 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3081    be used because the initializers might trap.  */
3082 
3083 static void
prepare_initializers(class loop * loop,vec<chain_p> chains)3084 prepare_initializers (class loop *loop, vec<chain_p> chains)
3085 {
3086   chain_p chain;
3087   unsigned i;
3088 
3089   for (i = 0; i < chains.length (); )
3090     {
3091       chain = chains[i];
3092       if (prepare_initializers_chain (loop, chain))
3093 	i++;
3094       else
3095 	{
3096 	  release_chain (chain);
3097 	  chains.unordered_remove (i);
3098 	}
3099     }
3100 }
3101 
/* Generates finalizer memory references for CHAIN in LOOP.  Returns true
   if finalizer code for CHAIN can be generated, otherwise false.  */

static bool
prepare_finalizers_chain (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree fini, niters = number_of_latch_executions (loop);

  /* For now we can't eliminate stores if some of them are conditionally
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  chain->finis.create (n);
  for (i = 0; i < n; i++)
    chain->finis.quick_push (NULL_TREE);

  /* We never use looparound phi node for store elimination chains.  */

  /* Find the finalizers for the variables, and check that they cannot
     trap.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;
      gcc_assert (chain->finis[i] == NULL_TREE);

      /* Force NITERS into a gimple value on first need; later iterations
	 reuse the resulting constant or SSA name.  */
      if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
	{
	  niters = unshare_expr (niters);
	  niters = force_gimple_operand (niters, &stmts, true, NULL);
	  if (stmts)
	    {
	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
	      stmts = NULL;
	    }
	}
      fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);

      chain->finis[i] = fini;
    }

  return true;
}
3149 
3150 /* Generates finalizer memory reference for CHAINS in LOOP.  Returns true
3151    if finalizer code generation for CHAINS breaks loop closed ssa form.  */
3152 
3153 static bool
prepare_finalizers(class loop * loop,vec<chain_p> chains)3154 prepare_finalizers (class loop *loop, vec<chain_p> chains)
3155 {
3156   chain_p chain;
3157   unsigned i;
3158   bool loop_closed_ssa = false;
3159 
3160   for (i = 0; i < chains.length ();)
3161     {
3162       chain = chains[i];
3163 
3164       /* Finalizer is only necessary for inter-iteration store elimination
3165 	 chains.  */
3166       if (chain->length == 0 || chain->type != CT_STORE_STORE)
3167 	{
3168 	  i++;
3169 	  continue;
3170 	}
3171 
3172       if (prepare_finalizers_chain (loop, chain))
3173 	{
3174 	  i++;
3175 	  /* Be conservative, assume loop closed ssa form is corrupted
3176 	     by store-store chain.  Though it's not always the case if
3177 	     eliminated stores only store loop invariant values into
3178 	     memory.  */
3179 	  loop_closed_ssa = true;
3180 	}
3181       else
3182 	{
3183 	  release_chain (chain);
3184 	  chains.unordered_remove (i);
3185 	}
3186     }
3187   return loop_closed_ssa;
3188 }
3189 
3190 /* Insert all initializing gimple stmts into loop's entry edge.  */
3191 
3192 static void
insert_init_seqs(class loop * loop,vec<chain_p> chains)3193 insert_init_seqs (class loop *loop, vec<chain_p> chains)
3194 {
3195   unsigned i;
3196   edge entry = loop_preheader_edge (loop);
3197 
3198   for (i = 0; i < chains.length (); ++i)
3199     if (chains[i]->init_seq)
3200       {
3201 	gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3202 	chains[i]->init_seq = NULL;
3203       }
3204 }
3205 
/* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
   if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
   form was corrupted.  */

static unsigned
tree_predictive_commoning_loop (class loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  class tree_niter_desc desc;
  bool unroll = false, loop_closed_ssa = false;
  edge exit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Nothing for predictive commoning if loop only iterates 1 time.  */
  if (get_max_loop_iterations_int (loop) == 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");

      return 0;
    }

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  auto_bitmap tmp_vars;
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);
  loop_closed_ssa = prepare_finalizers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (loop, &chains);

  insert_init_seqs (loop, chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  /* Common cleanup for both the successful and the bail-out paths.  */
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
}
3348 
3349 /* Runs predictive commoning.  */
3350 
3351 unsigned
tree_predictive_commoning(void)3352 tree_predictive_commoning (void)
3353 {
3354   class loop *loop;
3355   unsigned ret = 0, changed = 0;
3356 
3357   initialize_original_copy_tables ();
3358   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3359     if (optimize_loop_for_speed_p (loop))
3360       {
3361 	changed |= tree_predictive_commoning_loop (loop);
3362       }
3363   free_original_copy_tables ();
3364 
3365   if (changed > 0)
3366     {
3367       scev_reset ();
3368 
3369       if (changed > 1)
3370 	rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3371 
3372       ret = TODO_cleanup_cfg;
3373     }
3374 
3375   return ret;
3376 }
3377 
3378 /* Predictive commoning Pass.  */
3379 
3380 static unsigned
run_tree_predictive_commoning(struct function * fun)3381 run_tree_predictive_commoning (struct function *fun)
3382 {
3383   if (number_of_loops (fun) <= 1)
3384     return 0;
3385 
3386   return tree_predictive_commoning ();
3387 }
3388 
namespace {

/* Pass metadata for the predictive commoning pass ("pcom").  */

const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* The predictive commoning pass; gated on -fpredictive-commoning.  */

class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace
3421 
3422 gimple_opt_pass *
make_pass_predcom(gcc::context * ctxt)3423 make_pass_predcom (gcc::context *ctxt)
3424 {
3425   return new pass_predcom (ctxt);
3426 }
3427 
3428 
3429