xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-predcom.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Predictive commoning.
2    Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
22    as generalized strength reduction -- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
52    2) Inside each of the group, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
130       (N + 1) for each root reference (N for references for that we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
155           x = x + i;
156           b[10] = x;
157        }
158 
159    TODO -- stores killing other stores can be taken into account, e.g.,
160    for (i = 0; i < n; i++)
161      {
162        a[i] = 1;
163        a[i+2] = 2;
164      }
165 
166    can be replaced with
167 
168    t0 = a[0];
169    t1 = a[1];
170    for (i = 0; i < n; i++)
171      {
172        a[i] = 1;
173        t2 = 2;
174        t0 = t1;
175        t1 = t2;
176      }
177    a[n] = t0;
178    a[n+1] = t1;
179 
180    The interesting part is that this would generalize store motion; still, since
181    sm is performed elsewhere, it does not seem that important.
182 
183    Predictive commoning can be generalized for arbitrary computations (not
184    just memory loads), and also nontrivial transfer functions (e.g., replacing
185    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
186 
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "backend.h"
191 #include "rtl.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "predict.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "gimple-pretty-print.h"
198 #include "alias.h"
199 #include "fold-const.h"
200 #include "cfgloop.h"
201 #include "tree-eh.h"
202 #include "gimplify.h"
203 #include "gimple-iterator.h"
204 #include "gimplify-me.h"
205 #include "tree-ssa-loop-ivopts.h"
206 #include "tree-ssa-loop-manip.h"
207 #include "tree-ssa-loop-niter.h"
208 #include "tree-ssa-loop.h"
209 #include "tree-into-ssa.h"
210 #include "tree-dfa.h"
211 #include "tree-ssa.h"
212 #include "tree-data-ref.h"
213 #include "tree-scalar-evolution.h"
214 #include "params.h"
215 #include "tree-affine.h"
216 #include "builtins.h"
217 
/* The maximum number of iterations between the considered memory
   references (i.e., the longest reuse distance a chain may bridge).
   Kept smaller when few registers are available, since each unit of
   distance needs a carried-over register.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
222 
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself; NULL for artificial entries (looparound phis
     and combination references, see dump_dref).  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
253 
254 
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant, i.e.,
     loop invariant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains (see point 4 of the overview comment).  */
  CT_COMBINATION
};
271 
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  Unused otherwise.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain; the root is at index 0.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
307 
308 
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero (we could not prove either).  */
  RS_ANY
};
323 
/* Components of the data dependence graph (a maximal set of references
   connected by dependences, see split_data_refs_to_components).  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
337 
/* Bitmap of ssa names defined by looparound phi nodes covered by chains,
   indexed by SSA name version.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand, shared across all
   expansions done by this pass.  */

static hash_map<tree, name_expansion *> *name_expansions;
345 
346 /* Dumps data reference REF to FILE.  */
347 
348 extern void dump_dref (FILE *, dref);
349 void
350 dump_dref (FILE *file, dref ref)
351 {
352   if (ref->ref)
353     {
354       fprintf (file, "    ");
355       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
356       fprintf (file, " (id %u%s)\n", ref->pos,
357 	       DR_IS_READ (ref->ref) ? "" : ", write");
358 
359       fprintf (file, "      offset ");
360       print_decs (ref->offset, file);
361       fprintf (file, "\n");
362 
363       fprintf (file, "      distance %u\n", ref->distance);
364     }
365   else
366     {
367       if (gimple_code (ref->stmt) == GIMPLE_PHI)
368 	fprintf (file, "    looparound ref\n");
369       else
370 	fprintf (file, "    combination ref\n");
371       fprintf (file, "      in statement ");
372       print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
373       fprintf (file, "\n");
374       fprintf (file, "      distance %u\n", ref->distance);
375     }
376 
377 }
378 
379 /* Dumps CHAIN to FILE.  */
380 
381 extern void dump_chain (FILE *, chain_p);
382 void
383 dump_chain (FILE *file, chain_p chain)
384 {
385   dref a;
386   const char *chain_type;
387   unsigned i;
388   tree var;
389 
390   switch (chain->type)
391     {
392     case CT_INVARIANT:
393       chain_type = "Load motion";
394       break;
395 
396     case CT_LOAD:
397       chain_type = "Loads-only";
398       break;
399 
400     case CT_STORE_LOAD:
401       chain_type = "Store-loads";
402       break;
403 
404     case CT_COMBINATION:
405       chain_type = "Combination";
406       break;
407 
408     default:
409       gcc_unreachable ();
410     }
411 
412   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
413 	   chain->combined ? " (combined)" : "");
414   if (chain->type != CT_INVARIANT)
415     fprintf (file, "  max distance %u%s\n", chain->length,
416 	     chain->has_max_use_after ? "" : ", may reuse first");
417 
418   if (chain->type == CT_COMBINATION)
419     {
420       fprintf (file, "  equal to %p %s %p in type ",
421 	       (void *) chain->ch1, op_symbol_code (chain->op),
422 	       (void *) chain->ch2);
423       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
424       fprintf (file, "\n");
425     }
426 
427   if (chain->vars.exists ())
428     {
429       fprintf (file, "  vars");
430       FOR_EACH_VEC_ELT (chain->vars, i, var)
431 	{
432 	  fprintf (file, " ");
433 	  print_generic_expr (file, var, TDF_SLIM);
434 	}
435       fprintf (file, "\n");
436     }
437 
438   if (chain->inits.exists ())
439     {
440       fprintf (file, "  inits");
441       FOR_EACH_VEC_ELT (chain->inits, i, var)
442 	{
443 	  fprintf (file, " ");
444 	  print_generic_expr (file, var, TDF_SLIM);
445 	}
446       fprintf (file, "\n");
447     }
448 
449   fprintf (file, "  references:\n");
450   FOR_EACH_VEC_ELT (chain->refs, i, a)
451     dump_dref (file, a);
452 
453   fprintf (file, "\n");
454 }
455 
456 /* Dumps CHAINS to FILE.  */
457 
458 extern void dump_chains (FILE *, vec<chain_p> );
459 void
460 dump_chains (FILE *file, vec<chain_p> chains)
461 {
462   chain_p chain;
463   unsigned i;
464 
465   FOR_EACH_VEC_ELT (chains, i, chain)
466     dump_chain (file, chain);
467 }
468 
469 /* Dumps COMP to FILE.  */
470 
471 extern void dump_component (FILE *, struct component *);
472 void
473 dump_component (FILE *file, struct component *comp)
474 {
475   dref a;
476   unsigned i;
477 
478   fprintf (file, "Component%s:\n",
479 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
480   FOR_EACH_VEC_ELT (comp->refs, i, a)
481     dump_dref (file, a);
482   fprintf (file, "\n");
483 }
484 
485 /* Dumps COMPS to FILE.  */
486 
487 extern void dump_components (FILE *, struct component *);
488 void
489 dump_components (FILE *file, struct component *comps)
490 {
491   struct component *comp;
492 
493   for (comp = comps; comp; comp = comp->next)
494     dump_component (file, comp);
495 }
496 
497 /* Frees a chain CHAIN.  */
498 
499 static void
500 release_chain (chain_p chain)
501 {
502   dref ref;
503   unsigned i;
504 
505   if (chain == NULL)
506     return;
507 
508   FOR_EACH_VEC_ELT (chain->refs, i, ref)
509     free (ref);
510 
511   chain->refs.release ();
512   chain->vars.release ();
513   chain->inits.release ();
514 
515   free (chain);
516 }
517 
518 /* Frees CHAINS.  */
519 
520 static void
521 release_chains (vec<chain_p> chains)
522 {
523   unsigned i;
524   chain_p chain;
525 
526   FOR_EACH_VEC_ELT (chains, i, chain)
527     release_chain (chain);
528   chains.release ();
529 }
530 
/* Frees a component COMP.  Only the component structure and its refs
   vector are released; the drefs themselves are owned elsewhere.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
539 
540 /* Frees list of components COMPS.  */
541 
542 static void
543 release_components (struct component *comps)
544 {
545   struct component *act, *next;
546 
547   for (act = comps; act; act = next)
548     {
549       next = act->next;
550       release_component (act);
551     }
552 }
553 
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  /* Climb to the root of the tree that contains A.  */
  unsigned root = a;
  while (fathers[root] != root)
    root = fathers[root];

  /* Path shortening: re-point everything on the path directly at ROOT.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
573 
574 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
575    components, A and B are components to merge.  */
576 
577 static void
578 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
579 {
580   unsigned ca = component_of (fathers, a);
581   unsigned cb = component_of (fathers, b);
582 
583   if (ca == cb)
584     return;
585 
586   if (sizes[ca] < sizes[cb])
587     {
588       sizes[cb] += sizes[ca];
589       fathers[ca] = cb;
590     }
591   else
592     {
593       sizes[ca] += sizes[cb];
594       fathers[cb] = ca;
595     }
596 }
597 
598 /* Returns true if A is a reference that is suitable for predictive commoning
599    in the innermost loop that contains it.  REF_STEP is set according to the
600    step of the reference A.  */
601 
602 static bool
603 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
604 {
605   tree ref = DR_REF (a), step = DR_STEP (a);
606 
607   if (!step
608       || TREE_THIS_VOLATILE (ref)
609       || !is_gimple_reg_type (TREE_TYPE (ref))
610       || tree_could_throw_p (ref))
611     return false;
612 
613   if (integer_zerop (step))
614     *ref_step = RS_INVARIANT;
615   else if (integer_nonzerop (step))
616     *ref_step = RS_NONZERO;
617   else
618     *ref_step = RS_ANY;
619 
620   return true;
621 }
622 
623 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
624 
625 static void
626 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
627 {
628   tree type = TREE_TYPE (DR_OFFSET (dr));
629   aff_tree delta;
630 
631   tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
632 				  &name_expansions);
633   aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
634   aff_combination_add (offset, &delta);
635 }
636 
637 /* Determines number of iterations of the innermost enclosing loop before B
638    refers to exactly the same location as A and stores it to OFF.  If A and
639    B do not have the same step, they never meet, or anything else fails,
640    returns false, otherwise returns true.  Both A and B are assumed to
641    satisfy suitable_reference_p.  */
642 
643 static bool
644 determine_offset (struct data_reference *a, struct data_reference *b,
645 		  widest_int *off)
646 {
647   aff_tree diff, baseb, step;
648   tree typea, typeb;
649 
650   /* Check that both the references access the location in the same type.  */
651   typea = TREE_TYPE (DR_REF (a));
652   typeb = TREE_TYPE (DR_REF (b));
653   if (!useless_type_conversion_p (typeb, typea))
654     return false;
655 
656   /* Check whether the base address and the step of both references is the
657      same.  */
658   if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
659       || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
660     return false;
661 
662   if (integer_zerop (DR_STEP (a)))
663     {
664       /* If the references have loop invariant address, check that they access
665 	 exactly the same location.  */
666       *off = 0;
667       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
668 	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
669     }
670 
671   /* Compare the offsets of the addresses, and check whether the difference
672      is a multiple of step.  */
673   aff_combination_dr_offset (a, &diff);
674   aff_combination_dr_offset (b, &baseb);
675   aff_combination_scale (&baseb, -1);
676   aff_combination_add (&diff, &baseb);
677 
678   tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
679 				  &step, &name_expansions);
680   return aff_combination_constant_multiple_p (&diff, &step, off);
681 }
682 
683 /* Returns the last basic block in LOOP for that we are sure that
684    it is executed whenever the loop is entered.  */
685 
686 static basic_block
687 last_always_executed_block (struct loop *loop)
688 {
689   unsigned i;
690   vec<edge> exits = get_loop_exit_edges (loop);
691   edge ex;
692   basic_block last = loop->latch;
693 
694   FOR_EACH_VEC_ELT (exits, i, ex)
695     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
696   exits.release ();
697 
698   return last;
699 }
700 
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Returns the resulting list of components, or NULL on failure.  Uses a
   union-find forest (COMP_FATHER / COMP_SIZE, see component_of and
   merge_comps) over the indices 0 .. N-1 of DATAREFS; index N is an extra
   component collecting the "bad" references that cannot be handled.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  /* Initialization: each data reference becomes a singleton component,
     and remembers its index in its aux field.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* References unsuitable for predictive commoning are put into the bad
     component right away.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge components connected by a dependence, treating dependences we
     cannot express as a constant iteration distance specially.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in its component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Translate the union-find forest into the list of components, skipping
     everything that ended up in the bad component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      /* POS records the dominance order within the component; the refs
	 are visited in DATAREFS order here.  */
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
848 
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  As a side effect, sets COMP->comp_step and the OFFSET field of
   every reference (relative to the first one).  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Every reference must be executed exactly once per iteration.  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The references are expected to come ordered so that each block is
	 dominated by the previous one (starting from the loop header).  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  /* The first reference determines the step of the whole component; it
     passed suitable_reference_p already (unsuitable references were moved
     to the bad component in split_data_refs_to_components).  */
  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  /* Compute the offset of every other reference relative to FIRST; fail
     if any of them is not a known constant number of iterations.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
899 
900 /* Check the conditions on references inside each of components COMPS,
901    and remove the unsuitable components from the list.  The new list
902    of components is returned.  The conditions are described in 2) at
903    the beginning of this file.  LOOP is the current loop.  */
904 
905 static struct component *
906 filter_suitable_components (struct loop *loop, struct component *comps)
907 {
908   struct component **comp, *act;
909 
910   for (comp = &comps; *comp; )
911     {
912       act = *comp;
913       if (suitable_component_p (loop, act))
914 	comp = &act->next;
915       else
916 	{
917 	  dref ref;
918 	  unsigned i;
919 
920 	  *comp = act->next;
921 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
922 	    free (ref);
923 	  release_component (act);
924 	}
925     }
926 
927   return comps;
928 }
929 
930 /* Compares two drefs A and B by their offset and position.  Callback for
931    qsort.  */
932 
933 static int
934 order_drefs (const void *a, const void *b)
935 {
936   const dref *const da = (const dref *) a;
937   const dref *const db = (const dref *) b;
938   int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
939 
940   if (offcmp != 0)
941     return offcmp;
942 
943   return (*da)->pos - (*db)->pos;
944 }
945 
946 /* Compares two drefs A and B by their position.  Callback for qsort.  */
947 
948 static int
949 order_drefs_by_pos (const void *a, const void *b)
950 {
951   const dref *const da = (const dref *) a;
952   const dref *const db = (const dref *) b;
953 
954   return (*da)->pos - (*db)->pos;
955 }
956 
/* Returns root of the CHAIN, i.e. the first reference stored in it.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
964 
965 /* Adds REF to the chain CHAIN.  */
966 
967 static void
968 add_ref_to_chain (chain_p chain, dref ref)
969 {
970   dref root = get_chain_root (chain);
971 
972   gcc_assert (wi::les_p (root->offset, ref->offset));
973   widest_int dist = ref->offset - root->offset;
974   if (wi::leu_p (MAX_DISTANCE, dist))
975     {
976       free (ref);
977       return;
978     }
979   gcc_assert (wi::fits_uhwi_p (dist));
980 
981   chain->refs.safe_push (ref);
982 
983   ref->distance = dist.to_uhwi ();
984 
985   if (ref->distance >= chain->length)
986     {
987       chain->length = ref->distance;
988       chain->has_max_use_after = false;
989     }
990 
991   if (ref->distance == chain->length
992       && ref->pos > root->pos)
993     chain->has_max_use_after = true;
994 
995   chain->all_always_accessed &= ref->always_accessed;
996 }
997 
998 /* Returns the chain for invariant component COMP.  */
999 
1000 static chain_p
1001 make_invariant_chain (struct component *comp)
1002 {
1003   chain_p chain = XCNEW (struct chain);
1004   unsigned i;
1005   dref ref;
1006 
1007   chain->type = CT_INVARIANT;
1008 
1009   chain->all_always_accessed = true;
1010 
1011   FOR_EACH_VEC_ELT (comp->refs, i, ref)
1012     {
1013       chain->refs.safe_push (ref);
1014       chain->all_always_accessed &= ref->always_accessed;
1015     }
1016 
1017   return chain;
1018 }
1019 
1020 /* Make a new chain rooted at REF.  */
1021 
1022 static chain_p
1023 make_rooted_chain (dref ref)
1024 {
1025   chain_p chain = XCNEW (struct chain);
1026 
1027   chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1028 
1029   chain->refs.safe_push (ref);
1030   chain->all_always_accessed = ref->always_accessed;
1031 
1032   ref->distance = 0;
1033 
1034   return chain;
1035 }
1036 
1037 /* Returns true if CHAIN is not trivial.  */
1038 
1039 static bool
1040 nontrivial_chain_p (chain_p chain)
1041 {
1042   return chain != NULL && chain->refs.length () > 1;
1043 }
1044 
1045 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1046    is no such name.  */
1047 
1048 static tree
1049 name_for_ref (dref ref)
1050 {
1051   tree name;
1052 
1053   if (is_gimple_assign (ref->stmt))
1054     {
1055       if (!ref->ref || DR_IS_READ (ref->ref))
1056 	name = gimple_assign_lhs (ref->stmt);
1057       else
1058 	name = gimple_assign_rhs1 (ref->stmt);
1059     }
1060   else
1061     name = PHI_RESULT (ref->stmt);
1062 
1063   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1064 }
1065 
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */

  /* Compute DIFF = offset of ROOT - offset of REF.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  /* DIFF must be a constant multiple OFF of the root's step ...  */
  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* ... and that multiple must be exactly DISTANCE iterations.  */
  if (off != distance)
    return false;

  return true;
}
1107 
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Determine the ssa name that holds the value of REF: the lhs for a load
     or a phi node, the stored rhs for a store.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi node in the loop header whose argument on the latch edge
     is NAME, i.e., a phi copying the value around the loop.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The initial value of the phi must come from a memory load performed
     before the loop.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* Check that the initializer accesses the location REF accessed
     REF->DISTANCE + 1 iterations before the root.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1171 
1172 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1173 
1174 static void
1175 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1176 {
1177   dref nw = XCNEW (struct dref_d), aref;
1178   unsigned i;
1179 
1180   nw->stmt = phi;
1181   nw->distance = ref->distance + 1;
1182   nw->always_accessed = 1;
1183 
1184   FOR_EACH_VEC_ELT (chain->refs, i, aref)
1185     if (aref->distance >= nw->distance)
1186       break;
1187   chain->refs.safe_insert (i, nw);
1188 
1189   if (nw->distance > chain->length)
1190     {
1191       chain->length = nw->distance;
1192       chain->has_max_use_after = false;
1193     }
1194 }
1195 
1196 /* For references in CHAIN that are copied around the LOOP (created previously
1197    by PRE, or by user), add the results of such copies to the chain.  This
1198    enables us to remove the copies by unrolling, and may need less registers
1199    (also, it may allow us to combine chains together).  */
1200 
1201 static void
1202 add_looparound_copies (struct loop *loop, chain_p chain)
1203 {
1204   unsigned i;
1205   dref ref, root = get_chain_root (chain);
1206   gphi *phi;
1207 
1208   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1209     {
1210       phi = find_looparound_phi (loop, ref, root);
1211       if (!phi)
1212 	continue;
1213 
1214       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1215       insert_looparound_copy (chain, ref, phi);
1216     }
1217 }
1218 
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Sort the references so that candidates for the same chain become
     adjacent; the loop below only compares each reference against the
     root of the currently open chain.  */
  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain when there is no open chain yet, when A is a
	 write (a write may only appear as the root of a chain), or when
	 A is too far from the current root for commoning to pay off.  */
      if (!chain || DR_IS_WRITE (a->ref)
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  /* Keep the closed chain only if it has more than one
	     reference.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Finish the last open chain the same way.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1271 
1272 /* Find roots of the values and determine distances in components COMPS, and
1273    separates the references to CHAINS.  LOOP is the current loop.  */
1274 
1275 static void
1276 determine_roots (struct loop *loop,
1277 		 struct component *comps, vec<chain_p> *chains)
1278 {
1279   struct component *comp;
1280 
1281   for (comp = comps; comp; comp = comp->next)
1282     determine_roots_comp (loop, comp, chains);
1283 }
1284 
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* A phi node can only be a use of NEW_TREE.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber makes the stored value undefined; copy the default
	     definition of the new variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1375 
/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  tree ref = DR_REF (dr);
  enum tree_code ref_code = ERROR_MARK;
  tree ref_type = NULL_TREE;
  tree ref_op1 = NULL_TREE;
  tree ref_op2 = NULL_TREE;

  /* Shift the address by ITER iterations of the step: a constant step is
     folded into the constant offset COFF, a symbolic step into OFF.  */
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     if the bitfield object also starts at a byte-boundary we can simply
     replicate the COMPONENT_REF, but we have to subtract the component's
     byte-offset from the MEM_REF address first.
     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      unsigned HOST_WIDE_INT boff;
      tree field = TREE_OPERAND (ref, 1);
      tree offset = component_ref_field_offset (ref);
      ref_type = TREE_TYPE (ref);
      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
      /* This can occur in Ada.  See the comment in get_bit_range.  */
      if (boff % BITS_PER_UNIT != 0
	  || !tree_fits_uhwi_p (offset))
	{
	  ref_code = BIT_FIELD_REF;
	  ref_op1 = DECL_SIZE (field);
	  ref_op2 = bitsize_zero_node;
	}
      else
	{
	  /* The field starts at a byte boundary: fold its byte offset into
	     COFF and rebuild the COMPONENT_REF on top of the MEM_REF.  */
	  boff >>= LOG2_BITS_PER_UNIT;
	  boff += tree_to_uhwi (offset);
	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
	  ref_code = COMPONENT_REF;
	  ref_op1 = field;
	  ref_op2 = TREE_OPERAND (ref, 2);
	  ref = TREE_OPERAND (ref, 0);
	}
    }
  /* Build the access as a MEM_REF of BASE + OFF, with COFF folded into
     the (aliasing) constant offset operand.  */
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
  tree type = build_aligned_type (TREE_TYPE (ref),
				  get_object_alignment (ref));
  ref = build2 (MEM_REF, type, addr, alias_ptr);
  if (ref_type)
    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
  return ref;
}
1442 
1443 /* Get the initialization expression for the INDEX-th temporary variable
1444    of CHAIN.  */
1445 
1446 static tree
1447 get_init_expr (chain_p chain, unsigned index)
1448 {
1449   if (chain->type == CT_COMBINATION)
1450     {
1451       tree e1 = get_init_expr (chain->ch1, index);
1452       tree e2 = get_init_expr (chain->ch2, index);
1453 
1454       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1455     }
1456   else
1457     return chain->inits[index];
1458 }
1459 
1460 /* Returns a new temporary variable used for the I-th variable carrying
1461    value of REF.  The variable's uid is marked in TMP_VARS.  */
1462 
1463 static tree
1464 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1465 {
1466   tree type = TREE_TYPE (ref);
1467   /* We never access the components of the temporary variable in predictive
1468      commoning.  */
1469   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1470   bitmap_set_bit (tmp_vars, DECL_UID (var));
1471   return var;
1472 }
1473 
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* When no reference with the maximal distance is used after the root
     (see add_ref_to_chain), the variable holding the oldest value can be
     reused for the newest one.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  /* Name the temporaries after the value the chain carries: the lhs of the
     root statement for a combination chain, the root memory reference
     otherwise.  */
  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  /* When reusing, the last slot aliases the first variable.  */
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Create VAR_I = PHI <INIT_I (preheader), VAR_{I+1} (latch)> for each
     variable, shifting the remembered values by one every iteration.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1527 
1528 /* Create the variables and initialization statement for root of chain
1529    CHAIN.  Uids of the newly created temporary variables are marked
1530    in TMP_VARS.  */
1531 
1532 static void
1533 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1534 {
1535   dref root = get_chain_root (chain);
1536   bool in_lhs = (chain->type == CT_STORE_LOAD
1537 		 || chain->type == CT_COMBINATION);
1538 
1539   initialize_root_vars (loop, chain, tmp_vars);
1540   replace_ref_with (root->stmt,
1541 		    chain->vars[chain->length],
1542 		    true, in_lhs);
1543 }
1544 
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* A second ssa name is needed only when the location is written in the
     loop; it then carries the value around the loop via the phi node
     created below.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* VAR = PHI <INIT (preheader), VARS[1] (latch)>.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* The location is only read, so a plain initialization before the
	 loop suffices.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1595 
1596 
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  /* Count the writes to the invariant location.  */
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* References up to the last write use VARS[0]; every write but the last
     one gets a fresh ssa name for it.  After the last write, the remaining
     reads use VARS[1] (the name the loop phi copies around, see
     initialize_root_vars_lm).  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}
1643 
1644 /* Returns the single statement in that NAME is used, excepting
1645    the looparound phi nodes contained in one of the chains.  If there is no
1646    such statement, or more statements, NULL is returned.  */
1647 
1648 static gimple *
1649 single_nonlooparound_use (tree name)
1650 {
1651   use_operand_p use;
1652   imm_use_iterator it;
1653   gimple *stmt, *ret = NULL;
1654 
1655   FOR_EACH_IMM_USE_FAST (use, it, name)
1656     {
1657       stmt = USE_STMT (use);
1658 
1659       if (gimple_code (stmt) == GIMPLE_PHI)
1660 	{
1661 	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
1662 	     could not be processed anyway, so just fail for them.  */
1663 	  if (bitmap_bit_p (looparound_phis,
1664 			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
1665 	    continue;
1666 
1667 	  return NULL;
1668 	}
1669       else if (is_gimple_debug (stmt))
1670 	continue;
1671       else if (ret != NULL)
1672 	return NULL;
1673       else
1674 	ret = stmt;
1675     }
1676 
1677   return ret;
1678 }
1679 
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Remove the phi node first, then continue with the single copy
	 assignment fed by its result, if there is one.  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of ssa-name copies, removing each statement as long
     as its result feeds exactly one further copy.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Find the follow-up use before removing STMT so that the walk can
	 continue after the statement is gone.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
1730 
1731 /* Perform the predictive commoning optimization for a chain CHAIN.
1732    Uids of the newly created temporary variables are marked in TMP_VARS.*/
1733 
1734 static void
1735 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1736 			     bitmap tmp_vars)
1737 {
1738   unsigned i;
1739   dref a;
1740   tree var;
1741 
1742   if (chain->combined)
1743     {
1744       /* For combined chains, just remove the statements that are used to
1745 	 compute the values of the expression (except for the root one).
1746 	 We delay this until after all chains are processed.  */
1747     }
1748   else
1749     {
1750       /* For non-combined chains, set up the variables that hold its value,
1751 	 and replace the uses of the original references by these
1752 	 variables.  */
1753       initialize_root (loop, chain, tmp_vars);
1754       for (i = 1; chain->refs.iterate (i, &a); i++)
1755 	{
1756 	  var = chain->vars[chain->length - a->distance];
1757 	  replace_ref_with (a->stmt, var, false, false);
1758 	}
1759     }
1760 }
1761 
1762 /* Determines the unroll factor necessary to remove as many temporary variable
1763    copies as possible.  CHAINS is the list of chains that will be
1764    optimized.  */
1765 
1766 static unsigned
1767 determine_unroll_factor (vec<chain_p> chains)
1768 {
1769   chain_p chain;
1770   unsigned factor = 1, af, nfactor, i;
1771   unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1772 
1773   FOR_EACH_VEC_ELT (chains, i, chain)
1774     {
1775       if (chain->type == CT_INVARIANT)
1776 	continue;
1777 
1778       if (chain->combined)
1779 	{
1780 	  /* For combined chains, we can't handle unrolling if we replace
1781 	     looparound PHIs.  */
1782 	  dref a;
1783 	  unsigned j;
1784 	  for (j = 1; chain->refs.iterate (j, &a); j++)
1785 	    if (gimple_code (a->stmt) == GIMPLE_PHI)
1786 	      return 1;
1787 	  continue;
1788 	}
1789 
1790       /* The best unroll factor for this chain is equal to the number of
1791 	 temporary variables that we create for it.  */
1792       af = chain->length;
1793       if (chain->has_max_use_after)
1794 	af++;
1795 
1796       nfactor = factor * af / gcd (factor, af);
1797       if (nfactor <= max)
1798 	factor = nfactor;
1799     }
1800 
1801   return factor;
1802 }
1803 
1804 /* Perform the predictive commoning optimization for CHAINS.
1805    Uids of the newly created temporary variables are marked in TMP_VARS.  */
1806 
1807 static void
1808 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1809 			bitmap tmp_vars)
1810 {
1811   chain_p chain;
1812   unsigned i;
1813 
1814   FOR_EACH_VEC_ELT (chains, i, chain)
1815     {
1816       if (chain->type == CT_INVARIANT)
1817 	execute_load_motion (loop, chain, tmp_vars);
1818       else
1819 	execute_pred_commoning_chain (loop, chain, tmp_vars);
1820     }
1821 
1822   FOR_EACH_VEC_ELT (chains, i, chain)
1823     {
1824       if (chain->type == CT_INVARIANT)
1825 	;
1826       else if (chain->combined)
1827 	{
1828 	  /* For combined chains, just remove the statements that are used to
1829 	     compute the values of the expression (except for the root one).  */
1830 	  dref a;
1831 	  unsigned j;
1832 	  for (j = 1; chain->refs.iterate (j, &a); j++)
1833 	    remove_stmt (a->stmt);
1834 	}
1835     }
1836 
1837   update_ssa (TODO_update_ssa_only_virtuals);
1838 }
1839 
1840 /* For each reference in CHAINS, if its defining statement is
1841    phi node, record the ssa name that is defined by it.  */
1842 
1843 static void
1844 replace_phis_by_defined_names (vec<chain_p> chains)
1845 {
1846   chain_p chain;
1847   dref a;
1848   unsigned i, j;
1849 
1850   FOR_EACH_VEC_ELT (chains, i, chain)
1851     FOR_EACH_VEC_ELT (chain->refs, j, a)
1852       {
1853 	if (gimple_code (a->stmt) == GIMPLE_PHI)
1854 	  {
1855 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
1856 	    a->stmt = NULL;
1857 	  }
1858       }
1859 }
1860 
1861 /* For each reference in CHAINS, if name_defined_by_phi is not
1862    NULL, use it to set the stmt field.  */
1863 
1864 static void
1865 replace_names_by_phis (vec<chain_p> chains)
1866 {
1867   chain_p chain;
1868   dref a;
1869   unsigned i, j;
1870 
1871   FOR_EACH_VEC_ELT (chains, i, chain)
1872     FOR_EACH_VEC_ELT (chain->refs, j, a)
1873       if (a->stmt == NULL)
1874 	{
1875 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1876 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1877 	  a->name_defined_by_phi = NULL_TREE;
1878 	}
1879 }
1880 
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  /* The chains to be transformed by execute_pred_commoning.  */
  vec<chain_p> chains;
  /* Bitmap collecting the uids of the created temporary variables.  */
  bitmap tmp_vars;
};
1889 
1890 static void
1891 execute_pred_commoning_cbck (struct loop *loop, void *data)
1892 {
1893   struct epcc_data *const dta = (struct epcc_data *) data;
1894 
1895   /* Restore phi nodes that were replaced by ssa names before
1896      tree_transform_and_unroll_loop (see detailed description in
1897      tree_predictive_commoning_loop).  */
1898   replace_names_by_phis (dta->chains);
1899   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1900 }
1901 
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  while (1)
    {
      /* Find a phi node inside the loop that uses NAME; the first such
	 phi found is taken.  */
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      /* The slow imm-use iterator must be exited through
		 BREAK_FROM_IMM_USE_STMT.  */
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      /* Rebase the result of the phi as well and continue from it.  */
      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
1933 
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only handle the phi nodes for the temporary variables created by
	 predictive commoning (recorded in TMP_VARS).  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
1977 
1978 /* Returns true if CHAIN is suitable to be combined.  */
1979 
1980 static bool
1981 chain_can_be_combined_p (chain_p chain)
1982 {
1983   return (!chain->combined
1984 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1985 }
1986 
1987 /* Returns the modify statement that uses NAME.  Skips over assignment
1988    statements, NAME is replaced with the actual name used in the returned
1989    statement.  */
1990 
1991 static gimple *
1992 find_use_stmt (tree *name)
1993 {
1994   gimple *stmt;
1995   tree rhs, lhs;
1996 
1997   /* Skip over assignments.  */
1998   while (1)
1999     {
2000       stmt = single_nonlooparound_use (*name);
2001       if (!stmt)
2002 	return NULL;
2003 
2004       if (gimple_code (stmt) != GIMPLE_ASSIGN)
2005 	return NULL;
2006 
2007       lhs = gimple_assign_lhs (stmt);
2008       if (TREE_CODE (lhs) != SSA_NAME)
2009 	return NULL;
2010 
2011       if (gimple_assign_copy_p (stmt))
2012 	{
2013 	  rhs = gimple_assign_rhs1 (stmt);
2014 	  if (rhs != *name)
2015 	    return NULL;
2016 
2017 	  *name = lhs;
2018 	}
2019       else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2020 	       == GIMPLE_BINARY_RHS)
2021 	return stmt;
2022       else
2023 	return NULL;
2024     }
2025 }
2026 
2027 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2028 
2029 static bool
2030 may_reassociate_p (tree type, enum tree_code code)
2031 {
2032   if (FLOAT_TYPE_P (type)
2033       && !flag_unsafe_math_optimizations)
2034     return false;
2035 
2036   return (commutative_tree_code (code)
2037 	  && associative_tree_code (code));
2038 }
2039 
2040 /* If the operation used in STMT is associative and commutative, go through the
2041    tree of the same operations and returns its root.  Distance to the root
2042    is stored in DISTANCE.  */
2043 
2044 static gimple *
2045 find_associative_operation_root (gimple *stmt, unsigned *distance)
2046 {
2047   tree lhs;
2048   gimple *next;
2049   enum tree_code code = gimple_assign_rhs_code (stmt);
2050   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2051   unsigned dist = 0;
2052 
2053   if (!may_reassociate_p (type, code))
2054     return NULL;
2055 
2056   while (1)
2057     {
2058       lhs = gimple_assign_lhs (stmt);
2059       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2060 
2061       next = find_use_stmt (&lhs);
2062       if (!next
2063 	  || gimple_assign_rhs_code (next) != code)
2064 	break;
2065 
2066       stmt = next;
2067       dist++;
2068     }
2069 
2070   if (distance)
2071     *distance = dist;
2072   return stmt;
2073 }
2074 
/* Returns the common statement in which NAME1 and NAME2 have a use.  If there
   is no such statement, returns NULL.  In case the operation used on
   NAME1 and NAME2 is associative and commutative, returns the root of the
   tree formed by this operation instead of the statement that uses NAME1 or
   NAME2.  */
2080 
2081 static gimple *
2082 find_common_use_stmt (tree *name1, tree *name2)
2083 {
2084   gimple *stmt1, *stmt2;
2085 
2086   stmt1 = find_use_stmt (name1);
2087   if (!stmt1)
2088     return NULL;
2089 
2090   stmt2 = find_use_stmt (name2);
2091   if (!stmt2)
2092     return NULL;
2093 
2094   if (stmt1 == stmt2)
2095     return stmt1;
2096 
2097   stmt1 = find_associative_operation_root (stmt1, NULL);
2098   if (!stmt1)
2099     return NULL;
2100   stmt2 = find_associative_operation_root (stmt2, NULL);
2101   if (!stmt2)
2102     return NULL;
2103 
2104   return (stmt1 == stmt2 ? stmt1 : NULL);
2105 }
2106 
2107 /* Checks whether R1 and R2 are combined together using CODE, with the result
2108    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2109    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2110 
2111 static bool
2112 combinable_refs_p (dref r1, dref r2,
2113 		   enum tree_code *code, bool *swap, tree *rslt_type)
2114 {
2115   enum tree_code acode;
2116   bool aswap;
2117   tree atype;
2118   tree name1, name2;
2119   gimple *stmt;
2120 
2121   name1 = name_for_ref (r1);
2122   name2 = name_for_ref (r2);
2123   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2124 
2125   stmt = find_common_use_stmt (&name1, &name2);
2126 
2127   if (!stmt
2128       /* A simple post-dominance check - make sure the combination
2129          is executed under the same condition as the references.  */
2130       || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2131 	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2132     return false;
2133 
2134   acode = gimple_assign_rhs_code (stmt);
2135   aswap = (!commutative_tree_code (acode)
2136 	   && gimple_assign_rhs1 (stmt) != name1);
2137   atype = TREE_TYPE (gimple_assign_lhs (stmt));
2138 
2139   if (*code == ERROR_MARK)
2140     {
2141       *code = acode;
2142       *swap = aswap;
2143       *rslt_type = atype;
2144       return true;
2145     }
2146 
2147   return (*code == acode
2148 	  && *swap == aswap
2149 	  && *rslt_type == atype);
2150 }
2151 
2152 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2153    an assignment of the remaining operand.  */
2154 
2155 static void
2156 remove_name_from_operation (gimple *stmt, tree op)
2157 {
2158   tree other_op;
2159   gimple_stmt_iterator si;
2160 
2161   gcc_assert (is_gimple_assign (stmt));
2162 
2163   if (gimple_assign_rhs1 (stmt) == op)
2164     other_op = gimple_assign_rhs2 (stmt);
2165   else
2166     other_op = gimple_assign_rhs1 (stmt);
2167 
2168   si = gsi_for_stmt (stmt);
2169   gimple_assign_set_rhs_from_tree (&si, other_op);
2170 
2171   /* We should not have reallocated STMT.  */
2172   gcc_assert (gsi_stmt (si) == stmt);
2173 
2174   update_stmt (stmt);
2175 }
2176 
2177 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2178    are combined in a single statement, and returns this statement.  */
2179 
static gimple *
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  /* NAME1 and NAME2 must feed the same reassociable expression tree:
     find their use statements, the common root of the tree, and the
     distance of each use from that root.  */
  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First walk the deeper of the two uses upwards until both are at the
     same depth in the tree...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ... then advance both paths in lockstep until they meet at the
     nearest common statement S1 (== S2).  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
				  gimple_assign_rhs1 (s1),
				  gimple_assign_rhs2 (s1));

  /* S1 becomes NEW_NAME CODE TMP_NAME; the statements computing NEW_NAME
     and TMP_NAME are inserted just before it.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  /* NEW_STMT is the statement that now combines NAME1 and NAME2.  */
  return new_stmt;
}
2259 
2260 /* Returns the statement that combines references R1 and R2.  In case R1
2261    and R2 are not used in the same statement, but they are used with an
2262    associative and commutative operation in the same expression, reassociate
2263    the expression so that they are used in the same statement.  */
2264 
2265 static gimple *
2266 stmt_combining_refs (dref r1, dref r2)
2267 {
2268   gimple *stmt1, *stmt2;
2269   tree name1 = name_for_ref (r1);
2270   tree name2 = name_for_ref (r2);
2271 
2272   stmt1 = find_use_stmt (&name1);
2273   stmt2 = find_use_stmt (&name2);
2274   if (stmt1 == stmt2)
2275     return stmt1;
2276 
2277   return reassociate_to_the_same_stmt (name1, name2);
2278 }
2279 
2280 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2281    description of the new chain is returned, otherwise we return NULL.  */
2282 
2283 static chain_p
2284 combine_chains (chain_p ch1, chain_p ch2)
2285 {
2286   dref r1, r2, nw;
2287   enum tree_code op = ERROR_MARK;
2288   bool swap = false;
2289   chain_p new_chain;
2290   unsigned i;
2291   tree rslt_type = NULL_TREE;
2292 
2293   if (ch1 == ch2)
2294     return NULL;
2295   if (ch1->length != ch2->length)
2296     return NULL;
2297 
2298   if (ch1->refs.length () != ch2->refs.length ())
2299     return NULL;
2300 
2301   for (i = 0; (ch1->refs.iterate (i, &r1)
2302 	       && ch2->refs.iterate (i, &r2)); i++)
2303     {
2304       if (r1->distance != r2->distance)
2305 	return NULL;
2306 
2307       if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2308 	return NULL;
2309     }
2310 
2311   if (swap)
2312     std::swap (ch1, ch2);
2313 
2314   new_chain = XCNEW (struct chain);
2315   new_chain->type = CT_COMBINATION;
2316   new_chain->op = op;
2317   new_chain->ch1 = ch1;
2318   new_chain->ch2 = ch2;
2319   new_chain->rslt_type = rslt_type;
2320   new_chain->length = ch1->length;
2321 
2322   for (i = 0; (ch1->refs.iterate (i, &r1)
2323 	       && ch2->refs.iterate (i, &r2)); i++)
2324     {
2325       nw = XCNEW (struct dref_d);
2326       nw->stmt = stmt_combining_refs (r1, r2);
2327       nw->distance = r1->distance;
2328 
2329       new_chain->refs.safe_push (nw);
2330     }
2331 
2332   ch1->combined = true;
2333   ch2->combined = true;
2334   return new_chain;
2335 }
2336 
2337 /* Recursively update position information of all offspring chains to ROOT
2338    chain's position information.  */
2339 
2340 static void
2341 update_pos_for_combined_chains (chain_p root)
2342 {
2343   chain_p ch1 = root->ch1, ch2 = root->ch2;
2344   dref ref, ref1, ref2;
2345   for (unsigned j = 0; (root->refs.iterate (j, &ref)
2346 			&& ch1->refs.iterate (j, &ref1)
2347 			&& ch2->refs.iterate (j, &ref2)); ++j)
2348     ref1->pos = ref2->pos = ref->pos;
2349 
2350   if (ch1->type == CT_COMBINATION)
2351     update_pos_for_combined_chains (ch1);
2352   if (ch2->type == CT_COMBINATION)
2353     update_pos_for_combined_chains (ch2);
2354 }
2355 
2356 /* Returns true if statement S1 dominates statement S2.  */
2357 
2358 static bool
2359 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2360 {
2361   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2362 
2363   if (!bb1 || s1 == s2)
2364     return true;
2365 
2366   if (bb1 == bb2)
2367     return gimple_uid (s1) < gimple_uid (s2);
2368 
2369   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2370 }
2371 
2372 /* Try to combine the CHAINS in LOOP.  */
2373 
static void
try_combine_chains (struct loop *loop, vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;
  bool combined_p = false;

  /* Seed the worklist with every chain that may participate in a
     combination.  */
  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  /* Greedily combine pairs of chains; each successful combination is
     pushed back on the worklist so it can be combined further.  */
  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
	continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
	{
	  if (!chain_can_be_combined_p (ch2))
	    continue;

	  cch = combine_chains (ch1, ch2);
	  if (cch)
	    {
	      worklist.safe_push (cch);
	      chains->safe_push (cch);
	      combined_p = true;
	      break;
	    }
	}
    }
  if (!combined_p)
    return;

  /* Setup UID for all statements in dominance order.  */
  basic_block *bbs = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
  free (bbs);

  /* Re-association in combined chains may generate statements different to
     order of references of the original chain.  We need to keep references
     of combined chain in dominance order so that all uses will be inserted
     after definitions.  Note:
       A) This is necessary for all combined chains.
       B) This is only necessary for ZERO distance references because other
	  references inherit value from loop carried PHIs.

     We first update position information for all combined chains.  */
  dref ref;
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      /* Only start from root combinations, i.e. combined chains that were
	 not themselves combined into a larger one.  */
      if (ch1->type != CT_COMBINATION || ch1->combined)
	continue;

      /* Record each reference's position from the statement UIDs, then
	 propagate it recursively to the child chains.  */
      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	ref->pos = gimple_uid (ref->stmt);

      update_pos_for_combined_chains (ch1);
    }
  /* Then sort references according to newly updated position information.  */
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION && !ch1->combined)
	continue;

      /* Find the first reference with non-ZERO distance.  */
      if (ch1->length == 0)
	j = ch1->refs.length();
      else
	{
	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	    if (ref->distance != 0)
	      break;
	}

      /* Sort all ZERO distance references by position.  */
      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);

      /* Child chains keep the flags computed when they were created.  */
      if (ch1->combined)
	continue;

      /* For a ZERO length chain, has_max_use_after must be true since the
	 root combined stmt must dominate the others.  */
      if (ch1->length == 0)
	{
	  ch1->has_max_use_after = true;
	  continue;
	}
      /* Check if there is use at max distance after root for combined chains
	 and set flag accordingly.  */
      ch1->has_max_use_after = false;
      gimple *root_stmt = get_chain_root (ch1)->stmt;
      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
	{
	  if (ref->distance == ch1->length
	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
	    {
	      ch1->has_max_use_after = true;
	      break;
	    }
	}
    }
}
2479 
2480 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
2481    impossible because one of these initializers may trap, true otherwise.  */
2482 
static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  /* Invariant chains need a single initializer; otherwise one per
     distance step of the chain.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      /* The PHI argument on the preheader edge supplies the value of the
	 reference LAREF->DISTANCE iterations before the loop starts.  */
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      /* Skip slots already filled in from looparound PHIs above.  */
      if (chain->inits[i] != NULL_TREE)
	continue;

      /* Compute the reference value as of (I - N) iterations relative to
	 the first iteration, i.e. in the virtual pre-loop iterations.  */
      init = ref_at_iteration (dr, (int) i - n, &stmts);
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	{
	  /* The initializer might trap and the reference is not accessed
	     on every iteration, so we must give up on this chain.  */
	  gimple_seq_discard (stmts);
	  return false;
	}

      /* Emit any statements computing the initializer on the preheader
	 edge, where they execute exactly once before the loop.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
2532 
2533 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2534    be used because the initializers might trap.  */
2535 
2536 static void
2537 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2538 {
2539   chain_p chain;
2540   unsigned i;
2541 
2542   for (i = 0; i < chains.length (); )
2543     {
2544       chain = chains[i];
2545       if (prepare_initializers_chain (loop, chain))
2546 	i++;
2547       else
2548 	{
2549 	  release_chain (chain);
2550 	  chains.unordered_remove (i);
2551 	}
2552     }
2553 }
2554 
2555 /* Performs predictive commoning for LOOP.  Returns true if LOOP was
2556    unrolled.  */
2557 
static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Nothing for predictive commoning if loop only iterates 1 time.  */
  if (get_max_loop_iterations_int (loop) == 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");

      return false;
    }

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  /* This also drops chains whose initializers might trap, and emits the
     initializer computations on the preheader edge.  */
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (loop, &chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

  /* Common cleanup for all exits past chain construction.  */
end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  /* The caller needs to know whether we unrolled, to reset SCEV and
     schedule a CFG cleanup.  */
  return unroll;
}
2695 
2696 /* Runs predictive commoning.  */
2697 
2698 unsigned
2699 tree_predictive_commoning (void)
2700 {
2701   bool unrolled = false;
2702   struct loop *loop;
2703   unsigned ret = 0;
2704 
2705   initialize_original_copy_tables ();
2706   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2707     if (optimize_loop_for_speed_p (loop))
2708       {
2709 	unrolled |= tree_predictive_commoning_loop (loop);
2710       }
2711 
2712   if (unrolled)
2713     {
2714       scev_reset ();
2715       ret = TODO_cleanup_cfg;
2716     }
2717   free_original_copy_tables ();
2718 
2719   return ret;
2720 }
2721 
2722 /* Predictive commoning Pass.  */
2723 
2724 static unsigned
2725 run_tree_predictive_commoning (struct function *fun)
2726 {
2727   if (number_of_loops (fun) <= 1)
2728     return 0;
2729 
2730   return tree_predictive_commoning ();
2731 }
2732 
namespace {

/* Static descriptor of the predictive commoning pass ("pcom").  */
const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* Pass object wiring the descriptor to the implementation above.  */
class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fpredictive-commoning is enabled.  */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace
2765 
gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  /* Factory entry point used by the pass manager; caller owns the pass.  */
  return new pass_predcom (ctxt);
}
2771 
2772 
2773