xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-predcom.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Predictive commoning.
2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
   commoning can be viewed as CSE around a loop, and with some improvements,
   as generalized strength reduction -- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
   2) Inside each of the groups, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
      (N + 1) for each root reference (N for references for which we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
155           x = x + i;
156           b[10] = x;
157        }
158 
159    TODO -- stores killing other stores can be taken into account, e.g.,
160    for (i = 0; i < n; i++)
161      {
162        a[i] = 1;
163        a[i+2] = 2;
164      }
165 
166    can be replaced with
167 
168    t0 = a[0];
169    t1 = a[1];
170    for (i = 0; i < n; i++)
171      {
172        a[i] = 1;
173        t2 = 2;
174        t0 = t1;
175        t1 = t2;
176      }
177    a[n] = t0;
178    a[n+1] = t1;
179 
180    The interesting part is that this would generalize store motion; still, since
181    sm is performed elsewhere, it does not seem that important.
182 
183    Predictive commoning can be generalized for arbitrary computations (not
184    just memory loads), and also nontrivial transfer functions (e.g., replacing
185    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
186 
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "tm.h"
191 #include "tree.h"
192 #include "tm_p.h"
193 #include "cfgloop.h"
194 #include "tree-flow.h"
195 #include "ggc.h"
196 #include "tree-data-ref.h"
197 #include "tree-scalar-evolution.h"
198 #include "tree-chrec.h"
199 #include "params.h"
200 #include "gimple-pretty-print.h"
201 #include "tree-pass.h"
202 #include "tree-affine.h"
203 #include "tree-inline.h"
204 
/* The maximum number of iterations between the considered memory
   references; the window shrinks when fewer target registers are
   available.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
209 
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself; NULL for looparound phi nodes and combination
     references (see dump_dref).  */
  struct data_reference *ref;

  /* The statement in which the reference appears.  */
  gimple stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  double_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
240 
241 
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant; candidates
     for load motion.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is a store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains (see step 4 in the file comment).  */
  CT_COMBINATION
};
258 
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  The root is the first element.  */
  vec<dref> refs;

  /* The maximum distance (in loop iterations) of a reference in the chain
     from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
294 
295 
296 /* Describes the knowledge about the step of the memory references in
297    the component.  */
298 
299 enum ref_step_type
300 {
301   /* The step is zero.  */
302   RS_INVARIANT,
303 
304   /* The step is nonzero.  */
305   RS_NONZERO,
306 
307   /* The step may or may not be nonzero.  */
308   RS_ANY
309 };
310 
/* Components of the data dependence graph (mutually independent groups
   of references, see step 1 in the file comment).  */

struct component
{
  /* The references in the component, in dominance order.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
324 
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand; shared by
   aff_combination_dr_offset, determine_offset and valid_initializer_p.  */

static struct pointer_map_t *name_expansions;
332 
333 /* Dumps data reference REF to FILE.  */
334 
335 extern void dump_dref (FILE *, dref);
336 void
337 dump_dref (FILE *file, dref ref)
338 {
339   if (ref->ref)
340     {
341       fprintf (file, "    ");
342       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
343       fprintf (file, " (id %u%s)\n", ref->pos,
344 	       DR_IS_READ (ref->ref) ? "" : ", write");
345 
346       fprintf (file, "      offset ");
347       dump_double_int (file, ref->offset, false);
348       fprintf (file, "\n");
349 
350       fprintf (file, "      distance %u\n", ref->distance);
351     }
352   else
353     {
354       if (gimple_code (ref->stmt) == GIMPLE_PHI)
355 	fprintf (file, "    looparound ref\n");
356       else
357 	fprintf (file, "    combination ref\n");
358       fprintf (file, "      in statement ");
359       print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
360       fprintf (file, "\n");
361       fprintf (file, "      distance %u\n", ref->distance);
362     }
363 
364 }
365 
366 /* Dumps CHAIN to FILE.  */
367 
368 extern void dump_chain (FILE *, chain_p);
369 void
370 dump_chain (FILE *file, chain_p chain)
371 {
372   dref a;
373   const char *chain_type;
374   unsigned i;
375   tree var;
376 
377   switch (chain->type)
378     {
379     case CT_INVARIANT:
380       chain_type = "Load motion";
381       break;
382 
383     case CT_LOAD:
384       chain_type = "Loads-only";
385       break;
386 
387     case CT_STORE_LOAD:
388       chain_type = "Store-loads";
389       break;
390 
391     case CT_COMBINATION:
392       chain_type = "Combination";
393       break;
394 
395     default:
396       gcc_unreachable ();
397     }
398 
399   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
400 	   chain->combined ? " (combined)" : "");
401   if (chain->type != CT_INVARIANT)
402     fprintf (file, "  max distance %u%s\n", chain->length,
403 	     chain->has_max_use_after ? "" : ", may reuse first");
404 
405   if (chain->type == CT_COMBINATION)
406     {
407       fprintf (file, "  equal to %p %s %p in type ",
408 	       (void *) chain->ch1, op_symbol_code (chain->op),
409 	       (void *) chain->ch2);
410       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
411       fprintf (file, "\n");
412     }
413 
414   if (chain->vars.exists ())
415     {
416       fprintf (file, "  vars");
417       FOR_EACH_VEC_ELT (chain->vars, i, var)
418 	{
419 	  fprintf (file, " ");
420 	  print_generic_expr (file, var, TDF_SLIM);
421 	}
422       fprintf (file, "\n");
423     }
424 
425   if (chain->inits.exists ())
426     {
427       fprintf (file, "  inits");
428       FOR_EACH_VEC_ELT (chain->inits, i, var)
429 	{
430 	  fprintf (file, " ");
431 	  print_generic_expr (file, var, TDF_SLIM);
432 	}
433       fprintf (file, "\n");
434     }
435 
436   fprintf (file, "  references:\n");
437   FOR_EACH_VEC_ELT (chain->refs, i, a)
438     dump_dref (file, a);
439 
440   fprintf (file, "\n");
441 }
442 
443 /* Dumps CHAINS to FILE.  */
444 
445 extern void dump_chains (FILE *, vec<chain_p> );
446 void
447 dump_chains (FILE *file, vec<chain_p> chains)
448 {
449   chain_p chain;
450   unsigned i;
451 
452   FOR_EACH_VEC_ELT (chains, i, chain)
453     dump_chain (file, chain);
454 }
455 
456 /* Dumps COMP to FILE.  */
457 
458 extern void dump_component (FILE *, struct component *);
459 void
460 dump_component (FILE *file, struct component *comp)
461 {
462   dref a;
463   unsigned i;
464 
465   fprintf (file, "Component%s:\n",
466 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
467   FOR_EACH_VEC_ELT (comp->refs, i, a)
468     dump_dref (file, a);
469   fprintf (file, "\n");
470 }
471 
472 /* Dumps COMPS to FILE.  */
473 
474 extern void dump_components (FILE *, struct component *);
475 void
476 dump_components (FILE *file, struct component *comps)
477 {
478   struct component *comp;
479 
480   for (comp = comps; comp; comp = comp->next)
481     dump_component (file, comp);
482 }
483 
484 /* Frees a chain CHAIN.  */
485 
486 static void
487 release_chain (chain_p chain)
488 {
489   dref ref;
490   unsigned i;
491 
492   if (chain == NULL)
493     return;
494 
495   FOR_EACH_VEC_ELT (chain->refs, i, ref)
496     free (ref);
497 
498   chain->refs.release ();
499   chain->vars.release ();
500   chain->inits.release ();
501 
502   free (chain);
503 }
504 
505 /* Frees CHAINS.  */
506 
507 static void
508 release_chains (vec<chain_p> chains)
509 {
510   unsigned i;
511   chain_p chain;
512 
513   FOR_EACH_VEC_ELT (chains, i, chain)
514     release_chain (chain);
515   chains.release ();
516 }
517 
/* Frees a component COMP.  Note that the drefs stored in the component
   are not freed here -- callers that own them free them explicitly
   (cf. filter_suitable_components).  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
526 
527 /* Frees list of components COMPS.  */
528 
529 static void
530 release_components (struct component *comps)
531 {
532   struct component *act, *next;
533 
534   for (act = comps; act; act = next)
535     {
536       next = act->next;
537       release_component (act);
538     }
539 }
540 
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root = a;
  unsigned next;

  /* Walk up to the root of the tree.  */
  while (fathers[root] != root)
    root = fathers[root];

  /* Path compression: point every node on the path directly at the root.  */
  while (a != root)
    {
      next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
560 
/* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
   components, A and B are components to merge.  */

static void
merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
{
  unsigned ra = component_of (fathers, a);
  unsigned rb = component_of (fathers, b);
  unsigned small, large;

  /* Already in the same component -- nothing to do.  */
  if (ra == rb)
    return;

  /* Union by size: hang the smaller tree under the larger one.  */
  if (sizes[ra] < sizes[rb])
    {
      small = ra;
      large = rb;
    }
  else
    {
      small = rb;
      large = ra;
    }

  sizes[large] += sizes[small];
  fathers[small] = large;
}
584 
585 /* Returns true if A is a reference that is suitable for predictive commoning
586    in the innermost loop that contains it.  REF_STEP is set according to the
587    step of the reference A.  */
588 
589 static bool
590 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
591 {
592   tree ref = DR_REF (a), step = DR_STEP (a);
593 
594   if (!step
595       || TREE_THIS_VOLATILE (ref)
596       || !is_gimple_reg_type (TREE_TYPE (ref))
597       || tree_could_throw_p (ref))
598     return false;
599 
600   if (integer_zerop (step))
601     *ref_step = RS_INVARIANT;
602   else if (integer_nonzerop (step))
603     *ref_step = RS_NONZERO;
604   else
605     *ref_step = RS_ANY;
606 
607   return true;
608 }
609 
/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  /* Expand the symbolic offset into an affine combination, using the
     shared name-expansion cache.  */
  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  /* Add the constant DR_INIT part.  */
  aff_combination_const (&delta, type, tree_to_double_int (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
623 
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  double_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = double_int_zero;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  /* DIFF := offset(A) - offset(B).  */
  aff_combination_scale (&baseb, double_int_minus_one);
  aff_combination_add (&diff, &baseb);

  /* OFF is the number of steps between the references; it succeeds only if
     the difference is a compile-time constant multiple of the step.  */
  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
669 
670 /* Returns the last basic block in LOOP for that we are sure that
671    it is executed whenever the loop is entered.  */
672 
673 static basic_block
674 last_always_executed_block (struct loop *loop)
675 {
676   unsigned i;
677   vec<edge> exits = get_loop_exit_edges (loop);
678   edge ex;
679   basic_block last = loop->latch;
680 
681   FOR_EACH_VEC_ELT (exits, i, ex)
682     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
683   exits.release ();
684 
685   return last;
686 }
687 
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Returns a linked list of components, or NULL on failure (e.g. if a fake
   clobbering reference is present).  Uses a union-find structure with one
   extra slot (index N) collecting the "bad" references that cannot be
   handled.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  basic_block last_always_executed = last_always_executed_block (loop);

  /* Number the references (stashing the index in dr->aux) and make each
     of them a singleton union-find component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Move every unsuitable reference into the bad component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge components connected by a dependence.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      double_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb)
	  && (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off)))
	continue;

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Materialize the components, skipping references that ended up in the
     bad component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = double_int_zero;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  /* Chain the components into the result list.  */
  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
805 
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Condition 2b): the reference must be executed in each iteration
	 (and hence dominate the latch).  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The references are stored in dominance order, so each block must
	 be dominated by the previous one.  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = double_int_zero;

  /* Conditions 2a) and 2c): all references must share base and step, and
     differ by a constant number of iterations -- determine_offset checks
     this and records the offset relative to FIRST.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

#ifdef ENABLE_CHECKING
      {
	enum ref_step_type a_step;
	ok = suitable_reference_p (a->ref, &a_step);
	gcc_assert (ok && a_step == comp->comp_step);
      }
#endif
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
860 
861 /* Check the conditions on references inside each of components COMPS,
862    and remove the unsuitable components from the list.  The new list
863    of components is returned.  The conditions are described in 2) at
864    the beginning of this file.  LOOP is the current loop.  */
865 
866 static struct component *
867 filter_suitable_components (struct loop *loop, struct component *comps)
868 {
869   struct component **comp, *act;
870 
871   for (comp = &comps; *comp; )
872     {
873       act = *comp;
874       if (suitable_component_p (loop, act))
875 	comp = &act->next;
876       else
877 	{
878 	  dref ref;
879 	  unsigned i;
880 
881 	  *comp = act->next;
882 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
883 	    free (ref);
884 	  release_component (act);
885 	}
886     }
887 
888   return comps;
889 }
890 
891 /* Compares two drefs A and B by their offset and position.  Callback for
892    qsort.  */
893 
894 static int
895 order_drefs (const void *a, const void *b)
896 {
897   const dref *const da = (const dref *) a;
898   const dref *const db = (const dref *) b;
899   int offcmp = (*da)->offset.scmp ((*db)->offset);
900 
901   if (offcmp != 0)
902     return offcmp;
903 
904   return (*da)->pos - (*db)->pos;
905 }
906 
/* Returns root of the CHAIN.  The root is always stored as the first
   reference of the chain.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
914 
/* Adds REF to the chain CHAIN.  If REF is too far (MAX_DISTANCE or more
   iterations) from the root, it is freed and dropped instead.  Takes
   ownership of REF.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);
  double_int dist;

  /* References are added in nondecreasing offset order (cf. order_drefs),
     so the root has the smallest offset.  */
  gcc_assert (root->offset.sle (ref->offset));
  dist = ref->offset - root->offset;
  if (double_int::from_uhwi (MAX_DISTANCE).ule (dist))
    {
      /* Too far from the root to be worth keeping.  */
      free (ref);
      return;
    }
  gcc_assert (dist.fits_uhwi ());

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  if (ref->distance >= chain->length)
    {
      /* REF is now the farthest reference; has_max_use_after is reset and
	 recomputed by the test below.  */
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
948 
949 /* Returns the chain for invariant component COMP.  */
950 
951 static chain_p
952 make_invariant_chain (struct component *comp)
953 {
954   chain_p chain = XCNEW (struct chain);
955   unsigned i;
956   dref ref;
957 
958   chain->type = CT_INVARIANT;
959 
960   chain->all_always_accessed = true;
961 
962   FOR_EACH_VEC_ELT (comp->refs, i, ref)
963     {
964       chain->refs.safe_push (ref);
965       chain->all_always_accessed &= ref->always_accessed;
966     }
967 
968   return chain;
969 }
970 
971 /* Make a new chain rooted at REF.  */
972 
973 static chain_p
974 make_rooted_chain (dref ref)
975 {
976   chain_p chain = XCNEW (struct chain);
977 
978   chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
979 
980   chain->refs.safe_push (ref);
981   chain->all_always_accessed = ref->always_accessed;
982 
983   ref->distance = 0;
984 
985   return chain;
986 }
987 
988 /* Returns true if CHAIN is not trivial.  */
989 
990 static bool
991 nontrivial_chain_p (chain_p chain)
992 {
993   return chain != NULL && chain->refs.length () > 1;
994 }
995 
996 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
997    is no such name.  */
998 
999 static tree
1000 name_for_ref (dref ref)
1001 {
1002   tree name;
1003 
1004   if (is_gimple_assign (ref->stmt))
1005     {
1006       if (!ref->ref || DR_IS_READ (ref->ref))
1007 	name = gimple_assign_lhs (ref->stmt);
1008       else
1009 	name = gimple_assign_rhs1 (ref->stmt);
1010     }
1011   else
1012     name = PHI_RESULT (ref->stmt);
1013 
1014   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1015 }
1016 
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  double_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  /* DIFF := offset(ROOT) - offset(REF).  */
  aff_combination_scale (&base, double_int_minus_one);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* The difference must be exactly DISTANCE steps.  */
  if (off != double_int::from_uhwi (distance))
    return false;

  return true;
}
1058 
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gimple
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gimple phi = NULL, init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gimple_stmt_iterator psi;

  /* Determine the ssa name holding the value of REF: the lhs for a load,
     the rhs for a store, the result for a phi node.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi node in the loop header whose argument on the latch edge
     is NAME, i.e., a phi node that copies the value around the loop.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The initial value must be defined by a load from memory (a reference or
     a decl) in the preheader, so that we can compare its location with
     REF's.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* The value loaded on entry must equal the value of REF one iteration
     before the first one, i.e., at distance REF->distance + 1.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1121 
/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gimple phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  /* The phi node carries REF's value from the previous iteration, so its
     distance is one larger than REF's.  */
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  /* Keep CHAIN->refs sorted by increasing distance.  */
  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  /* If the copy reaches farther than any existing reference, extend the
     chain; the phi itself is not a use after the maximum distance.  */
  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}
1145 
1146 /* For references in CHAIN that are copied around the LOOP (created previously
1147    by PRE, or by user), add the results of such copies to the chain.  This
1148    enables us to remove the copies by unrolling, and may need less registers
1149    (also, it may allow us to combine chains together).  */
1150 
1151 static void
1152 add_looparound_copies (struct loop *loop, chain_p chain)
1153 {
1154   unsigned i;
1155   dref ref, root = get_chain_root (chain);
1156   gimple phi;
1157 
1158   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1159     {
1160       phi = find_looparound_phi (loop, ref, root);
1161       if (!phi)
1162 	continue;
1163 
1164       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1165       insert_looparound_copy (chain, ref, phi);
1166     }
1167 }
1168 
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  double_int last_ofs = double_int_zero;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain at every write, and whenever the distance from the
	 current root would reach MAX_DISTANCE.  */
      if (!chain || DR_IS_WRITE (a->ref)
	  || double_int::from_uhwi (MAX_DISTANCE).ule (a->offset - last_ofs))
	{
	  /* Keep the finished chain only if it links more than its root
	     reference; a trivial chain offers nothing to reuse.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Finish the last chain of the component.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1221 
1222 /* Find roots of the values and determine distances in components COMPS, and
1223    separates the references to CHAINS.  LOOP is the current loop.  */
1224 
1225 static void
1226 determine_roots (struct loop *loop,
1227 		 struct component *comps, vec<chain_p> *chains)
1228 {
1229   struct component *comp;
1230 
1231   for (comp = comps; comp; comp = comp->next)
1232     determine_roots_comp (loop, comp, chains);
1233 }
1234 
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gimple new_stmt;
  gimple_stmt_iterator bsi, psi;

  /* A looparound phi node can only be replaced by a use of NEW_TREE; it is
     turned into an ordinary copy at the start of its block.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      /* The statement may have been reallocated by the rhs replacement;
	 refetch it from the iterator.  */
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber on the lhs carries no usable value; use the default
	     definition of the new variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1325 
/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  /* Shift the access by ITER * STEP; fold the shift into the constant
     part of the address when the step is constant, otherwise into the
     variable offset.  */
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (addr, stmts, is_gimple_mem_ref_addr,
				 NULL_TREE);
  /* The constant part of the offset doubles as the alias pointer of the
     resulting MEM_REF.  */
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
		     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
			     addr, alias_ptr),
		     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
}
1364 
1365 /* Get the initialization expression for the INDEX-th temporary variable
1366    of CHAIN.  */
1367 
1368 static tree
1369 get_init_expr (chain_p chain, unsigned index)
1370 {
1371   if (chain->type == CT_COMBINATION)
1372     {
1373       tree e1 = get_init_expr (chain->ch1, index);
1374       tree e2 = get_init_expr (chain->ch2, index);
1375 
1376       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1377     }
1378   else
1379     return chain->inits[index];
1380 }
1381 
1382 /* Returns a new temporary variable used for the I-th variable carrying
1383    value of REF.  The variable's uid is marked in TMP_VARS.  */
1384 
1385 static tree
1386 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1387 {
1388   tree type = TREE_TYPE (ref);
1389   /* We never access the components of the temporary variable in predictive
1390      commoning.  */
1391   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1392   bitmap_set_bit (tmp_vars, DECL_UID (var));
1393   return var;
1394 }
1395 
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If no use appears at the maximum distance, the variable for distance 0
     can be reused as the one for distance N.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gimple phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  /* CHAIN->vars holds N + 1 ssa names; vars[i] carries the value of the
     root at distance i.  */
  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  /* When reusing, the last slot aliases the first variable.  */
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var, NULL);

  /* Create a phi node for each variable except the last; vars[i] is
     initialized on entry and receives vars[i + 1] around the latch, thus
     shifting the values by one each iteration.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1449 
1450 /* Create the variables and initialization statement for root of chain
1451    CHAIN.  Uids of the newly created temporary variables are marked
1452    in TMP_VARS.  */
1453 
1454 static void
1455 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1456 {
1457   dref root = get_chain_root (chain);
1458   bool in_lhs = (chain->type == CT_STORE_LOAD
1459 		 || chain->type == CT_COMBINATION);
1460 
1461   initialize_root_vars (loop, chain, tmp_vars);
1462   replace_ref_with (root->stmt,
1463 		    chain->vars[chain->length],
1464 		    true, in_lhs);
1465 }
1466 
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gimple phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* If the location is also written, we need a second ssa name for the
     value carried around the loop.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var, NULL);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* Copy the value around the loop: initialized on entry, updated
	 from the second name on the latch edge.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Read-only: a single initialization in the preheader suffices.  */
      gimple init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1517 
1518 
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* RIDX selects which of the (at most two) names to use; it switches to
     the second name once the last write has been processed.  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      /* Each write except the last one defines a fresh ssa name
		 for the variable.  */
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var), NULL);
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }

  vars.release ();
}
1567 
/* Returns the single statement in that NAME is used, excepting
   the looparound phi nodes contained in one of the chains.  If there is no
   such statement, or more statements, NULL is returned.  */

static gimple
single_nonlooparound_use (tree name)
{
  use_operand_p use;
  imm_use_iterator it;
  gimple stmt, ret = NULL;

  FOR_EACH_IMM_USE_FAST (use, it, name)
    {
      stmt = USE_STMT (use);

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
	     could not be processed anyway, so just fail for them.  */
	  if (bitmap_bit_p (looparound_phis,
			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
	    continue;

	  return NULL;
	}
      else if (is_gimple_debug (stmt))
	continue;
      else if (ret != NULL)
	/* A second real use -- NAME has no single use.  */
	return NULL;
      else
	ret = stmt;
    }

  return ret;
}
1603 
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple stmt)
{
  tree name;
  gimple next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Determine the (single) statement the phi result feeds before
	 removing the phi, then continue the removal there if it is a plain
	 ssa-name copy of the result.  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of ssa-name copies, removing each statement.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Find the successor before the statement is deleted.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
1654 
/* Perform the predictive commoning optimization for a chain CHAIN.
   Uids of the newly created temporary variables are marked in TMP_VARS.*/

static void
execute_pred_commoning_chain (struct loop *loop, chain_p chain,
			     bitmap tmp_vars)
{
  unsigned i;
  dref a;
  tree var;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
	 compute the values of the expression (except for the root one).
	 We delay this until after all chains are processed.  */
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value,
	 and replace the uses of the original references by these
	 variables.  */
      initialize_root (loop, chain, tmp_vars);
      /* Start at index 1; the root (index 0) was handled by
	 initialize_root.  */
      for (i = 1; chain->refs.iterate (i, &a); i++)
	{
	  /* vars[length - distance] holds the value of the reference at
	     its distance from the root.  */
	  var = chain->vars[chain->length - a->distance];
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
}
1685 
/* Determines the unroll factor necessary to remove as many temporary variable
   copies as possible.  CHAINS is the list of chains that will be
   optimized.  */

static unsigned
determine_unroll_factor (vec<chain_p> chains)
{
  chain_p chain;
  unsigned factor = 1, af, nfactor, i;
  unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	continue;

      if (chain->combined)
	{
	  /* For combined chains, we can't handle unrolling if we replace
	     looparound PHIs.  */
	  dref a;
	  unsigned j;
	  for (j = 1; chain->refs.iterate (j, &a); j++)
	    if (gimple_code (a->stmt) == GIMPLE_PHI)
	      return 1;
	  continue;
	}

      /* The best unroll factor for this chain is equal to the number of
	 temporary variables that we create for it.  */
      af = chain->length;
      if (chain->has_max_use_after)
	af++;

      /* NFACTOR is the least common multiple of FACTOR and AF; keep it only
	 if it does not exceed the unrolling limit.  */
      nfactor = factor * af / gcd (factor, af);
      if (nfactor <= max)
	factor = nfactor;
    }

  return factor;
}
1727 
/* Perform the predictive commoning optimization for CHAINS.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
			bitmap tmp_vars)
{
  chain_p chain;
  unsigned i;

  /* First pass: transform every chain.  Statement removal for combined
     chains is deferred to the second pass, since their statements may still
     be referenced while other chains are transformed.  */
  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	execute_load_motion (loop, chain, tmp_vars);
      else
	execute_pred_commoning_chain (loop, chain, tmp_vars);
    }

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	;
      else if (chain->combined)
	{
	  /* For combined chains, just remove the statements that are used to
	     compute the values of the expression (except for the root one).  */
	  dref a;
	  unsigned j;
	  for (j = 1; chain->refs.iterate (j, &a); j++)
	    remove_stmt (a->stmt);
	}
    }

  update_ssa (TODO_update_ssa_only_virtuals);
}
1763 
1764 /* For each reference in CHAINS, if its defining statement is
1765    phi node, record the ssa name that is defined by it.  */
1766 
1767 static void
1768 replace_phis_by_defined_names (vec<chain_p> chains)
1769 {
1770   chain_p chain;
1771   dref a;
1772   unsigned i, j;
1773 
1774   FOR_EACH_VEC_ELT (chains, i, chain)
1775     FOR_EACH_VEC_ELT (chain->refs, j, a)
1776       {
1777 	if (gimple_code (a->stmt) == GIMPLE_PHI)
1778 	  {
1779 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
1780 	    a->stmt = NULL;
1781 	  }
1782       }
1783 }
1784 
1785 /* For each reference in CHAINS, if name_defined_by_phi is not
1786    NULL, use it to set the stmt field.  */
1787 
1788 static void
1789 replace_names_by_phis (vec<chain_p> chains)
1790 {
1791   chain_p chain;
1792   dref a;
1793   unsigned i, j;
1794 
1795   FOR_EACH_VEC_ELT (chains, i, chain)
1796     FOR_EACH_VEC_ELT (chain->refs, j, a)
1797       if (a->stmt == NULL)
1798 	{
1799 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1800 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1801 	  a->name_defined_by_phi = NULL_TREE;
1802 	}
1803 }
1804 
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;	/* The chains to be transformed.  */
  bitmap tmp_vars;	/* Uids of the created temporary variables.  */
};
1813 
1814 static void
1815 execute_pred_commoning_cbck (struct loop *loop, void *data)
1816 {
1817   struct epcc_data *const dta = (struct epcc_data *) data;
1818 
1819   /* Restore phi nodes that were replaced by ssa names before
1820      tree_transform_and_unroll_loop (see detailed description in
1821      tree_predictive_commoning_loop).  */
1822   replace_names_by_phis (dta->chains);
1823   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1824 }
1825 
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple stmt, phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  while (1)
    {
      /* Follow NAME to a phi node inside LOOP that uses it, if any.  */
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      /* Rebase the phi result as well and continue from it.  */
      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
1857 
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gimple phi, stmt;
  tree name, use, var;
  gimple_stmt_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only process phi nodes for the temporaries created by this pass.  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
1900 
1901 /* Returns true if CHAIN is suitable to be combined.  */
1902 
1903 static bool
1904 chain_can_be_combined_p (chain_p chain)
1905 {
1906   return (!chain->combined
1907 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1908 }
1909 
/* Returns the modify statement that uses NAME.  Skips over assignment
   statements, NAME is replaced with the actual name used in the returned
   statement.  */

static gimple
find_use_stmt (tree *name)
{
  gimple stmt;
  tree rhs, lhs;

  /* Skip over assignments.  */
  while (1)
    {
      stmt = single_nonlooparound_use (*name);
      if (!stmt)
	return NULL;

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return NULL;

      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return NULL;

      if (gimple_assign_copy_p (stmt))
	{
	  /* A plain copy; follow it, updating *NAME to the copy's lhs.  */
	  rhs = gimple_assign_rhs1 (stmt);
	  if (rhs != *name)
	    return NULL;

	  *name = lhs;
	}
      else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	       == GIMPLE_BINARY_RHS)
	/* Found the binary operation using *NAME.  */
	return stmt;
      else
	return NULL;
    }
}
1949 
1950 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
1951 
1952 static bool
1953 may_reassociate_p (tree type, enum tree_code code)
1954 {
1955   if (FLOAT_TYPE_P (type)
1956       && !flag_unsafe_math_optimizations)
1957     return false;
1958 
1959   return (commutative_tree_code (code)
1960 	  && associative_tree_code (code));
1961 }
1962 
/* If the operation used in STMT is associative and commutative, go through the
   tree of the same operations and returns its root.  Distance to the root
   is stored in DISTANCE.  */

static gimple
find_associative_operation_root (gimple stmt, unsigned *distance)
{
  tree lhs;
  gimple next;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  unsigned dist = 0;

  if (!may_reassociate_p (type, code))
    return NULL;

  /* Walk up the chain of uses as long as each result feeds another
     operation with the same code.  */
  while (1)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (lhs) == SSA_NAME);

      next = find_use_stmt (&lhs);
      if (!next
	  || gimple_assign_rhs_code (next) != code)
	break;

      stmt = next;
      dist++;
    }

  /* DISTANCE may be NULL when the caller does not need the depth.  */
  if (distance)
    *distance = dist;
  return stmt;
}
1997 
1998 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
1999    is no such statement, returns NULL_TREE.  In case the operation used on
2000    NAME1 and NAME2 is associative and commutative, returns the root of the
2001    tree formed by this operation instead of the statement that uses NAME1 or
2002    NAME2.  */
2003 
2004 static gimple
2005 find_common_use_stmt (tree *name1, tree *name2)
2006 {
2007   gimple stmt1, stmt2;
2008 
2009   stmt1 = find_use_stmt (name1);
2010   if (!stmt1)
2011     return NULL;
2012 
2013   stmt2 = find_use_stmt (name2);
2014   if (!stmt2)
2015     return NULL;
2016 
2017   if (stmt1 == stmt2)
2018     return stmt1;
2019 
2020   stmt1 = find_associative_operation_root (stmt1, NULL);
2021   if (!stmt1)
2022     return NULL;
2023   stmt2 = find_associative_operation_root (stmt2, NULL);
2024   if (!stmt2)
2025     return NULL;
2026 
2027   return (stmt1 == stmt2 ? stmt1 : NULL);
2028 }
2029 
/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set these values instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
		   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
         is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  /* Record the operation, the operand order, and the result type of the
     combining statement.  */
  acode = gimple_assign_rhs_code (stmt);
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  /* On the first pair, the caller passes ERROR_MARK; set the outputs.  */
  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  /* On subsequent pairs, require the same operation, order and type.  */
  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
2074 
2075 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2076    an assignment of the remaining operand.  */
2077 
2078 static void
2079 remove_name_from_operation (gimple stmt, tree op)
2080 {
2081   tree other_op;
2082   gimple_stmt_iterator si;
2083 
2084   gcc_assert (is_gimple_assign (stmt));
2085 
2086   if (gimple_assign_rhs1 (stmt) == op)
2087     other_op = gimple_assign_rhs2 (stmt);
2088   else
2089     other_op = gimple_assign_rhs1 (stmt);
2090 
2091   si = gsi_for_stmt (stmt);
2092   gimple_assign_set_rhs_from_tree (&si, other_op);
2093 
2094   /* We should not have reallocated STMT.  */
2095   gcc_assert (gsi_stmt (si) == stmt);
2096 
2097   update_stmt (stmt);
2098 }
2099 
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
   are combined in a single statement, and returns this statement.  */

static gimple
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple stmt1, stmt2, root1, root2, s1, s2;
  gimple new_stmt, tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;	/* Distances of the uses from the common root.  */
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  /* The caller (stmt_combining_refs via combine_chains) has already
     verified through combinable_refs_p that both uses belong to the same
     associative computation with the same operation.  */
  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First bring both uses to the same distance from the common root...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ... then walk both chains of uses upwards in lockstep until they
     meet in a single statement S1 (== S2).  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var, NULL);
  new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var, NULL);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1),
					   tmp_name,
					   gimple_assign_rhs1 (s1),
					   gimple_assign_rhs2 (s1));

  /* Replace the rhs of S1 by NEW_NAME CODE TMP_NAME, where TMP_NAME holds
     S1's former rhs; then insert the two feeding statements before it.
     The order matters: both NEW_NAME and TMP_NAME must be defined before
     S1 uses them.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2183 
2184 /* Returns the statement that combines references R1 and R2.  In case R1
2185    and R2 are not used in the same statement, but they are used with an
2186    associative and commutative operation in the same expression, reassociate
2187    the expression so that they are used in the same statement.  */
2188 
2189 static gimple
2190 stmt_combining_refs (dref r1, dref r2)
2191 {
2192   gimple stmt1, stmt2;
2193   tree name1 = name_for_ref (r1);
2194   tree name2 = name_for_ref (r2);
2195 
2196   stmt1 = find_use_stmt (&name1);
2197   stmt2 = find_use_stmt (&name2);
2198   if (stmt1 == stmt2)
2199     return stmt1;
2200 
2201   return reassociate_to_the_same_stmt (name1, name2);
2202 }
2203 
/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
   description of the new chain is returned, otherwise we return NULL.  */

static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;	/* Combination operation, set below.  */
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  gimple root_stmt;
  tree rslt_type = NULL_TREE;

  /* Only distinct chains of the same shape (same reuse distance and the
     same number of references) can be combined.  */
  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  /* Check that every corresponding pair of references is combined by the
     same operation; the first pair records OP/SWAP/RSLT_TYPE (OP starts
     as ERROR_MARK), the following pairs must match it.  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
	return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
	return NULL;
    }

  /* Canonicalize so that CH1 provides the first operand of OP.  */
  if (swap)
    {
      chain_p tmp = ch1;
      ch1 = ch2;
      ch2 = tmp;
    }

  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  /* Build the references of the combined chain from the statements that
     combine each pair of references (reassociating if necessary).  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (struct dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  /* Recompute has_max_use_after: true iff some reference at the maximum
     distance does not dominate the root of the new chain.  */
  new_chain->has_max_use_after = false;
  root_stmt = get_chain_root (new_chain)->stmt;
  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
    {
      if (nw->distance == new_chain->length
	  && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
	{
	  new_chain->has_max_use_after = true;
	  break;
	}
    }

  /* Mark the constituents so they are not considered for further
     independent transformations.  */
  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
2277 
2278 /* Try to combine the CHAINS.  */
2279 
2280 static void
2281 try_combine_chains (vec<chain_p> *chains)
2282 {
2283   unsigned i, j;
2284   chain_p ch1, ch2, cch;
2285   vec<chain_p> worklist = vNULL;
2286 
2287   FOR_EACH_VEC_ELT (*chains, i, ch1)
2288     if (chain_can_be_combined_p (ch1))
2289       worklist.safe_push (ch1);
2290 
2291   while (!worklist.is_empty ())
2292     {
2293       ch1 = worklist.pop ();
2294       if (!chain_can_be_combined_p (ch1))
2295 	continue;
2296 
2297       FOR_EACH_VEC_ELT (*chains, j, ch2)
2298 	{
2299 	  if (!chain_can_be_combined_p (ch2))
2300 	    continue;
2301 
2302 	  cch = combine_chains (ch1, ch2);
2303 	  if (cch)
2304 	    {
2305 	      worklist.safe_push (cch);
2306 	      chains->safe_push (cch);
2307 	      break;
2308 	    }
2309 	}
2310     }
2311 
2312   worklist.release ();
2313 }
2314 
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  /* An invariant chain needs a single initial value; other chains need
     one value per unit of reuse distance.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  gimple_seq stmts;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  /* Create the remaining initializers by evaluating the reference at
     iterations preceding the loop, emitting any computations needed for
     the address on the preheader edge.  */
  for (i = 0; i < n; i++)
    {
      if (chain->inits[i] != NULL_TREE)
	continue;

      /* (int) i - n is negative here (i < n): the reference N - I
	 iterations before the loop.  NOTE(review): the int/unsigned mixed
	 subtraction relies on wraparound conversion to produce the
	 intended negative argument — confirm against ref_at_iteration.  */
      init = ref_at_iteration (dr, (int) i - n, &stmts);
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	return false;

      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
2363 
2364 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2365    be used because the initializers might trap.  */
2366 
2367 static void
2368 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2369 {
2370   chain_p chain;
2371   unsigned i;
2372 
2373   for (i = 0; i < chains.length (); )
2374     {
2375       chain = chains[i];
2376       if (prepare_initializers_chain (loop, chain))
2377 	i++;
2378       else
2379 	{
2380 	  release_chain (chain);
2381 	  chains.unordered_remove (i);
2382 	}
2383     }
2384 }
2385 
/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<loop_p> loop_nest;
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;	/* Temporaries created for the chain variables.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  datarefs.create (10);
  dependences.create (10);
  loop_nest.create (3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      loop_nest.release ();
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  /* The dependences are no longer needed once the components are built.  */
  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

  /* Common cleanup, reached both on success and when no chains exist.  */
end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
2519 
2520 /* Runs predictive commoning.  */
2521 
2522 unsigned
2523 tree_predictive_commoning (void)
2524 {
2525   bool unrolled = false;
2526   struct loop *loop;
2527   loop_iterator li;
2528   unsigned ret = 0;
2529 
2530   initialize_original_copy_tables ();
2531   FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
2532     if (optimize_loop_for_speed_p (loop))
2533       {
2534 	unrolled |= tree_predictive_commoning_loop (loop);
2535       }
2536 
2537   if (unrolled)
2538     {
2539       scev_reset ();
2540       ret = TODO_cleanup_cfg;
2541     }
2542   free_original_copy_tables ();
2543 
2544   return ret;
2545 }
2546