/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
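
/* With -ftree-loop-distribute-patterns the pass additionally replaces
   partitions that match a known idiom by a library call.  An
   illustrative sketch (not the literal GIMPLE produced; see
   classify_partition and generate_memset_builtin below):

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   becomes

   |memset (a, 0, n * sizeof (*a));

   and analogous copy loops become memcpy or memmove calls.  */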

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
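
/* The RDGV_* accessors above expect a struct vertex *, while the RDG_*
   variants take the graph plus a vertex index; e.g. the typical query
   used throughout this file is

   |if (RDG_MEM_WRITE_STMT (rdg, i))
   |  ... RDG_STMT (rdg, i) is a statement writing memory ...  */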

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* These are the most common dependences; print them
                without a label.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

	   case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per flow or
   control dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  bool plus_one;
  enum partition_kind kind;
  /* Data references that a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
};


/* Allocate and initialize a partition from bitmaps STMTS and LOOPS.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
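
/* As an example, for a store to an int array that also executes on the
   iteration leaving the loop, NB_ITER is the latch execution count and
   PLUS_ONE is true, so on a target with 4-byte int the size argument
   works out to (nb_iter + 1) * 4 bytes.  */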

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
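
/* For a negative stride the data reference denotes the first element
   touched, which is the highest address of the region; the code above
   rewinds the address by NB_BYTES minus one element so that e.g. a loop
   storing to a[i] for i from n - 1 down to 0 still passes &a[0] as the
   address argument.  */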

/* If the memory representation of VAL contains the same value in all
   bytes, return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
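
/* This is what allows generate_memset_builtin to handle more than
   zeroing loops.  An illustrative sketch, assuming a 4-byte element
   type:

   |for (i = 0; i < n; i++)
   |  a[i] = 0x24242424;

   stores the byte 0x24 at every position and can therefore become

   |memset (a, 0x24, n * 4);  */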

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  tree type = TREE_TYPE (DR_REF (dr));

	  /* The memset, memcpy and memmove library calls are only
	     able to deal with generic address space.  */
	  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
	    return;

	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	  partition->kind = PKIND_MEMMOVE;
	}
      else
	partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
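
/* The dependence distance test above separates copy loops with memmove
   semantics from those without; an illustrative sketch:

   |for (i = 0; i < n; i++)       for (i = 0; i < n; i++)
   |  a[i] = a[i + 1];              a[i + 1] = a[i];

   The first loop reads every element before it is overwritten and is
   equivalent to a memmove; the second consumes values stored by earlier
   iterations (it replicates a[0]) and is rejected because a positive,
   non-reversed dependence distance is found.  */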

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
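
/* E.g. two non-builtin partitions that both load from or store to the
   same array are considered similar and are fused by the simple cost
   model in distribute_loop, keeping their accesses in one loop for
   better cache locality.  */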

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (this_dir == 2)
	  return 2;
	else if (dir == 0)
	  dir = this_dir;
	else if (this_dir != 0 && dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
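
/* For instance, if partition I's writes depend forward on partition J's
   reads (dir == 1) while I's reads depend backward on J's writes
   (dir == -1), the merged direction becomes 2; distribute_loop then
   adds edges in both directions, the two vertices end up in the same
   strongly connected component, and the partitions are fused.  */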

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
         accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up referring to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	  destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}