/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses a Reduced Dependence Graph (RDG) built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
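
/* Illustrative note (not part of the algorithm): for the example
   above, the RDG has one vertex per statement.  The write A(I) and
   the read A(I-1) form a loop-carried flow dependence of distance 1
   from the first statement to the second, so the two statements can
   be split into separate loops that execute one after the other,
   exactly as shown.  */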

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
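
/* Note that memory dependences are not recorded as RDG edges in this
   implementation; they are computed on demand when the partitions are
   ordered (see pg_add_dependence_edges below).  */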

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: print them unlabeled.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
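
/* The lookup above relies on create_rdg_vertices having stored the
   vertex index in the statement's UID; statements that are not part
   of the RDG keep the UID -1 set up in
   pass_loop_distribution::execute.  */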

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence (def-use) edges of the reduced
   dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important, as
   generate_loops_for_partition uses the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per flow or
   control dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data references that a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;


/* Allocate and initialize a partition from bitmaps STMTS and LOOPS.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used
   outside of the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used outside of
   the loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
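
/* Worked example (illustrative numbers): for a store to an array of
   4-byte elements with NB_ITER latch executions and PLUS_ONE set,
   the size argument evaluates to (NB_ITER + 1) * 4 bytes.  */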

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
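
/* Illustrative note: with a negative stride the first iteration
   accesses the highest-addressed element, so the code above rewinds
   the offset by NB_BYTES and steps one element forward again, making
   the returned address point at the lowest-addressed byte of the
   region, as memset/memcpy expect.  */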

/* If VAL's memory representation consists of the same byte repeated,
   return that byte value; otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
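
/* As an illustration (not taken from a dump), a loop like

     for (i = 0; i < n; ++i)
       a[i] = 0;

   over 4-byte elements is replaced by a call equivalent to

     memset (&a[0], 0, n * 4);

   emitted before the loop.  */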

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
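
/* Likewise (illustrative sketch), a loop copying n 4-byte elements,
   a[i] = b[i], becomes a call equivalent to
   memcpy (&a[0], &b[0], n * 4), or memmove when the two regions may
   alias.  */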

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure not to leave any dangling uses of SSA names
	 defined in the loop, with the exception of virtuals.  Make
	 sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
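
/* The DFS above runs against the edge direction (the FORWARD argument
   is false), so starting from V it collects V together with every
   vertex V transitively depends on, i.e. the set of statements needed
   to compute V.  */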

/* Classifies the builtin kind we can generate for PARTITION of RDG
   and LOOP.  Currently we detect the memset and memcpy patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
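
/* Note on the distance check above: a copy loop is only equivalent to
   memmove when no iteration reads an element that an earlier
   iteration has already overwritten (memmove copies as if through a
   temporary buffer); the walk over the distance vectors rejects the
   dependence shapes that would violate this.  */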

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores; shared loads are the most common
     case.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition,
	 then so is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t>);

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute the partition dependence created by the data references in
   DRS1 and DRS2, and update and return the dependence direction DIR
   accordingly.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is backward,
     1 is forward, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Otherwise, as the distance vector is lexicographically
		   positive, swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
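
/* For instance, if the only dependence found runs forward, from a
   write in the first partition to a read in the second, the function
   returns 1 and distribute_loop below adds a single edge that orders
   the first partition before the second; a result of 2 puts both
   partitions into one dependence SCC, which forces them to be
   fused.  */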

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is backward,
	       1 is forward, 2 is both (we can stop then, merging will
	       occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}