xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-loop-distribution.c (revision c9496f6b604074a9451a67df576a5b423068e71e)
1 /* Loop distribution.
2    Copyright (C) 2006-2015 Free Software Foundation, Inc.
3    Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4    and Sebastian Pop <sebastian.pop@amd.com>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 /* This pass performs loop distribution: for example, the loop
23 
24    |DO I = 2, N
25    |    A(I) = B(I) + C
26    |    D(I) = A(I-1)*E
27    |ENDDO
28 
29    is transformed to
30 
31    |DOALL I = 2, N
32    |   A(I) = B(I) + C
33    |ENDDO
34    |
35    |DOALL I = 2, N
36    |   D(I) = A(I-1)*E
37    |ENDDO
38 
39    This pass uses an RDG, Reduced Dependence Graph built on top of the
40    data dependence relations.  The RDG is then topologically sorted to
41    obtain a map of information producers/consumers based on which it
42    generates the new loops.  */
43 
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "hash-set.h"
48 #include "machmode.h"
49 #include "vec.h"
50 #include "double-int.h"
51 #include "input.h"
52 #include "alias.h"
53 #include "symtab.h"
54 #include "options.h"
55 #include "wide-int.h"
56 #include "inchash.h"
57 #include "tree.h"
58 #include "fold-const.h"
59 #include "predict.h"
60 #include "tm.h"
61 #include "hard-reg-set.h"
62 #include "input.h"
63 #include "function.h"
64 #include "dominance.h"
65 #include "cfg.h"
66 #include "cfganal.h"
67 #include "basic-block.h"
68 #include "tree-ssa-alias.h"
69 #include "internal-fn.h"
70 #include "gimple-expr.h"
71 #include "is-a.h"
72 #include "gimple.h"
73 #include "gimple-iterator.h"
74 #include "gimplify-me.h"
75 #include "stor-layout.h"
76 #include "gimple-ssa.h"
77 #include "tree-cfg.h"
78 #include "tree-phinodes.h"
79 #include "ssa-iterators.h"
80 #include "stringpool.h"
81 #include "tree-ssanames.h"
82 #include "tree-ssa-loop-manip.h"
83 #include "tree-ssa-loop.h"
84 #include "tree-into-ssa.h"
85 #include "tree-ssa.h"
86 #include "cfgloop.h"
87 #include "tree-chrec.h"
88 #include "tree-data-ref.h"
89 #include "tree-scalar-evolution.h"
90 #include "tree-pass.h"
91 #include "gimple-pretty-print.h"
92 #include "tree-vectorizer.h"
93 #include "real.h"
94 
95 
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload hanging off a graph vertex V,
   and convenience forms indexing the I-th vertex of graph RDG.  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
120 
/* Data dependence type.  The character values show up directly in
   debug dumps of RDG edges.  */

enum rdg_dep_type
{
  /* Read After Write (RAW): a use depends on an SSA definition.  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Accessor for the rdg_edge payload attached to graph edge E.  */
#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
141 
142 /* Dump vertex I in RDG to FILE.  */
143 
144 static void
145 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
146 {
147   struct vertex *v = &(rdg->vertices[i]);
148   struct graph_edge *e;
149 
150   fprintf (file, "(vertex %d: (%s%s) (in:", i,
151 	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
152 	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
153 
154   if (v->pred)
155     for (e = v->pred; e; e = e->pred_next)
156       fprintf (file, " %d", e->src);
157 
158   fprintf (file, ") (out:");
159 
160   if (v->succ)
161     for (e = v->succ; e; e = e->succ_next)
162       fprintf (file, " %d", e->dest);
163 
164   fprintf (file, ")\n");
165   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
166   fprintf (file, ")\n");
167 }
168 
169 /* Call dump_rdg_vertex on stderr.  */
170 
171 DEBUG_FUNCTION void
172 debug_rdg_vertex (struct graph *rdg, int i)
173 {
174   dump_rdg_vertex (stderr, rdg, i);
175 }
176 
177 /* Dump the reduced dependence graph RDG to FILE.  */
178 
179 static void
180 dump_rdg (FILE *file, struct graph *rdg)
181 {
182   fprintf (file, "(rdg\n");
183   for (int i = 0; i < rdg->n_vertices; i++)
184     dump_rdg_vertex (file, rdg, i);
185   fprintf (file, ")\n");
186 }
187 
188 /* Call dump_rdg on stderr.  */
189 
190 DEBUG_FUNCTION void
191 debug_rdg (struct graph *rdg)
192 {
193   dump_rdg (stderr, rdg);
194 }
195 
/* Write RDG to FILE in graphviz "dot" syntax: one node per vertex
   labeled with its statement, colored by memory behavior, and one
   arrow per dependence edge.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Route the pretty-printer's output directly to FILE so pp_flush
     interleaves correctly with the fprintf calls below.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* Flow dependences are the most common: print them as
                plain, unlabeled arrows.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

	   case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}
244 
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* NOTE(review): closing the underlying fd here and then calling
     pclose (which closes the stream again) double-closes the
     descriptor.  pclose alone should suffice — harmless in a debug
     helper, but worth confirming before reuse.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
263 
264 /* Returns the index of STMT in RDG.  */
265 
266 static int
267 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
268 {
269   int index = gimple_uid (stmt);
270   gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
271   return index;
272 }
273 
274 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
275    the index of DEF in RDG.  */
276 
277 static void
278 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
279 {
280   use_operand_p imm_use_p;
281   imm_use_iterator iterator;
282 
283   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
284     {
285       struct graph_edge *e;
286       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
287 
288       if (use < 0)
289 	continue;
290 
291       e = add_edge (rdg, idef, use);
292       e->data = XNEW (struct rdg_edge);
293       RDGE_TYPE (e) = flow_dd;
294     }
295 }
296 
297 /* Creates an edge for the control dependences of BB to the vertex V.  */
298 
299 static void
300 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
301 				    int v, control_dependences *cd)
302 {
303   bitmap_iterator bi;
304   unsigned edge_n;
305   EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
306 			    0, edge_n, bi)
307     {
308       basic_block cond_bb = cd->get_edge (edge_n)->src;
309       gimple stmt = last_stmt (cond_bb);
310       if (stmt && is_ctrl_stmt (stmt))
311 	{
312 	  struct graph_edge *e;
313 	  int c = rdg_vertex_for_stmt (rdg, stmt);
314 	  if (c < 0)
315 	    continue;
316 
317 	  e = add_edge (rdg, c, v);
318 	  e->data = XNEW (struct rdg_edge);
319 	  RDGE_TYPE (e) = control_dd;
320 	}
321     }
322 }
323 
324 /* Creates the edges of the reduced dependence graph RDG.  */
325 
326 static void
327 create_rdg_flow_edges (struct graph *rdg)
328 {
329   int i;
330   def_operand_p def_p;
331   ssa_op_iter iter;
332 
333   for (i = 0; i < rdg->n_vertices; i++)
334     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
335 			      iter, SSA_OP_DEF)
336       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
337 }
338 
339 /* Creates the edges of the reduced dependence graph RDG.  */
340 
341 static void
342 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
343 {
344   int i;
345 
346   for (i = 0; i < rdg->n_vertices; i++)
347     {
348       gimple stmt = RDG_STMT (rdg, i);
349       if (gimple_code (stmt) == GIMPLE_PHI)
350 	{
351 	  edge_iterator ei;
352 	  edge e;
353 	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
354 	      create_edge_for_control_dependence (rdg, e->src, i, cd);
355 	}
356       else
357 	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
358     }
359 }
360 
361 /* Build the vertices of the reduced dependence graph RDG.  Return false
362    if that failed.  */
363 
364 static bool
365 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
366 		     vec<data_reference_p> *datarefs)
367 {
368   int i;
369   gimple stmt;
370 
371   FOR_EACH_VEC_ELT (stmts, i, stmt)
372     {
373       struct vertex *v = &(rdg->vertices[i]);
374 
375       /* Record statement to vertex mapping.  */
376       gimple_set_uid (stmt, i);
377 
378       v->data = XNEW (struct rdg_vertex);
379       RDGV_STMT (v) = stmt;
380       RDGV_DATAREFS (v).create (0);
381       RDGV_HAS_MEM_WRITE (v) = false;
382       RDGV_HAS_MEM_READS (v) = false;
383       if (gimple_code (stmt) == GIMPLE_PHI)
384 	continue;
385 
386       unsigned drp = datarefs->length ();
387       if (!find_data_references_in_stmt (loop, stmt, datarefs))
388 	return false;
389       for (unsigned j = drp; j < datarefs->length (); ++j)
390 	{
391 	  data_reference_p dr = (*datarefs)[j];
392 	  if (DR_IS_READ (dr))
393 	    RDGV_HAS_MEM_READS (v) = true;
394 	  else
395 	    RDGV_HAS_MEM_WRITE (v) = true;
396 	  RDGV_DATAREFS (v).safe_push (dr);
397 	}
398     }
399   return true;
400 }
401 
402 /* Initialize STMTS with all the statements of LOOP.  The order in
403    which we discover statements is important as
404    generate_loops_for_partition is using the same traversal for
405    identifying statements in loop copies.  */
406 
407 static void
408 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
409 {
410   unsigned int i;
411   basic_block *bbs = get_loop_body_in_dom_order (loop);
412 
413   for (i = 0; i < loop->num_nodes; i++)
414     {
415       basic_block bb = bbs[i];
416 
417       for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
418 	   gsi_next (&bsi))
419 	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
420 	  stmts->safe_push (bsi.phi ());
421 
422       for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
423 	   gsi_next (&bsi))
424 	{
425 	  gimple stmt = gsi_stmt (bsi);
426 	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
427 	    stmts->safe_push (stmt);
428 	}
429     }
430 
431   free (bbs);
432 }
433 
434 /* Free the reduced dependence graph RDG.  */
435 
436 static void
437 free_rdg (struct graph *rdg)
438 {
439   int i;
440 
441   for (i = 0; i < rdg->n_vertices; i++)
442     {
443       struct vertex *v = &(rdg->vertices[i]);
444       struct graph_edge *e;
445 
446       for (e = v->succ; e; e = e->succ_next)
447 	free (e->data);
448 
449       if (v->data)
450 	{
451 	  gimple_set_uid (RDGV_STMT (v), -1);
452 	  free_data_refs (RDGV_DATAREFS (v));
453 	  free (v->data);
454 	}
455     }
456 
457   free_graph (rdg);
458 }
459 
460 /* Build the Reduced Dependence Graph (RDG) with one vertex per
461    statement of the loop nest LOOP_NEST, and one edge per data dependence or
462    scalar dependence.  */
463 
464 static struct graph *
465 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
466 {
467   struct graph *rdg;
468   vec<data_reference_p> datarefs;
469 
470   /* Create the RDG vertices from the stmts of the loop nest.  */
471   auto_vec<gimple, 10> stmts;
472   stmts_from_loop (loop_nest[0], &stmts);
473   rdg = new_graph (stmts.length ());
474   datarefs.create (10);
475   if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
476     {
477       datarefs.release ();
478       free_rdg (rdg);
479       return NULL;
480     }
481   stmts.release ();
482 
483   create_rdg_flow_edges (rdg);
484   if (cd)
485     create_rdg_cd_edges (rdg, cd);
486 
487   datarefs.release ();
488 
489   return rdg;
490 }
491 
492 
493 
/* The kind of code a partition is materialized as: a plain loop, or a
   call to a builtin.  */
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition: the set of statements that will become one distributed
   loop (or one builtin call).  */
typedef struct partition_s
{
  /* Gimple uids of the statements in this partition.  */
  bitmap stmts;
  /* Numbers of the loops containing those statements.  */
  bitmap loops;
  /* True when some statement's scalar result is used after the loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Iteration count used to size the builtin call's length argument.  */
  tree niter;
  /* True when the builtin must cover one extra iteration's worth of
     data (see build_size_arg_loc).  */
  bool plus_one;
} *partition_t;
510 
511 
512 /* Allocate and initialize a partition from BITMAP.  */
513 
514 static partition_t
515 partition_alloc (bitmap stmts, bitmap loops)
516 {
517   partition_t partition = XCNEW (struct partition_s);
518   partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
519   partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
520   partition->reduction_p = false;
521   partition->kind = PKIND_NORMAL;
522   return partition;
523 }
524 
525 /* Free PARTITION.  */
526 
527 static void
528 partition_free (partition_t partition)
529 {
530   BITMAP_FREE (partition->stmts);
531   BITMAP_FREE (partition->loops);
532   free (partition);
533 }
534 
535 /* Returns true if the partition can be generated as a builtin.  */
536 
537 static bool
538 partition_builtin_p (partition_t partition)
539 {
540   return partition->kind != PKIND_NORMAL;
541 }
542 
543 /* Returns true if the partition contains a reduction.  */
544 
545 static bool
546 partition_reduction_p (partition_t partition)
547 {
548   return partition->reduction_p;
549 }
550 
551 /* Merge PARTITION into the partition DEST.  */
552 
553 static void
554 partition_merge_into (partition_t dest, partition_t partition)
555 {
556   dest->kind = PKIND_NORMAL;
557   bitmap_ior_into (dest->stmts, partition->stmts);
558   if (partition_reduction_p (partition))
559     dest->reduction_p = true;
560 }
561 
562 
563 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
564    the LOOP.  */
565 
566 static bool
567 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
568 {
569   imm_use_iterator imm_iter;
570   use_operand_p use_p;
571 
572   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
573     {
574       gimple use_stmt = USE_STMT (use_p);
575       if (!is_gimple_debug (use_stmt)
576 	  && loop != loop_containing_stmt (use_stmt))
577 	return true;
578     }
579 
580   return false;
581 }
582 
583 /* Returns true when STMT defines a scalar variable used after the
584    loop LOOP.  */
585 
586 static bool
587 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
588 {
589   def_operand_p def_p;
590   ssa_op_iter op_iter;
591 
592   if (gimple_code (stmt) == GIMPLE_PHI)
593     return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
594 
595   FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
596     if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
597       return true;
598 
599   return false;
600 }
601 
/* Return a copy of LOOP placed before LOOP, on its preheader edge.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  /* The duplication helper records block/edge mappings in the
     original-copy tables, which must be set up around the call.  */
  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  /* Discard pending SSA-update state; presumably the caller finishes
     SSA form itself — confirm against the pass driver.  */
  delete_update_ssa ();

  return res;
}
618 
619 /* Creates an empty basic block after LOOP.  */
620 
621 static void
622 create_bb_after_loop (struct loop *loop)
623 {
624   edge exit = single_exit (loop);
625 
626   if (!exit)
627     return;
628 
629   split_edge (exit);
630 }
631 
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a copy placed before LOOP; give it a simple preheader
	 and a landing block after its exit.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: detach debug uses from the statements about to be
     removed, so their removal leaves no dangling debug references.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: remove, or for control statements neutralize, every
     statement not in the partition.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  /* Force the false edge.  */
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  /* Force the first case label's edge.  */
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  /* Plain statement: remove it and release its defs.  */
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
729 
730 /* Build the size argument for a memory operation call.  */
731 
732 static tree
733 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
734 		    bool plus_one)
735 {
736   tree size = fold_convert_loc (loc, sizetype, nb_iter);
737   if (plus_one)
738     size = size_binop (PLUS_EXPR, size, size_one_node);
739   size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
740 			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
741   size = fold_convert_loc (loc, size_type_node, size);
742   return size;
743 }
744 
745 /* Build an address argument for a memory operation call.  */
746 
747 static tree
748 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
749 {
750   tree addr_base;
751 
752   addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
753   addr_base = fold_convert_loc (loc, sizetype, addr_base);
754 
755   /* Test for a negative stride, iterating over every element.  */
756   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
757     {
758       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
759 				  fold_convert_loc (loc, sizetype, nb_bytes));
760       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
761 				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
762     }
763 
764   return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
765 }
766 
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  /* Integer zero and an empty non-clobber CONSTRUCTOR are all-zero
     bytes by definition.  */
  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  /* A recursive result of 0 means "all-zero bytes"; both
	     parts must be +0.0-like for the whole constant to be.  */
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  /* All elements must recursively be all-zero bytes.  */
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  /* The byte-wise comparison below assumes 8-bit bytes on both host
     and target.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  /* General case: encode VAL as target bytes and compare them.  */
  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
824 
/* Generate a call to memset for PARTITION in LOOP, inserted at the
   end of the loop's preheader block.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Build and gimplify the byte count and destination address.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* Convert the stored value to the int argument memset takes.  */
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
880 
/* Generate a call to memcpy (or memmove when source and destination
   may overlap) for PARTITION in LOOP, inserted at the end of the
   loop's preheader block.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Build and gimplify the byte count, then the two addresses.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* Overlapping regions require memmove's semantics.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
925 
/* Remove and destroy the loop LOOP, which must have a single exit.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Short-circuit the CFG around the loop: the preheader's source
     block now falls through straight to the exit destination.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* Deleting the loop's blocks may have changed DEST's immediate
     dominator; recompute it.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
974 
975 /* Generates code for PARTITION.  */
976 
977 static void
978 generate_code_for_partition (struct loop *loop,
979 			     partition_t partition, bool copy_p)
980 {
981   switch (partition->kind)
982     {
983     case PKIND_NORMAL:
984       /* Reductions all have to be in the last partition.  */
985       gcc_assert (!partition_reduction_p (partition)
986 		  || !copy_p);
987       generate_loops_for_partition (loop, partition, copy_p);
988       return;
989 
990     case PKIND_MEMSET:
991       generate_memset_builtin (loop, partition);
992       break;
993 
994     case PKIND_MEMCPY:
995       generate_memcpy_builtin (loop, partition);
996       break;
997 
998     default:
999       gcc_unreachable ();
1000     }
1001 
1002   /* Common tail for partitions we turn into a call.  If this was the last
1003      partition for which we generate code, we have to destroy the loop.  */
1004   if (!copy_p)
1005     destroy_loop (loop);
1006 }
1007 
1008 
1009 /* Returns a partition with all the statements needed for computing
1010    the vertex V of the RDG, also including the loop exit conditions.  */
1011 
1012 static partition_t
1013 build_rdg_partition_for_vertex (struct graph *rdg, int v)
1014 {
1015   partition_t partition = partition_alloc (NULL, NULL);
1016   auto_vec<int, 3> nodes;
1017   unsigned i;
1018   int x;
1019 
1020   graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
1021 
1022   FOR_EACH_VEC_ELT (nodes, i, x)
1023     {
1024       bitmap_set_bit (partition->stmts, x);
1025       bitmap_set_bit (partition->loops,
1026 		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
1027     }
1028 
1029   return partition;
1030 }
1031 
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   Sets PARTITION->kind to PKIND_MEMSET or PKIND_MEMCPY when the partition
   matches one of those idioms and records the data reference(s) and the
   iteration count needed to emit the library call later.  Partitions with
   scalar uses outside of LOOP are marked as reductions and left
   PKIND_NORMAL.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  /* Start from a clean slate: assume an ordinary (non-builtin) partition.  */
  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  /* First scan: note volatile accesses and detect reductions.  */
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  Walk all statements and find the single
     load and/or single store of the partition, rejecting anything that
     is not a plain copy assignment.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  /* A builtin replacement must at least store something.  */
  if (!single_store)
    return;

  /* The latch execution count must be analyzable to size the call.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* If the store also executes on the exiting iteration the builtin
     has to cover one extra element.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* A store with no load is a memset candidate.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      /* The stored value must either have all bytes equal or be a
	 single byte, so it can be expressed as a memset fill value.  */
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      /* A value computed inside the loop is not invariant and cannot
	 be turned into a memset fill value.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      /* The store must be an adjacent (consecutive) access and must
	 execute on every iteration (it dominates the latch).  */
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      /* A store fed by a single load is a memcpy/memmove candidate.  */
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both references must be adjacent accesses advancing with equal
	 steps, and the store must execute on every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  /* Unanalyzable dependence - be safe and keep the loop.  */
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive, non-reversed distance means the load reads
		 elements written on an earlier iteration - that is not
		 representable by a single memmove.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1200 
1201 /* For a data reference REF, return the declaration of its base
1202    address or NULL_TREE if the base is not determined.  */
1203 
1204 static tree
1205 ref_base_address (data_reference_p dr)
1206 {
1207   tree base_address = DR_BASE_ADDRESS (dr);
1208   if (base_address
1209       && TREE_CODE (base_address) == ADDR_EXPR)
1210     return TREE_OPERAND (base_address, 0);
1211 
1212   return base_address;
1213 }
1214 
1215 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1216    accesses in RDG.  */
1217 
1218 static bool
1219 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1220 			 partition_t partition2)
1221 {
1222   unsigned i, j, k, l;
1223   bitmap_iterator bi, bj;
1224   data_reference_p ref1, ref2;
1225 
1226   /* First check whether in the intersection of the two partitions are
1227      any loads or stores.  Common loads are the situation that happens
1228      most often.  */
1229   EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1230     if (RDG_MEM_WRITE_STMT (rdg, i)
1231 	|| RDG_MEM_READS_STMT (rdg, i))
1232       return true;
1233 
1234   /* Then check all data-references against each other.  */
1235   EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1236     if (RDG_MEM_WRITE_STMT (rdg, i)
1237 	|| RDG_MEM_READS_STMT (rdg, i))
1238       EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1239 	if (RDG_MEM_WRITE_STMT (rdg, j)
1240 	    || RDG_MEM_READS_STMT (rdg, j))
1241 	  {
1242 	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1243 	      {
1244 		tree base1 = ref_base_address (ref1);
1245 		if (base1)
1246 		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1247 		    if (base1 == ref_base_address (ref2))
1248 		      return true;
1249 	      }
1250 	  }
1251 
1252   return false;
1253 }
1254 
1255 /* Aggregate several components into a useful partition that is
1256    registered in the PARTITIONS vector.  Partitions will be
1257    distributed in different loops.  */
1258 
1259 static void
1260 rdg_build_partitions (struct graph *rdg,
1261 		      vec<gimple> starting_stmts,
1262 		      vec<partition_t> *partitions)
1263 {
1264   bitmap processed = BITMAP_ALLOC (NULL);
1265   int i;
1266   gimple stmt;
1267 
1268   FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1269     {
1270       int v = rdg_vertex_for_stmt (rdg, stmt);
1271 
1272       if (dump_file && (dump_flags & TDF_DETAILS))
1273 	fprintf (dump_file,
1274 		 "ldist asked to generate code for vertex %d\n", v);
1275 
1276       /* If the vertex is already contained in another partition so
1277          is the partition rooted at it.  */
1278       if (bitmap_bit_p (processed, v))
1279 	continue;
1280 
1281       partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1282       bitmap_ior_into (processed, partition->stmts);
1283 
1284       if (dump_file && (dump_flags & TDF_DETAILS))
1285 	{
1286 	  fprintf (dump_file, "ldist useful partition:\n");
1287 	  dump_bitmap (dump_file, partition->stmts);
1288 	}
1289 
1290       partitions->safe_push (partition);
1291     }
1292 
1293   /* All vertices should have been assigned to at least one partition now,
1294      other than vertices belonging to dead code.  */
1295 
1296   BITMAP_FREE (processed);
1297 }
1298 
1299 /* Dump to FILE the PARTITIONS.  */
1300 
1301 static void
1302 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1303 {
1304   int i;
1305   partition_t partition;
1306 
1307   FOR_EACH_VEC_ELT (partitions, i, partition)
1308     debug_bitmap_file (file, partition->stmts);
1309 }
1310 
/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

/* Print the statement bitmaps of PARTITIONS to stderr; convenience
   entry point intended to be called from a debugger.  */

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1319 
1320 /* Returns the number of read and write operations in the RDG.  */
1321 
1322 static int
1323 number_of_rw_in_rdg (struct graph *rdg)
1324 {
1325   int i, res = 0;
1326 
1327   for (i = 0; i < rdg->n_vertices; i++)
1328     {
1329       if (RDG_MEM_WRITE_STMT (rdg, i))
1330 	++res;
1331 
1332       if (RDG_MEM_READS_STMT (rdg, i))
1333 	++res;
1334     }
1335 
1336   return res;
1337 }
1338 
1339 /* Returns the number of read and write operations in a PARTITION of
1340    the RDG.  */
1341 
1342 static int
1343 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1344 {
1345   int res = 0;
1346   unsigned i;
1347   bitmap_iterator ii;
1348 
1349   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1350     {
1351       if (RDG_MEM_WRITE_STMT (rdg, i))
1352 	++res;
1353 
1354       if (RDG_MEM_READS_STMT (rdg, i))
1355 	++res;
1356     }
1357 
1358   return res;
1359 }
1360 
1361 /* Returns true when one of the PARTITIONS contains all the read or
1362    write operations of RDG.  */
1363 
1364 static bool
1365 partition_contains_all_rw (struct graph *rdg,
1366 			   vec<partition_t> partitions)
1367 {
1368   int i;
1369   partition_t partition;
1370   int nrw = number_of_rw_in_rdg (rdg);
1371 
1372   FOR_EACH_VEC_ELT (partitions, i, partition)
1373     if (nrw == number_of_rw_in_partition (rdg, partition))
1374       return true;
1375 
1376   return false;
1377 }
1378 
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR is the
   direction accumulated so far: 0 no dependence, -1 backward, 1 forward,
   2 both directions (the caller can stop - merging will occur).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	/* Default: a forward dependence from dr1 to dr2.  */
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* An unanalyzable dependence constrains both directions.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* Proven independent - this pair imposes no constraint.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Different results from different pairs - be conservative
	     and report both directions, forcing a merge.  */
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
1445 
1446 /* Compare postorder number of the partition graph vertices V1 and V2.  */
1447 
1448 static int
1449 pgcmp (const void *v1_, const void *v2_)
1450 {
1451   const vertex *v1 = (const vertex *)v1_;
1452   const vertex *v2 = (const vertex *)v2_;
1453   return v2->post - v1->post;
1454 }
1455 
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Sets *NB_CALLS to the
   number of partitions turned into builtin library calls.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  /* Build the reduced dependence graph over the loop nest.  */
  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Seed one partition per statement in STMTS (statements already
     covered by an earlier partition are skipped).  */
  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Detect memset/memcpy patterns and reductions in each partition.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      /* Attach to each graph vertex its partition and the partition's
	 data references split into reads and writes.  */
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* For every pair of partitions accumulate the dependence direction
	 over their write/read, read/write and write/write references and
	 add the corresponding graph edges.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  Each SCC
	 keeps its first partition; the others are merged into it.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  /* Give up when nothing remains to split, when a single non-builtin
     partition remains, or when one partition already contains all the
     memory accesses of the loop.  */
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  /* Emit code for each partition; every partition but the last gets a
     copy of the loop.  */
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
1721 
1722 /* Distribute all loops in the current function.  */
1723 
1724 namespace {
1725 
/* Pass metadata for the "ldist" GIMPLE pass (loop distribution).  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1738 
/* The loop distribution pass object; the actual work is done in
   the execute method below.  */
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when loop distribution or distribution of memset/memcpy
     patterns was requested.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
1756 
/* Main entry of the pass: walk all innermost loops of FUN, seed a
   statement worklist for each and try to distribute it.  Returns 0
   (no extra TODO flags).  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Invalidate all statement UIDs up front.  NOTE(review): presumably
     they are reassigned per loop when the RDG is built - confirm
     against build_rdg.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      /* Virtual PHIs carry no computation to distribute.  */
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once, and reused
	     for every loop of the function.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1883 
1884 } // anon namespace
1885 
/* Allocate a new instance of the loop distribution pass for CTXT.  */

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}
1891