/* Loop distribution.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, a Reduced Dependence Graph built on top of
   the data dependence relations.  The RDG is then topologically sorted
   to obtain a map of information producers/consumers based on which it
   generates the new loops.  */
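/* A reader's sketch of the overall flow, not a normative contract:
   distribute_loop builds the RDG (build_rdg), seeds one partition per
   starting statement (rdg_build_partitions), classifies each partition
   as a potential memset/memcpy (classify_partition), fuses partitions
   that cannot or should not be separated, and finally emits either a
   distributed loop (generate_loops_for_partition) or a library call
   (generate_memset_builtin, generate_memcpy_builtin).  */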

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
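/* For example, RDG_STMT (rdg, i) yields the statement of vertex I; this
   works because create_rdg_vertices records each statement's vertex
   index in its gimple uid.  */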

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
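/* The DEBUG_FUNCTION helpers above are meant for interactive use, e.g.
   "call debug_rdg (rdg)" from gdb while stopped inside this pass.  */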

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* Flow dependences are the most common: print them
		 without a label.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important: generate_loops_for_partition
   uses the same traversal to identify statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per statement
   of the loop nest LOOP_NEST, and one edge per flow dependence or
   control dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data references that a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
};


/* Allocate and initialize a partition from BITMAP.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  The merged partition is no
   longer a single recognized builtin, so reset its kind to
   PKIND_NORMAL.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
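/* For example, with NB_ITER = n - 1 latch executions, PLUS_ONE set and a
   char access, build_size_arg_loc computes
   ((size_t) (n - 1) + 1) * sizeof (char) = n bytes.  */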

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
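/* For a negative stride, e.g. "for (i = n - 1; i >= 0; --i) a[i] = 0;",
   the data reference starts at the last element, so build_addr_arg_loc
   rewinds by NB_BYTES and adds back one element size to obtain the
   lowest address the builtin should start at.  */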

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
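/* Similarly, "for (i = 0; i < n; ++i) a[i] = b[i];" becomes a single
   __builtin_memcpy (a, b, n), or __builtin_memmove when the source and
   destination may overlap (the ptr_derefs_may_alias_p test above).  */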

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure not to leave any dangling uses of SSA names
	 defined in the loop, with the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

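  /* Walk the RDG edges backwards from V (forward = false), collecting V
     together with every vertex it transitively depends on.  */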
  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
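      /* E.g. "a[i] = a[i + 1]" can be emitted as memmove (a, a + 1, ...),
	 but "a[i] = a[i - 1]", which propagates a single value through
	 the array, cannot: that is the positive-distance, non-reversed
	 case rejected below.  */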
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions contains
     any loads or stores.  Shared loads are the most common case.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute the partition dependence created by the data references in
   DRS1 and DRS2, update DIR accordingly and return it.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so give up early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}