/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
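
/* For illustration, in a loop body like

     if (x_1 > 0)      <-- source of a control_dd edge to the store below
       a[i_2] = b_3;   <-- target of a flow_dd edge from the def of b_3

   a flow edge keeps a value's producer together with its consumers,
   while a control edge keeps a statement together with the condition
   guarding its execution.  */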

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* These are the most common dependences; print them without a label.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

	   case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
/* Creates the flow dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the control dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition uses the same traversal to identify
   statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  bool plus_one;
  enum partition_kind kind;
  /* Data references that a partition of kind != PKIND_NORMAL is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
};


/* Allocate and initialize a partition from the bitmaps STMTS and LOOPS.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
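
/* For illustration: with an element type of size 4, NB_ITER latch
   iterations and PLUS_ONE set (the access also happens on the final,
   exiting iteration), the expression built above is equivalent to

     (size_t) ((NB_ITER + 1) * 4)

   i.e. the total number of bytes accessed.  */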

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
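
/* For illustration: with a negative stride the data reference starts at
   the highest-addressed element, e.g. a loop filling a[i] for i from
   n - 1 down to 0.  The adjustment above rewinds ADDR_BASE by NB_BYTES
   and then steps forward by one element so that the returned pointer is
   the lowest address accessed, which is what the memset/memcpy calls
   expect.  */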

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = rewrite_to_non_trapping_overflow (nb_bytes);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = rewrite_to_non_trapping_overflow (nb_bytes);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure not to leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
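  /* FORWARD is false above, so the DFS walks the dependence edges
     backwards and NODES collects V together with every vertex it
     transitively depends on, i.e. the producers of the values that V
     consumes.  */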

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  tree type = TREE_TYPE (DR_REF (dr));

	  /* The memset, memcpy and memmove library calls are only
	     able to deal with generic address space.  */
	  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
	    return;

	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	  partition->kind = PKIND_MEMMOVE;
	}
      else
	partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
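
/* For illustration, a simple loop like

     for (i = 0; i < n; ++i)
       a[i] = 0;

   is classified as PKIND_MEMSET: MAIN_DR is the store to a[i], NITER is
   the number of latch executions and PLUS_ONE is set when the store also
   runs on the final, exiting iteration, so that the size argument built
   later works out to n * sizeof (a[0]) and the loop can be replaced by
   memset (a, 0, n * sizeof (a[0])).  */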

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions contains
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}

/* Build a partition rooted at each statement in STARTING_STMTS, skipping
   statements already covered by an earlier partition, and register the
   result in the PARTITIONS vector.  Partitions will be distributed into
   different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (this_dir == 2)
	  return 2;
	else if (dir == 0)
	  dir = this_dir;
	else if (this_dir != 0 && dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
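
/* For illustration: if one pair of references yields a forward
   direction (1) and another pair a backward direction (-1), the
   directions conflict and the function returns 2, which makes the
   caller connect the two partitions both ways so they end up in the
   same SCC and are fused.  */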

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
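
/* Sorting the vertices by decreasing postorder number yields a
   topological order of the partition graph once its SCCs have been
   fused, placing producer partitions before their consumers.  */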

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
         accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up referring to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	  destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}