/* Loop distribution.
   Copyright (C) 2006-2020 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   Loop distribution is the dual of loop fusion.  It separates statements
   of a loop (or loop nest) into multiple loops (or loop nests) with the
   same loop header.  The major goal is to separate statements which may
   be vectorized from those that can't.  This pass implements distribution
   in the following steps:

     1) Seed partitions with specific types of statements.  For now we
        support two types of seed statements: statements defining variables
        used outside of the loop; statements storing to memory.
     2) Build a reduced dependence graph (RDG) for the loop to be
        distributed.  The vertices (RDG:V) model all statements in the loop
        and the edges (RDG:E) model flow and control dependencies between
        statements.
     3) Apart from the RDG, compute data dependencies between memory
        references.
     4) Starting from a seed statement, build up a partition by adding
        dependent statements according to the RDG's dependence information.
        A partition is classified as parallel type if it can be executed in
        parallel, or as sequential type if it can't.  A parallel-type
        partition is further classified into different builtin kinds if it
        can be implemented as builtin function calls.
     5) Build a partition dependence graph (PG) based on data dependencies.
        The vertices (PG:V) model all partitions and the edges (PG:E) model
        all data dependencies between every pair of partitions.  In general,
        a data dependence is either known or unknown at compilation time.
        In C family languages, quite a few compilation time unknown
        dependencies exist because of possible aliasing between data
        references.  We categorize PG's edges into two types: "true" edges,
        which represent data dependencies known at compilation time, and
        "alias" edges for all other data dependencies.
     6) Traverse the subgraph of PG as if all "alias" edges didn't exist.
        Merge partitions in each strongly connected component (SCC)
        accordingly.  Build a new PG for the merged partitions.
     7) Traverse PG again, this time with both "true" and "alias" edges
        included.  We try to break SCCs by removing some edges.  Because
        SCCs formed by "true" edges were all fused in step 6), we can break
        SCCs by removing some "alias" edges.  It's NP-hard to choose an
        optimal edge set; fortunately a simple approximation is good enough
        for us given the small problem scale.
     8) Collect all data dependencies of the removed "alias" edges.  Create
        runtime alias checks for the collected data dependencies.
     9) Version the loop under the condition of the runtime alias checks.
        Given that loop distribution generally introduces additional
        overhead, it is only useful if vectorization is achieved in a
        distributed loop.  We version the loop with the internal function
        call IFN_LOOP_DIST_ALIAS.  If no distributed loop can be vectorized,
        we simply remove the distributed loops and recover the original one.
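
        For example, the versioned code produced by step 9) is shaped
        like the following sketch (not the exact GIMPLE produced):

          if (IFN_LOOP_DIST_ALIAS (<loop id>, <runtime alias checks>))
            <distributed loops>
          else
            <original loop>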

   TODO:
     1) We currently only distribute the innermost two-level loop nest.
        We should extend it to arbitrary loop nests in the future.
     2) We only fuse partitions in an SCC for now.  A better fusion
        algorithm is desired to minimize loop overhead, maximize
        parallelism and maximize data reuse.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
#include "gimple-fold.h"
#include "tree-affine.h"


#define MAX_DATAREFS_NUM \
	((unsigned) param_loop_max_datarefs_for_datadeps)

/* Threshold controlling the number of distributed partitions.  Given it
   may become unnecessary if a memory stream cost model is invented in the
   future, we define it as a temporary macro rather than a parameter.  */
#define NUM_PARTITION_THRESHOLD (4)

/* Hashtable helpers.  */

struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation>
{
  static inline hashval_t hash (const data_dependence_relation *);
  static inline bool equal (const data_dependence_relation *,
			    const data_dependence_relation *);
};

/* Hash function for data dependence.  */

inline hashval_t
ddr_hasher::hash (const data_dependence_relation *ddr)
{
  inchash::hash h;
  h.add_ptr (DDR_A (ddr));
  h.add_ptr (DDR_B (ddr));
  return h.end ();
}

/* Hash table equality function for data dependence.  */

inline bool
ddr_hasher::equal (const data_dependence_relation *ddr1,
		   const data_dependence_relation *ddr2)
{
  return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
}


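/* Index of data reference DR within the pass's vector of data references,
   stashed in DR's aux field.  */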
#define DR_INDEX(dr)      ((uintptr_t) (dr)->aux)

/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

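/* Accessors for the rdg_vertex attached to vertex V of a graph, and to
   vertex number I of the reduced dependence graph RDG.  */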
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

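/* The dependence type attached to RDG edge E.  */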
#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Kind of distributed loop.  */
enum partition_kind {
    PKIND_NORMAL,
    /* Partial memset describes a partition that can be distributed into a
       loop of memset calls, rather than a single memset call.  It's
       handled just like a normal partition, i.e., distributed as a
       separate loop; no memset call is generated.

       Note: This is a hackish fix trying to distribute a ZERO-ing stmt in
       a loop nest as deep as possible.  As a result, parloop achieves
       better parallelization by parallelizing the deeper loop nest.  This
       hack should be unnecessary and removed once distributed memset can
       be understood and analyzed in data reference analysis.  See PR82604
       for more.  */
    PKIND_PARTIAL_MEMSET,
    PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

/* Type of distributed loop.  */
enum partition_type {
    /* The distributed loop can be executed in parallel.  */
    PTYPE_PARALLEL = 0,
    /* The distributed loop has to be executed sequentially.  */
    PTYPE_SEQUENTIAL
};

/* Builtin info for loop distribution.  */
struct builtin_info
{
  /* Data references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p dst_dr;
  data_reference_p src_dr;
  /* Base address and size of memory objects operated on by the builtin.
     Note both dest and source memory objects must have the same size.  */
  tree dst_base;
  tree src_base;
  tree size;
  /* Base and offset parts of dst_base after stripping the constant
     offset.  This is only used in memset builtin distribution for now.  */
  tree dst_base_base;
  unsigned HOST_WIDE_INT dst_base_offset;
};

/* Partition for loop distribution.  */
struct partition
{
  /* Statements of the partition.  */
  bitmap stmts;
  /* True if the partition defines a variable which is used outside of
     the loop.  */
  bool reduction_p;
  location_t loc;
  enum partition_kind kind;
  enum partition_type type;
  /* Data references in the partition.  */
  bitmap datarefs;
  /* Information of a builtin partition.  */
  struct builtin_info *builtin;
};

/* Partitions are fused for different reasons.  */
enum fuse_type
{
  FUSE_NON_BUILTIN = 0,
  FUSE_REDUCTION = 1,
  FUSE_SHARE_REF = 2,
  FUSE_SAME_SCC = 3,
  FUSE_FINALIZE = 4
};

/* Descriptions of the different fusing reasons, indexed by fuse_type.  */
static const char *fuse_message[] = {
  "they are non-builtins",
  "they have reductions",
  "they have shared memory refs",
  "they are in the same dependence scc",
  "there is no point to distribute loop"};


/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

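/* Write RDG to FILE in the dot graph format (as consumed by graphviz).  */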
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: don't print these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}


class loop_distribution
{
  private:
  /* The loop (nest) to be distributed.  */
  vec<loop_p> loop_nest;

  /* Vector of data references in the loop to be distributed.  */
  vec<data_reference_p> datarefs_vec;

  /* If there is a nonaddressable data reference in the above vector.  */
  bool has_nonaddressable_dataref_p;

  /* Store the index of each data reference in its aux field (see
     DR_INDEX).  */

  /* Hash table for data dependence relations in the loop to be
     distributed.  */
  hash_table<ddr_hasher> *ddrs_table;

  /* Array mapping a basic block's index to its topological order.  */
  int *bb_top_order_index;
  /* And the size of the array.  */
  int bb_top_order_index_size;

  /* Build the vertices of the reduced dependence graph RDG.  Return false
     if that failed.  */
  bool create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts,
			    loop_p loop);

  /* Initialize STMTS with all the statements of LOOP.  We use topological
     order to discover all statements.  The order is important because
     generate_loops_for_partition is using the same traversal for
     identifying statements in loop copies.  */
  void stmts_from_loop (class loop *loop, vec<gimple *> *stmts);

  /* Build the Reduced Dependence Graph (RDG) with one vertex per statement
     of LOOP, and one edge per flow dependence or control dependence from
     control dependence CD.  While visiting each statement, data references
     are also collected and recorded in global data DATAREFS_VEC.  */
  struct graph * build_rdg (class loop *loop, control_dependences *cd);

  /* Merge PARTITION into the partition DEST.  RDG is the reduced
     dependence graph; if it is non-NULL we update the type of the
     resulting partition.  */
  void partition_merge_into (struct graph *rdg,
			     partition *dest, partition *partition,
			     enum fuse_type ft);

  /* Return the data dependence relation for data references A and B.  The
     two data references must be in lexicographic order with regard to the
     reduced dependence graph RDG.  We first try to find the ddr in the
     global ddr hash table.  If it doesn't exist, compute the ddr and
     cache it.  */
  data_dependence_relation * get_data_dependence (struct graph *rdg,
						  data_reference_p a,
						  data_reference_p b);

  /* In reduced dependence graph RDG for loop distribution, return true if
     the dependence between references DR1 and DR2 leads to a dependence
     cycle and such a dependence cycle can't be resolved by a runtime
     alias check.  */
  bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1,
			    data_reference_p dr2);

  /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
     PARTITION1's type after merging PARTITION2 into PARTITION1.  */
  void update_type_for_merge (struct graph *rdg,
			      partition *partition1, partition *partition2);

  /* Returns a partition with all the statements needed for computing
     the vertex V of the RDG, also including the loop exit conditions.  */
  partition *build_rdg_partition_for_vertex (struct graph *rdg, int v);

  /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG,
     classify whether they form a builtin memcpy or memmove call.  */
  void classify_builtin_ldst (loop_p loop, struct graph *rdg,
			      partition *partition,
			      data_reference_p dst_dr, data_reference_p src_dr);

  /* Classifies the builtin kind we can generate for PARTITION of RDG and
     LOOP.  For the moment we detect memset, memcpy and memmove patterns.
     Bitmap STMT_IN_ALL_PARTITIONS contains statements belonging to all
     partitions.  Returns true if there is a reduction in all partitions
     and we possibly did not mark PARTITION as having one for this
     reason.  */

  bool
  classify_partition (loop_p loop,
		      struct graph *rdg, partition *partition,
		      bitmap stmt_in_all_partitions);

  /* Returns true when PARTITION1 and PARTITION2 access the same memory
     object in RDG.  */
  bool share_memory_accesses (struct graph *rdg,
			      partition *partition1, partition *partition2);

  /* For each seed statement in STARTING_STMTS, this function builds a
     partition for it by adding dependent statements according to RDG.
     All partitions are recorded in PARTITIONS.  */
  void rdg_build_partitions (struct graph *rdg,
			     vec<gimple *> starting_stmts,
			     vec<partition *> *partitions);

  /* Compute the partition dependence created by the data references in
     DRS1 and DRS2, and modify and return DIR according to that.  If
     ALIAS_DDRS is not NULL, we record dependences introduced by possible
     aliasing between two data references in ALIAS_DDRS; otherwise, we
     simply ignore such dependences, as if they didn't exist at all.  */
  int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1,
			       bitmap drs2, vec<ddr_p> *alias_ddrs);

  /* Build and return the partition dependence graph for PARTITIONS.  RDG
     is the reduced dependence graph for the loop to be distributed.  If
     IGNORE_ALIAS_P is true, data dependences caused by possible aliasing
     between references are ignored, as if they didn't exist at all;
     otherwise all dependences are considered.  */
  struct graph *build_partition_graph (struct graph *rdg,
				       vec<struct partition *> *partitions,
				       bool ignore_alias_p);

  /* Given reduced dependence graph RDG, merge strongly connected
     components of PARTITIONS.  If IGNORE_ALIAS_P is true, data dependences
     caused by possible aliasing between references are ignored, as if
     they didn't exist at all; otherwise all dependences are considered.  */
  void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *>
				 *partitions, bool ignore_alias_p);

  /* This is the main function breaking strongly connected components in
     PARTITIONS given reduced dependence graph RDG.  Store data dependence
     relations for runtime alias checks in ALIAS_DDRS.  */
  void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *>
				   *partitions, vec<ddr_p> *alias_ddrs);

  /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
     ALIAS_DDRS contains ddrs which need runtime alias checks.  */
  void finalize_partitions (class loop *loop, vec<struct partition *>
			    *partitions, vec<ddr_p> *alias_ddrs);

  /* Distributes the code from LOOP in such a way that producer statements
     are placed before consumer statements.  Tries to separate only the
     statements from STMTS into separate loops.  Returns the number of
     distributed loops.  Set NB_CALLS to the number of generated builtin
     calls.  Set *DESTROY_P to whether LOOP needs to be destroyed.  */
  int distribute_loop (class loop *loop, vec<gimple *> stmts,
		       control_dependences *cd, int *nb_calls, bool *destroy_p,
		       bool only_patterns_p);

  /* Compute the topological order for basic blocks.  Topological order is
     needed because data dependence is computed for data references in
     lexicographical order.  */
  void bb_top_order_init (void);

  void bb_top_order_destroy (void);

  public:

  /* Getters for bb_top_order.  */

  inline int get_bb_top_order_index_size (void)
    {
      return bb_top_order_index_size;
    }

  inline int get_bb_top_order_index (int i)
    {
      return bb_top_order_index[i];
    }

  unsigned int execute (function *fun);
};
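
/* The public entry point of the pass is loop_distribution::execute
   (declared above); the private members implement the distribution
   steps 1)-9) described at the top of this file.  */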


/* If X has a smaller topological sort number than Y, returns a negative
   value; if greater, returns a positive value.  LOOP is the
   loop_distribution instance that provides the topological order.  */
static int
bb_top_order_cmp_r (const void *x, const void *y, void *loop)
{
  loop_distribution *_loop = (loop_distribution *) loop;

  basic_block bb1 = *(const basic_block *) x;
  basic_block bb2 = *(const basic_block *) y;

  int bb_top_order_index_size = _loop->get_bb_top_order_index_size ();

  gcc_assert (bb1->index < bb_top_order_index_size
	      && bb2->index < bb_top_order_index_size);
  gcc_assert (bb1 == bb2
	      || _loop->get_bb_top_order_index(bb1->index)
		 != _loop->get_bb_top_order_index(bb2->index));

  return (_loop->get_bb_top_order_index(bb1->index) -
	  _loop->get_bb_top_order_index(bb2->index));
}

bool
loop_distribution::create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts,
					loop_p loop)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs_vec.length ();
      if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
	return false;
      for (unsigned j = drp; j < datarefs_vec.length (); ++j)
	{
	  data_reference_p dr = datarefs_vec[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	  has_nonaddressable_dataref_p |= may_be_nonaddressable_p (dr->ref);
	}
    }
  return true;
}

void
loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_custom_order (loop, this,
						    bb_top_order_cmp_r);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  (RDGV_DATAREFS (v)).release ();
	  free (v->data);
	}
    }

  free_graph (rdg);
}

struct graph *
loop_distribution::build_rdg (class loop *loop, control_dependences *cd)
{
  struct graph *rdg;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop, &stmts);
  rdg = new_graph (stmts.length ());
  if (!create_rdg_vertices (rdg, stmts, loop))
    {
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop);

  return rdg;
}


/* Allocate and initialize a partition.  */

static partition *
partition_alloc (void)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->loc = UNKNOWN_LOCATION;
  partition->kind = PKIND_NORMAL;
  partition->type = PTYPE_PARALLEL;
  partition->datarefs = BITMAP_ALLOC (NULL);
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->datarefs);
  if (partition->builtin)
    free (partition->builtin);

  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind > PKIND_PARTIAL_MEMSET;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

void
loop_distribution::partition_merge_into (struct graph *rdg,
		      partition *dest, partition *partition, enum fuse_type ft)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]);
      fprintf (dump_file, "  Part 1: ");
      dump_bitmap (dump_file, dest->stmts);
      fprintf (dump_file, "  Part 2: ");
      dump_bitmap (dump_file, partition->stmts);
    }

  dest->kind = PKIND_NORMAL;
  if (dest->type == PTYPE_PARALLEL)
    dest->type = partition->type;

  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;

  /* Further check if any data dependence prevents us from executing the
     new partition in parallel.  */
  if (dest->type == PTYPE_PARALLEL && rdg != NULL)
    update_type_for_merge (rdg, dest, partition);

  bitmap_ior_into (dest->datarefs, partition->datarefs);
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;

      basic_block use_bb = gimple_bb (USE_STMT (use_p));
      if (!flow_bb_inside_loop_p (loop, use_bb))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static class loop *
copy_loop_before (class loop *loop)
{
  class loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (class loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (class loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      int orig_loop_num = loop->orig_loop_num;
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      loop->orig_loop_num = orig_loop_num;
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }
  else
    {
      /* Origin number is set to the new versioned loop's num.  */
      gcc_assert (loop->orig_loop_num != loop->num);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      edge inner_exit = NULL;

      if (loop != bb->loop_father)
	inner_exit = single_exit (bb->loop_father);

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* In distribution of a loop nest, if bb is an inner loop's
		 exit_bb, we choose its exit edge/path in order to avoid
		 generating an infinite loop.  For all other cases, we
		 choose an arbitrary path through the empty CFG part that
		 this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  if (inner_exit && inner_exit->flags & EDGE_TRUE_VALUE)
		    gimple_cond_make_true (cond_stmt);
		  else
		    gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  {
	    unsigned int count = vector_cst_encoded_nelts (val);
	    unsigned int j;
	    for (j = 0; j < count; ++j)
	      if (const_with_all_bytes_same (VECTOR_CST_ENCODED_ELT (val, j)))
		break;
	    if (j == count)
	      return 0;
	    break;
	  }
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
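
/* Note: generate_memset_builtin below relies on the helper above; a
   stored value can only be folded into a single memset byte when all
   bytes of its memory representation are identical.  */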
11371debfc3dSmrg 
11381debfc3dSmrg /* Generate a call to memset for PARTITION in LOOP.  */
11391debfc3dSmrg 
11401debfc3dSmrg static void
generate_memset_builtin(class loop * loop,partition * partition)11418feb0f0bSmrg generate_memset_builtin (class loop *loop, partition *partition)
11421debfc3dSmrg {
11431debfc3dSmrg   gimple_stmt_iterator gsi;
11441debfc3dSmrg   tree mem, fn, nb_bytes;
11451debfc3dSmrg   tree val;
1146a2dc1f3fSmrg   struct builtin_info *builtin = partition->builtin;
1147a2dc1f3fSmrg   gimple *fn_call;
11481debfc3dSmrg 
11491debfc3dSmrg   /* The new statements will be placed before LOOP.  */
11501debfc3dSmrg   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
11511debfc3dSmrg 
1152a2dc1f3fSmrg   nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
11531debfc3dSmrg   nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
11541debfc3dSmrg 				       false, GSI_CONTINUE_LINKING);
11558feb0f0bSmrg   mem = rewrite_to_non_trapping_overflow (builtin->dst_base);
11561debfc3dSmrg   mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
11571debfc3dSmrg 				  false, GSI_CONTINUE_LINKING);
11581debfc3dSmrg 
11591debfc3dSmrg   /* This exactly matches the pattern recognition in classify_partition.  */
1160a2dc1f3fSmrg   val = gimple_assign_rhs1 (DR_STMT (builtin->dst_dr));
11611debfc3dSmrg   /* Handle constants like 0x15151515 and similarly
11621debfc3dSmrg      floating point constants etc. where all bytes are the same.  */
11631debfc3dSmrg   int bytev = const_with_all_bytes_same (val);
11641debfc3dSmrg   if (bytev != -1)
11651debfc3dSmrg     val = build_int_cst (integer_type_node, bytev);
11661debfc3dSmrg   else if (TREE_CODE (val) == INTEGER_CST)
11671debfc3dSmrg     val = fold_convert (integer_type_node, val);
11681debfc3dSmrg   else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
11691debfc3dSmrg     {
11701debfc3dSmrg       tree tem = make_ssa_name (integer_type_node);
11711debfc3dSmrg       gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
11721debfc3dSmrg       gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
11731debfc3dSmrg       val = tem;
11741debfc3dSmrg     }
11751debfc3dSmrg 
11761debfc3dSmrg   fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
11771debfc3dSmrg   fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
11788feb0f0bSmrg   gimple_set_location (fn_call, partition->loc);
11791debfc3dSmrg   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
11808feb0f0bSmrg   fold_stmt (&gsi);
11811debfc3dSmrg 
11821debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
11831debfc3dSmrg     {
11841debfc3dSmrg       fprintf (dump_file, "generated memset");
11851debfc3dSmrg       if (bytev == 0)
11861debfc3dSmrg 	fprintf (dump_file, " zero\n");
11871debfc3dSmrg       else
11881debfc3dSmrg 	fprintf (dump_file, "\n");
11891debfc3dSmrg     }
11901debfc3dSmrg }
11911debfc3dSmrg 
11921debfc3dSmrg /* Generate a call to memcpy for PARTITION in LOOP.  */
11931debfc3dSmrg 
11941debfc3dSmrg static void
11958feb0f0bSmrg generate_memcpy_builtin (class loop *loop, partition *partition)
11961debfc3dSmrg {
11971debfc3dSmrg   gimple_stmt_iterator gsi;
1198a2dc1f3fSmrg   gimple *fn_call;
11991debfc3dSmrg   tree dest, src, fn, nb_bytes;
12001debfc3dSmrg   enum built_in_function kind;
1201a2dc1f3fSmrg   struct builtin_info *builtin = partition->builtin;
12021debfc3dSmrg 
12031debfc3dSmrg   /* The new statements will be placed before LOOP.  */
12041debfc3dSmrg   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
12051debfc3dSmrg 
1206a2dc1f3fSmrg   nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
12071debfc3dSmrg   nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
12081debfc3dSmrg 				       false, GSI_CONTINUE_LINKING);
12098feb0f0bSmrg   dest = rewrite_to_non_trapping_overflow (builtin->dst_base);
12108feb0f0bSmrg   src = rewrite_to_non_trapping_overflow (builtin->src_base);
12111debfc3dSmrg   if (partition->kind == PKIND_MEMCPY
12121debfc3dSmrg       || ! ptr_derefs_may_alias_p (dest, src))
12131debfc3dSmrg     kind = BUILT_IN_MEMCPY;
12141debfc3dSmrg   else
12151debfc3dSmrg     kind = BUILT_IN_MEMMOVE;
12168feb0f0bSmrg   /* Try harder if we're copying a constant size.  */
12178feb0f0bSmrg   if (kind == BUILT_IN_MEMMOVE && poly_int_tree_p (nb_bytes))
12188feb0f0bSmrg     {
12198feb0f0bSmrg       aff_tree asrc, adest;
12208feb0f0bSmrg       tree_to_aff_combination (src, ptr_type_node, &asrc);
12218feb0f0bSmrg       tree_to_aff_combination (dest, ptr_type_node, &adest);
12228feb0f0bSmrg       aff_combination_scale (&adest, -1);
12238feb0f0bSmrg       aff_combination_add (&asrc, &adest);
12248feb0f0bSmrg       if (aff_comb_cannot_overlap_p (&asrc, wi::to_poly_widest (nb_bytes),
12258feb0f0bSmrg 				     wi::to_poly_widest (nb_bytes)))
12268feb0f0bSmrg 	kind = BUILT_IN_MEMCPY;
12278feb0f0bSmrg     }
12281debfc3dSmrg 
12291debfc3dSmrg   dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
12301debfc3dSmrg 				   false, GSI_CONTINUE_LINKING);
12311debfc3dSmrg   src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
12321debfc3dSmrg 				  false, GSI_CONTINUE_LINKING);
12331debfc3dSmrg   fn = build_fold_addr_expr (builtin_decl_implicit (kind));
12341debfc3dSmrg   fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
12358feb0f0bSmrg   gimple_set_location (fn_call, partition->loc);
12361debfc3dSmrg   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
12378feb0f0bSmrg   fold_stmt (&gsi);
12381debfc3dSmrg 
12391debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
12401debfc3dSmrg     {
12411debfc3dSmrg       if (kind == BUILT_IN_MEMCPY)
12421debfc3dSmrg 	fprintf (dump_file, "generated memcpy\n");
12431debfc3dSmrg       else
12441debfc3dSmrg 	fprintf (dump_file, "generated memmove\n");
12451debfc3dSmrg     }
12461debfc3dSmrg }
12471debfc3dSmrg 
12481debfc3dSmrg /* Remove and destroy the loop LOOP.  */
12491debfc3dSmrg 
12501debfc3dSmrg static void
12518feb0f0bSmrg destroy_loop (class loop *loop)
12521debfc3dSmrg {
12531debfc3dSmrg   unsigned nbbs = loop->num_nodes;
12541debfc3dSmrg   edge exit = single_exit (loop);
12551debfc3dSmrg   basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
12561debfc3dSmrg   basic_block *bbs;
12571debfc3dSmrg   unsigned i;
12581debfc3dSmrg 
12591debfc3dSmrg   bbs = get_loop_body_in_dom_order (loop);
12601debfc3dSmrg 
1261c0a68be4Smrg   gimple_stmt_iterator dst_gsi = gsi_after_labels (exit->dest);
1262c0a68be4Smrg   bool safe_p = single_pred_p (exit->dest);
12638feb0f0bSmrg   for (unsigned i = 0; i < nbbs; ++i)
12641debfc3dSmrg     {
12651debfc3dSmrg       /* We have made sure to not leave any dangling uses of SSA
12661debfc3dSmrg          names defined in the loop.  With the exception of virtuals.
12671debfc3dSmrg 	 Make sure we replace all uses of virtual defs that will remain
12681debfc3dSmrg 	 outside of the loop with the bare symbol as delete_basic_block
12691debfc3dSmrg 	 will release them.  */
12701debfc3dSmrg       for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
12711debfc3dSmrg 	   gsi_next (&gsi))
12721debfc3dSmrg 	{
12731debfc3dSmrg 	  gphi *phi = gsi.phi ();
12741debfc3dSmrg 	  if (virtual_operand_p (gimple_phi_result (phi)))
12751debfc3dSmrg 	    mark_virtual_phi_result_for_renaming (phi);
12761debfc3dSmrg 	}
1277c0a68be4Smrg       for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
12781debfc3dSmrg 	{
12791debfc3dSmrg 	  gimple *stmt = gsi_stmt (gsi);
12801debfc3dSmrg 	  tree vdef = gimple_vdef (stmt);
12811debfc3dSmrg 	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
12821debfc3dSmrg 	    mark_virtual_operand_for_renaming (vdef);
1283c0a68be4Smrg 	  /* Also move and eventually reset debug stmts.  We can leave
1284c0a68be4Smrg 	     constant values in place in case the stmt dominates the exit.
1285c0a68be4Smrg 	     ???  Non-constant values from the last iteration can be
1286c0a68be4Smrg 	     replaced with final values if we can compute them.  */
1287c0a68be4Smrg 	  if (gimple_debug_bind_p (stmt))
1288c0a68be4Smrg 	    {
1289c0a68be4Smrg 	      tree val = gimple_debug_bind_get_value (stmt);
1290c0a68be4Smrg 	      gsi_move_before (&gsi, &dst_gsi);
1291c0a68be4Smrg 	      if (val
1292c0a68be4Smrg 		  && (!safe_p
1293c0a68be4Smrg 		      || !is_gimple_min_invariant (val)
1294c0a68be4Smrg 		      || !dominated_by_p (CDI_DOMINATORS, exit->src, bbs[i])))
1295c0a68be4Smrg 		{
1296c0a68be4Smrg 		  gimple_debug_bind_reset_value (stmt);
1297c0a68be4Smrg 		  update_stmt (stmt);
12981debfc3dSmrg 		}
1299c0a68be4Smrg 	    }
1300c0a68be4Smrg 	  else
1301c0a68be4Smrg 	    gsi_next (&gsi);
1302c0a68be4Smrg 	}
1303c0a68be4Smrg     }
1304c0a68be4Smrg 
1305c0a68be4Smrg   redirect_edge_pred (exit, src);
1306c0a68be4Smrg   exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
1307c0a68be4Smrg   exit->flags |= EDGE_FALLTHRU;
1308c0a68be4Smrg   cancel_loop_tree (loop);
1309c0a68be4Smrg   rescan_loop_exit (exit, false, true);
1310c0a68be4Smrg 
1311c0a68be4Smrg   i = nbbs;
1312c0a68be4Smrg   do
1313c0a68be4Smrg     {
1314c0a68be4Smrg       --i;
13151debfc3dSmrg       delete_basic_block (bbs[i]);
13161debfc3dSmrg     }
13171debfc3dSmrg   while (i != 0);
13181debfc3dSmrg 
13191debfc3dSmrg   free (bbs);
13201debfc3dSmrg 
13211debfc3dSmrg   set_immediate_dominator (CDI_DOMINATORS, dest,
13221debfc3dSmrg 			   recompute_dominator (CDI_DOMINATORS, dest));
13231debfc3dSmrg }
13241debfc3dSmrg 
13251debfc3dSmrg /* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */
13261debfc3dSmrg 
13271debfc3dSmrg static bool
13288feb0f0bSmrg generate_code_for_partition (class loop *loop,
13291debfc3dSmrg 			     partition *partition, bool copy_p)
13301debfc3dSmrg {
13311debfc3dSmrg   switch (partition->kind)
13321debfc3dSmrg     {
13331debfc3dSmrg     case PKIND_NORMAL:
1334a2dc1f3fSmrg     case PKIND_PARTIAL_MEMSET:
13351debfc3dSmrg       /* Reductions all have to be in the last partition.  */
13361debfc3dSmrg       gcc_assert (!partition_reduction_p (partition)
13371debfc3dSmrg 		  || !copy_p);
13381debfc3dSmrg       generate_loops_for_partition (loop, partition, copy_p);
13391debfc3dSmrg       return false;
13401debfc3dSmrg 
13411debfc3dSmrg     case PKIND_MEMSET:
13421debfc3dSmrg       generate_memset_builtin (loop, partition);
13431debfc3dSmrg       break;
13441debfc3dSmrg 
13451debfc3dSmrg     case PKIND_MEMCPY:
13461debfc3dSmrg     case PKIND_MEMMOVE:
13471debfc3dSmrg       generate_memcpy_builtin (loop, partition);
13481debfc3dSmrg       break;
13491debfc3dSmrg 
13501debfc3dSmrg     default:
13511debfc3dSmrg       gcc_unreachable ();
13521debfc3dSmrg     }
13531debfc3dSmrg 
13541debfc3dSmrg   /* Common tail for partitions we turn into a call.  If this was the last
13551debfc3dSmrg      partition for which we generate code, we have to destroy the loop.  */
13561debfc3dSmrg   if (!copy_p)
13571debfc3dSmrg     return true;
13581debfc3dSmrg   return false;
13591debfc3dSmrg }
13601debfc3dSmrg 
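/* Return the data dependence relation between data references A and B.
   At least one of A and B must be a write, and A's vertex in RDG must
   not come after B's.  The relation is computed on first use and is
   cached in DDRS_TABLE afterwards.  */
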
13618feb0f0bSmrg data_dependence_relation *
13628feb0f0bSmrg loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a,
13638feb0f0bSmrg 					data_reference_p b)
1364a2dc1f3fSmrg {
1365a2dc1f3fSmrg   struct data_dependence_relation ent, **slot;
1366a2dc1f3fSmrg   struct data_dependence_relation *ddr;
1367a2dc1f3fSmrg 
1368a2dc1f3fSmrg   gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b));
1369a2dc1f3fSmrg   gcc_assert (rdg_vertex_for_stmt (rdg, DR_STMT (a))
1370a2dc1f3fSmrg 	      <= rdg_vertex_for_stmt (rdg, DR_STMT (b)));
1371a2dc1f3fSmrg   ent.a = a;
1372a2dc1f3fSmrg   ent.b = b;
1373a2dc1f3fSmrg   slot = ddrs_table->find_slot (&ent, INSERT);
1374a2dc1f3fSmrg   if (*slot == NULL)
1375a2dc1f3fSmrg     {
1376a2dc1f3fSmrg       ddr = initialize_data_dependence_relation (a, b, loop_nest);
1377a2dc1f3fSmrg       compute_affine_dependence (ddr, loop_nest[0]);
1378a2dc1f3fSmrg       *slot = ddr;
1379a2dc1f3fSmrg     }
1380a2dc1f3fSmrg 
1381a2dc1f3fSmrg   return *slot;
1382a2dc1f3fSmrg }
1383a2dc1f3fSmrg 
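/* Return true if the data dependence between references DR1 and DR2
   of RDG introduces a dependence cycle that cannot be resolved, not
   even by a runtime alias check.  */
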
13848feb0f0bSmrg bool
13858feb0f0bSmrg loop_distribution::data_dep_in_cycle_p (struct graph *rdg,
13868feb0f0bSmrg 					data_reference_p dr1,
13878feb0f0bSmrg 					data_reference_p dr2)
1388a2dc1f3fSmrg {
1389a2dc1f3fSmrg   struct data_dependence_relation *ddr;
1390a2dc1f3fSmrg 
1391a2dc1f3fSmrg   /* Re-shuffle data-refs to be in topological order.  */
1392a2dc1f3fSmrg   if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1393a2dc1f3fSmrg       > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1394a2dc1f3fSmrg     std::swap (dr1, dr2);
1395a2dc1f3fSmrg 
1396a2dc1f3fSmrg   ddr = get_data_dependence (rdg, dr1, dr2);
1397a2dc1f3fSmrg 
1398a2dc1f3fSmrg   /* In case of no data dependence.  */
1399a2dc1f3fSmrg   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1400a2dc1f3fSmrg     return false;
1401a2dc1f3fSmrg   /* For an unknown data dependence, or a known data dependence which can't
1402a2dc1f3fSmrg      be expressed as a classic distance vector, we check if it can be
1403a2dc1f3fSmrg      resolved by runtime alias check.  If yes, we consider the data
1404a2dc1f3fSmrg      dependence as one that won't introduce a dependence cycle.  */
1405a2dc1f3fSmrg   else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1406a2dc1f3fSmrg 	   || DDR_NUM_DIST_VECTS (ddr) == 0)
1407a2dc1f3fSmrg     return !runtime_alias_check_p (ddr, NULL, true);
1408a2dc1f3fSmrg   else if (DDR_NUM_DIST_VECTS (ddr) > 1)
1409a2dc1f3fSmrg     return true;
1410a2dc1f3fSmrg   else if (DDR_REVERSED_P (ddr)
1411a2dc1f3fSmrg 	   || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1412a2dc1f3fSmrg     return false;
1413a2dc1f3fSmrg 
1414a2dc1f3fSmrg   return true;
1415a2dc1f3fSmrg }
1416a2dc1f3fSmrg 
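/* Given RDG, downgrade PARTITION1 to PTYPE_SEQUENTIAL if merging
   PARTITION2 into it would create a data dependence cycle.  Called
   with PARTITION1 == PARTITION2 to check a partition against itself.  */
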
14178feb0f0bSmrg void
14188feb0f0bSmrg loop_distribution::update_type_for_merge (struct graph *rdg,
14198feb0f0bSmrg 					   partition *partition1,
14208feb0f0bSmrg 					   partition *partition2)
1421a2dc1f3fSmrg {
1422a2dc1f3fSmrg   unsigned i, j;
1423a2dc1f3fSmrg   bitmap_iterator bi, bj;
1424a2dc1f3fSmrg   data_reference_p dr1, dr2;
1425a2dc1f3fSmrg 
1426a2dc1f3fSmrg   EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
1427a2dc1f3fSmrg     {
1428a2dc1f3fSmrg       unsigned start = (partition1 == partition2) ? i + 1 : 0;
1429a2dc1f3fSmrg 
1430a2dc1f3fSmrg       dr1 = datarefs_vec[i];
1431a2dc1f3fSmrg       EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, start, j, bj)
1432a2dc1f3fSmrg 	{
1433a2dc1f3fSmrg 	  dr2 = datarefs_vec[j];
1434a2dc1f3fSmrg 	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
1435a2dc1f3fSmrg 	    continue;
1436a2dc1f3fSmrg 
1437a2dc1f3fSmrg 	  /* Partition can only be executed sequentially if there is any
1438a2dc1f3fSmrg 	     data dependence cycle.  */
1439a2dc1f3fSmrg 	  if (data_dep_in_cycle_p (rdg, dr1, dr2))
1440a2dc1f3fSmrg 	    {
1441a2dc1f3fSmrg 	      partition1->type = PTYPE_SEQUENTIAL;
1442a2dc1f3fSmrg 	      return;
1443a2dc1f3fSmrg 	    }
1444a2dc1f3fSmrg 	}
1445a2dc1f3fSmrg     }
1446a2dc1f3fSmrg }
14471debfc3dSmrg 
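/* Return a partition built from all the statements that vertex V of
   RDG depends on, as found by a depth-first search over the RDG.  The
   partition is marked PTYPE_SEQUENTIAL if any of its data references
   is not well analyzed or participates in a dependence cycle.  */
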
14488feb0f0bSmrg partition *
14498feb0f0bSmrg loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v)
14501debfc3dSmrg {
1451a2dc1f3fSmrg   partition *partition = partition_alloc ();
14521debfc3dSmrg   auto_vec<int, 3> nodes;
1453a2dc1f3fSmrg   unsigned i, j;
14541debfc3dSmrg   int x;
1455a2dc1f3fSmrg   data_reference_p dr;
14561debfc3dSmrg 
14571debfc3dSmrg   graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
14581debfc3dSmrg 
14591debfc3dSmrg   FOR_EACH_VEC_ELT (nodes, i, x)
14601debfc3dSmrg     {
14611debfc3dSmrg       bitmap_set_bit (partition->stmts, x);
1462a2dc1f3fSmrg 
1463a2dc1f3fSmrg       for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j)
1464a2dc1f3fSmrg 	{
1465a2dc1f3fSmrg 	  unsigned idx = (unsigned) DR_INDEX (dr);
1466a2dc1f3fSmrg 	  gcc_assert (idx < datarefs_vec.length ());
1467a2dc1f3fSmrg 
1468a2dc1f3fSmrg 	  /* Partition can only be executed sequentially if there is any
1469a2dc1f3fSmrg 	     unknown data reference.  */
1470a2dc1f3fSmrg 	  if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr)
1471a2dc1f3fSmrg 	      || !DR_INIT (dr) || !DR_STEP (dr))
1472a2dc1f3fSmrg 	    partition->type = PTYPE_SEQUENTIAL;
1473a2dc1f3fSmrg 
1474a2dc1f3fSmrg 	  bitmap_set_bit (partition->datarefs, idx);
14751debfc3dSmrg 	}
1476a2dc1f3fSmrg     }
1477a2dc1f3fSmrg 
1478a2dc1f3fSmrg   if (partition->type == PTYPE_SEQUENTIAL)
1479a2dc1f3fSmrg     return partition;
1480a2dc1f3fSmrg 
1481a2dc1f3fSmrg   /* Further check if any data dependence prevents us from executing the
1482a2dc1f3fSmrg      partition in parallel.  */
1483a2dc1f3fSmrg   update_type_for_merge (rdg, partition, partition);
14841debfc3dSmrg 
14851debfc3dSmrg   return partition;
14861debfc3dSmrg }
14871debfc3dSmrg 
1488a2dc1f3fSmrg /* Given PARTITION of LOOP and RDG, record the single load/store data
1489a2dc1f3fSmrg    references of a builtin partition in SRC_DR/DST_DR; return false if
1490a2dc1f3fSmrg    there are no such data references.  */
14911debfc3dSmrg 
1492a2dc1f3fSmrg static bool
14938feb0f0bSmrg find_single_drs (class loop *loop, struct graph *rdg, partition *partition,
1494a2dc1f3fSmrg 		 data_reference_p *dst_dr, data_reference_p *src_dr)
14951debfc3dSmrg {
14961debfc3dSmrg   unsigned i;
1497a2dc1f3fSmrg   data_reference_p single_ld = NULL, single_st = NULL;
1498a2dc1f3fSmrg   bitmap_iterator bi;
14991debfc3dSmrg 
15001debfc3dSmrg   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
15011debfc3dSmrg     {
15021debfc3dSmrg       gimple *stmt = RDG_STMT (rdg, i);
15031debfc3dSmrg       data_reference_p dr;
15041debfc3dSmrg 
15051debfc3dSmrg       if (gimple_code (stmt) == GIMPLE_PHI)
15061debfc3dSmrg 	continue;
15071debfc3dSmrg 
15081debfc3dSmrg       /* Any scalar stmts are ok.  */
15091debfc3dSmrg       if (!gimple_vuse (stmt))
15101debfc3dSmrg 	continue;
15111debfc3dSmrg 
15121debfc3dSmrg       /* Otherwise just regular loads/stores.  */
15131debfc3dSmrg       if (!gimple_assign_single_p (stmt))
1514a2dc1f3fSmrg 	return false;
15151debfc3dSmrg 
15161debfc3dSmrg       /* But exactly one store and/or load.  */
1517a2dc1f3fSmrg       for (unsigned j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
15181debfc3dSmrg 	{
15191debfc3dSmrg 	  tree type = TREE_TYPE (DR_REF (dr));
15201debfc3dSmrg 
15211debfc3dSmrg 	  /* The memset, memcpy and memmove library calls are only
15221debfc3dSmrg 	     able to deal with generic address space.  */
15231debfc3dSmrg 	  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
1524a2dc1f3fSmrg 	    return false;
15251debfc3dSmrg 
15261debfc3dSmrg 	  if (DR_IS_READ (dr))
15271debfc3dSmrg 	    {
1528a2dc1f3fSmrg 	      if (single_ld != NULL)
1529a2dc1f3fSmrg 		return false;
1530a2dc1f3fSmrg 	      single_ld = dr;
15311debfc3dSmrg 	    }
15321debfc3dSmrg 	  else
15331debfc3dSmrg 	    {
1534a2dc1f3fSmrg 	      if (single_st != NULL)
1535a2dc1f3fSmrg 		return false;
1536a2dc1f3fSmrg 	      single_st = dr;
15371debfc3dSmrg 	    }
15381debfc3dSmrg 	}
15391debfc3dSmrg     }
15401debfc3dSmrg 
1541a2dc1f3fSmrg   if (!single_st)
1542a2dc1f3fSmrg     return false;
15431debfc3dSmrg 
1544a2dc1f3fSmrg   /* Bail out if this is a bitfield memory reference.  */
1545a2dc1f3fSmrg   if (TREE_CODE (DR_REF (single_st)) == COMPONENT_REF
1546a2dc1f3fSmrg       && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st), 1)))
1547a2dc1f3fSmrg     return false;
15481debfc3dSmrg 
1549a2dc1f3fSmrg   /* Data reference must be executed exactly once per iteration of each
1550a2dc1f3fSmrg      loop in the loop nest.  We only need to check dominance information
1551a2dc1f3fSmrg      against the outermost one in a perfect loop nest because a bb can't
1552a2dc1f3fSmrg      dominate the outermost loop's latch without dominating the inner loop's.  */
1553a2dc1f3fSmrg   basic_block bb_st = gimple_bb (DR_STMT (single_st));
1554a2dc1f3fSmrg   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_st))
1555a2dc1f3fSmrg     return false;
1556a2dc1f3fSmrg 
1557a2dc1f3fSmrg   if (single_ld)
15581debfc3dSmrg     {
1559a2dc1f3fSmrg       gimple *store = DR_STMT (single_st), *load = DR_STMT (single_ld);
1560a2dc1f3fSmrg       /* Direct aggregate copy or via an SSA name temporary.  */
1561a2dc1f3fSmrg       if (load != store
1562a2dc1f3fSmrg 	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1563a2dc1f3fSmrg 	return false;
1564a2dc1f3fSmrg 
1565a2dc1f3fSmrg       /* Bail out if this is a bitfield memory reference.  */
1566a2dc1f3fSmrg       if (TREE_CODE (DR_REF (single_ld)) == COMPONENT_REF
1567a2dc1f3fSmrg 	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_ld), 1)))
1568a2dc1f3fSmrg 	return false;
1569a2dc1f3fSmrg 
1570a2dc1f3fSmrg       /* Load and store must be in the same loop nest.  */
1571a2dc1f3fSmrg       basic_block bb_ld = gimple_bb (DR_STMT (single_ld));
1572a2dc1f3fSmrg       if (bb_st->loop_father != bb_ld->loop_father)
1573a2dc1f3fSmrg 	return false;
1574a2dc1f3fSmrg 
1575a2dc1f3fSmrg       /* Data reference must be executed exactly once per iteration.
1576a2dc1f3fSmrg 	 Same as single_st, we only need to check against the outermost
1577a2dc1f3fSmrg 	 loop.  */
1578a2dc1f3fSmrg       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_ld))
1579a2dc1f3fSmrg 	return false;
1580a2dc1f3fSmrg 
1581a2dc1f3fSmrg       edge e = single_exit (bb_st->loop_father);
1582a2dc1f3fSmrg       bool dom_ld = dominated_by_p (CDI_DOMINATORS, e->src, bb_ld);
1583a2dc1f3fSmrg       bool dom_st = dominated_by_p (CDI_DOMINATORS, e->src, bb_st);
1584a2dc1f3fSmrg       if (dom_ld != dom_st)
1585a2dc1f3fSmrg 	return false;
1586a2dc1f3fSmrg     }
1587a2dc1f3fSmrg 
1588a2dc1f3fSmrg   *src_dr = single_ld;
1589a2dc1f3fSmrg   *dst_dr = single_st;
1590a2dc1f3fSmrg   return true;
1591a2dc1f3fSmrg }
1592a2dc1f3fSmrg 
1593a2dc1f3fSmrg /* Given data reference DR in LOOP_NEST, this function checks the enclosing
1594a2dc1f3fSmrg    loops from inner to outer to see if the loop's step equals the access size
1595a2dc1f3fSmrg    at each level of loop.  Return 2 if we can prove this at all loop levels;
1596a2dc1f3fSmrg    record access base and size in BASE and SIZE; save loop's step at each
1597a2dc1f3fSmrg    level of loop in STEPS if it is not null.  For example:
1598a2dc1f3fSmrg 
1599a2dc1f3fSmrg      int arr[100][100][100];
1600a2dc1f3fSmrg      for (i = 0; i < 100; i++)       ;steps[2] = 40000
1601a2dc1f3fSmrg        for (j = 100; j > 0; j--)     ;steps[1] = -400
1602a2dc1f3fSmrg 	 for (k = 0; k < 100; k++)   ;steps[0] = 4
1603a2dc1f3fSmrg 	   arr[i][j - 1][k] = 0;     ;base = &arr, size = 4000000
1604a2dc1f3fSmrg 
1605a2dc1f3fSmrg    Return 1 if we can prove the equality at the innermost loop, but not at
1606a2dc1f3fSmrg    all loop levels.  In this case, no information is recorded.
1607a2dc1f3fSmrg 
1608a2dc1f3fSmrg    Return 0 if the equality cannot be proven at any loop level.  */
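
/* As a further illustration (a sketch), for

     int arr[100][200];
     for (i = 0; i < 100; i++)       ;step = 800
       for (j = 0; j < 100; j++)     ;step = 4
	 arr[i][j] = 0;

   the innermost step (4) equals the access size, but the outer step (800)
   differs from the 400 bytes written per row, so compute_access_range
   returns 1.  */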
1609a2dc1f3fSmrg 
1610a2dc1f3fSmrg static int
1611a2dc1f3fSmrg compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base,
1612a2dc1f3fSmrg 		      tree *size, vec<tree> *steps = NULL)
1613a2dc1f3fSmrg {
1614a2dc1f3fSmrg   location_t loc = gimple_location (DR_STMT (dr));
1615a2dc1f3fSmrg   basic_block bb = gimple_bb (DR_STMT (dr));
16168feb0f0bSmrg   class loop *loop = bb->loop_father;
1617a2dc1f3fSmrg   tree ref = DR_REF (dr);
1618a2dc1f3fSmrg   tree access_base = build_fold_addr_expr (ref);
1619a2dc1f3fSmrg   tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
1620a2dc1f3fSmrg   int res = 0;
1621a2dc1f3fSmrg 
1622a2dc1f3fSmrg   do {
1623a2dc1f3fSmrg       tree scev_fn = analyze_scalar_evolution (loop, access_base);
1624a2dc1f3fSmrg       if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
1625a2dc1f3fSmrg 	return res;
1626a2dc1f3fSmrg 
1627a2dc1f3fSmrg       access_base = CHREC_LEFT (scev_fn);
1628a2dc1f3fSmrg       if (tree_contains_chrecs (access_base, NULL))
1629a2dc1f3fSmrg 	return res;
1630a2dc1f3fSmrg 
1631a2dc1f3fSmrg       tree scev_step = CHREC_RIGHT (scev_fn);
1632a2dc1f3fSmrg       /* Only support constant steps.  */
1633a2dc1f3fSmrg       if (TREE_CODE (scev_step) != INTEGER_CST)
1634a2dc1f3fSmrg 	return res;
1635a2dc1f3fSmrg 
1636a2dc1f3fSmrg       enum ev_direction access_dir = scev_direction (scev_fn);
1637a2dc1f3fSmrg       if (access_dir == EV_DIR_UNKNOWN)
1638a2dc1f3fSmrg 	return res;
1639a2dc1f3fSmrg 
1640a2dc1f3fSmrg       if (steps != NULL)
1641a2dc1f3fSmrg 	steps->safe_push (scev_step);
1642a2dc1f3fSmrg 
1643a2dc1f3fSmrg       scev_step = fold_convert_loc (loc, sizetype, scev_step);
1644a2dc1f3fSmrg       /* Compute absolute value of scev step.  */
1645a2dc1f3fSmrg       if (access_dir == EV_DIR_DECREASES)
1646a2dc1f3fSmrg 	scev_step = fold_build1_loc (loc, NEGATE_EXPR, sizetype, scev_step);
1647a2dc1f3fSmrg 
1648a2dc1f3fSmrg       /* At each level of loop, the scev step must equal the access size.  In
1649a2dc1f3fSmrg 	 other words, DR must access consecutive memory between loop iterations.  */
1650a2dc1f3fSmrg       if (!operand_equal_p (scev_step, access_size, 0))
1651a2dc1f3fSmrg 	return res;
1652a2dc1f3fSmrg 
1653a2dc1f3fSmrg       /* Access stride can be computed for data reference at least for the
1654a2dc1f3fSmrg 	 innermost loop.  */
1655a2dc1f3fSmrg       res = 1;
1656a2dc1f3fSmrg 
1657a2dc1f3fSmrg       /* Compute DR's execution times in loop.  */
1658a2dc1f3fSmrg       tree niters = number_of_latch_executions (loop);
1659a2dc1f3fSmrg       niters = fold_convert_loc (loc, sizetype, niters);
1660a2dc1f3fSmrg       if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, bb))
1661a2dc1f3fSmrg 	niters = size_binop_loc (loc, PLUS_EXPR, niters, size_one_node);
1662a2dc1f3fSmrg 
1663a2dc1f3fSmrg       /* Compute DR's overall access size in loop.  */
1664a2dc1f3fSmrg       access_size = fold_build2_loc (loc, MULT_EXPR, sizetype,
1665a2dc1f3fSmrg 				     niters, scev_step);
1666a2dc1f3fSmrg       /* Adjust base address in case of negative step.  */
1667a2dc1f3fSmrg       if (access_dir == EV_DIR_DECREASES)
1668a2dc1f3fSmrg 	{
1669a2dc1f3fSmrg 	  tree adj = fold_build2_loc (loc, MINUS_EXPR, sizetype,
1670a2dc1f3fSmrg 				      scev_step, access_size);
1671a2dc1f3fSmrg 	  access_base = fold_build_pointer_plus_loc (loc, access_base, adj);
1672a2dc1f3fSmrg 	}
1673a2dc1f3fSmrg   } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);
1674a2dc1f3fSmrg 
1675a2dc1f3fSmrg   *base = access_base;
1676a2dc1f3fSmrg   *size = access_size;
1677a2dc1f3fSmrg   /* The access stride can be computed for the data reference at each loop level.  */
1678a2dc1f3fSmrg   return 2;
1679a2dc1f3fSmrg }
1680a2dc1f3fSmrg 
1681a2dc1f3fSmrg /* Allocate and return builtin struct.  Record information like DST_DR,
1682a2dc1f3fSmrg    SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct.  */
1683a2dc1f3fSmrg 
1684a2dc1f3fSmrg static struct builtin_info *
1685a2dc1f3fSmrg alloc_builtin (data_reference_p dst_dr, data_reference_p src_dr,
1686a2dc1f3fSmrg 	       tree dst_base, tree src_base, tree size)
1687a2dc1f3fSmrg {
1688a2dc1f3fSmrg   struct builtin_info *builtin = XNEW (struct builtin_info);
1689a2dc1f3fSmrg   builtin->dst_dr = dst_dr;
1690a2dc1f3fSmrg   builtin->src_dr = src_dr;
1691a2dc1f3fSmrg   builtin->dst_base = dst_base;
1692a2dc1f3fSmrg   builtin->src_base = src_base;
1693a2dc1f3fSmrg   builtin->size = size;
1694a2dc1f3fSmrg   return builtin;
1695a2dc1f3fSmrg }
1696a2dc1f3fSmrg 
1697a2dc1f3fSmrg /* Given data reference DR in loop nest LOOP, classify whether it forms a
1698a2dc1f3fSmrg    builtin memset call.  */
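
/* For instance (a sketch), a store of a byte-splat constant such as
   "a[i] = 0" over the whole of A classifies as PKIND_MEMSET, while a
   store that is provably consecutive only in the innermost loop of the
   nest classifies as PKIND_PARTIAL_MEMSET and is later emitted as a
   regular loop by generate_code_for_partition.  */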
1699a2dc1f3fSmrg 
1700a2dc1f3fSmrg static void
1701a2dc1f3fSmrg classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
1702a2dc1f3fSmrg {
1703a2dc1f3fSmrg   gimple *stmt = DR_STMT (dr);
1704a2dc1f3fSmrg   tree base, size, rhs = gimple_assign_rhs1 (stmt);
1705a2dc1f3fSmrg 
17061debfc3dSmrg   if (const_with_all_bytes_same (rhs) == -1
17071debfc3dSmrg       && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
17081debfc3dSmrg 	  || (TYPE_MODE (TREE_TYPE (rhs))
17091debfc3dSmrg 	      != TYPE_MODE (unsigned_char_type_node))))
17101debfc3dSmrg     return;
1711a2dc1f3fSmrg 
17121debfc3dSmrg   if (TREE_CODE (rhs) == SSA_NAME
17131debfc3dSmrg       && !SSA_NAME_IS_DEFAULT_DEF (rhs)
17141debfc3dSmrg       && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
17151debfc3dSmrg     return;
1716a2dc1f3fSmrg 
1717a2dc1f3fSmrg   int res = compute_access_range (loop, dr, &base, &size);
1718a2dc1f3fSmrg   if (res == 0)
17191debfc3dSmrg     return;
1720a2dc1f3fSmrg   if (res == 1)
1721a2dc1f3fSmrg     {
1722a2dc1f3fSmrg       partition->kind = PKIND_PARTIAL_MEMSET;
1723a2dc1f3fSmrg       return;
1724a2dc1f3fSmrg     }
1725a2dc1f3fSmrg 
1726a2dc1f3fSmrg   poly_uint64 base_offset;
1727a2dc1f3fSmrg   unsigned HOST_WIDE_INT const_base_offset;
1728a2dc1f3fSmrg   tree base_base = strip_offset (base, &base_offset);
1729a2dc1f3fSmrg   if (!base_offset.is_constant (&const_base_offset))
1730a2dc1f3fSmrg     return;
1731a2dc1f3fSmrg 
1732a2dc1f3fSmrg   struct builtin_info *builtin;
1733a2dc1f3fSmrg   builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
1734a2dc1f3fSmrg   builtin->dst_base_base = base_base;
1735a2dc1f3fSmrg   builtin->dst_base_offset = const_base_offset;
1736a2dc1f3fSmrg   partition->builtin = builtin;
17371debfc3dSmrg   partition->kind = PKIND_MEMSET;
17381debfc3dSmrg }
1739a2dc1f3fSmrg 
1740a2dc1f3fSmrg /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
1741a2dc1f3fSmrg    whether they form a builtin memcpy or memmove call.  */
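
/* For instance (a sketch),

   |for (i = 0; i < n; i++)
   |  a[i + 100] = a[i];

   is rejected: iteration i + 100 reads the value stored by iteration i,
   so no single memmove call matches the loop's semantics.  A copy whose
   load does not depend on its store is classified as PKIND_MEMMOVE here;
   generate_memcpy_builtin may still emit it as memcpy.  */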
1742a2dc1f3fSmrg 
17438feb0f0bSmrg void
17448feb0f0bSmrg loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg,
17458feb0f0bSmrg 					  partition *partition,
17468feb0f0bSmrg 					  data_reference_p dst_dr,
17478feb0f0bSmrg 					  data_reference_p src_dr)
17481debfc3dSmrg {
1749a2dc1f3fSmrg   tree base, size, src_base, src_size;
1750a2dc1f3fSmrg   auto_vec<tree> dst_steps, src_steps;
1751a2dc1f3fSmrg 
1752a2dc1f3fSmrg   /* Compute access range of both load and store.  */
1753a2dc1f3fSmrg   int res = compute_access_range (loop, dst_dr, &base, &size, &dst_steps);
1754a2dc1f3fSmrg   if (res != 2)
17551debfc3dSmrg     return;
1756a2dc1f3fSmrg   res = compute_access_range (loop, src_dr, &src_base, &src_size, &src_steps);
1757a2dc1f3fSmrg   if (res != 2)
17581debfc3dSmrg     return;
1759a2dc1f3fSmrg 
1760a2dc1f3fSmrg   /* They must have the same access size.  */
1761a2dc1f3fSmrg   if (!operand_equal_p (size, src_size, 0))
1762a2dc1f3fSmrg     return;
1763a2dc1f3fSmrg 
1764a2dc1f3fSmrg   /* Load and store in the loop nest must access memory in the same way,
1765a2dc1f3fSmrg      i.e., they must have the same steps in each loop of the nest.  */
1766a2dc1f3fSmrg   if (dst_steps.length () != src_steps.length ())
1767a2dc1f3fSmrg     return;
1768a2dc1f3fSmrg   for (unsigned i = 0; i < dst_steps.length (); ++i)
1769a2dc1f3fSmrg     if (!operand_equal_p (dst_steps[i], src_steps[i], 0))
1770a2dc1f3fSmrg       return;
1771a2dc1f3fSmrg 
1772a2dc1f3fSmrg   /* Now check whether there is a dependence.  */
1773a2dc1f3fSmrg   ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);
1774a2dc1f3fSmrg 
17758feb0f0bSmrg   /* Classify as memmove if no dependence between load and store.  */
1776a2dc1f3fSmrg   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
17771debfc3dSmrg     {
1778a2dc1f3fSmrg       partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
17798feb0f0bSmrg       partition->kind = PKIND_MEMMOVE;
17801debfc3dSmrg       return;
17811debfc3dSmrg     }
1782a2dc1f3fSmrg 
1783a2dc1f3fSmrg   /* Can't do memmove in case of unknown dependence or dependence without
1784a2dc1f3fSmrg      classical distance vector.  */
1785a2dc1f3fSmrg   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1786a2dc1f3fSmrg       || DDR_NUM_DIST_VECTS (ddr) == 0)
17871debfc3dSmrg     return;
1788a2dc1f3fSmrg 
1789a2dc1f3fSmrg   unsigned i;
17901debfc3dSmrg   lambda_vector dist_v;
1791a2dc1f3fSmrg   int num_lev = (DDR_LOOP_NEST (ddr)).length ();
17921debfc3dSmrg   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
17931debfc3dSmrg     {
1794a2dc1f3fSmrg       unsigned dep_lev = dependence_level (dist_v, num_lev);
1795a2dc1f3fSmrg       /* Can't do memmove if load depends on store.  */
1796a2dc1f3fSmrg       if (dep_lev > 0 && dist_v[dep_lev - 1] > 0 && !DDR_REVERSED_P (ddr))
17971debfc3dSmrg 	return;
17981debfc3dSmrg     }
1799a2dc1f3fSmrg 
1800a2dc1f3fSmrg   partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
18011debfc3dSmrg   partition->kind = PKIND_MEMMOVE;
1802a2dc1f3fSmrg   return;
18031debfc3dSmrg }
18041debfc3dSmrg 
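/* Classify the builtin kind, if any, that PARTITION of LOOP in RDG can
   be implemented as.  Bitmap STMT_IN_ALL_PARTITIONS contains statements
   that belong to every partition.  Return true if the loop contains a
   reduction statement that belongs to all partitions.  */
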
18058feb0f0bSmrg bool
18068feb0f0bSmrg loop_distribution::classify_partition (loop_p loop,
18078feb0f0bSmrg 				       struct graph *rdg, partition *partition,
1808a2dc1f3fSmrg 				       bitmap stmt_in_all_partitions)
18091debfc3dSmrg {
1810a2dc1f3fSmrg   bitmap_iterator bi;
1811a2dc1f3fSmrg   unsigned i;
1812a2dc1f3fSmrg   data_reference_p single_ld = NULL, single_st = NULL;
1813a2dc1f3fSmrg   bool volatiles_p = false, has_reduction = false;
18141debfc3dSmrg 
1815a2dc1f3fSmrg   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1816a2dc1f3fSmrg     {
1817a2dc1f3fSmrg       gimple *stmt = RDG_STMT (rdg, i);
1818a2dc1f3fSmrg 
1819a2dc1f3fSmrg       if (gimple_has_volatile_ops (stmt))
1820a2dc1f3fSmrg 	volatiles_p = true;
1821a2dc1f3fSmrg 
1822a2dc1f3fSmrg       /* If the stmt is not included by all partitions and there are uses
1823a2dc1f3fSmrg 	 outside of the loop, then mark the partition as a reduction.  */
1824a2dc1f3fSmrg       if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1825a2dc1f3fSmrg 	{
1826a2dc1f3fSmrg 	  /* Due to a limitation in the transform phase we have to fuse all
1827a2dc1f3fSmrg 	     reduction partitions.  As a result, this could cancel valid loop
1828a2dc1f3fSmrg 	     distribution, especially for a loop whose induction variable is
1829a2dc1f3fSmrg 	     used outside of the loop.  To work around this issue, we skip
1830a2dc1f3fSmrg 	     marking the partition as a reduction if the reduction stmt belongs
1831a2dc1f3fSmrg 	     to all partitions.  In that case, the reduction will be computed
1832a2dc1f3fSmrg 	     correctly no matter how partitions are fused/distributed.  */
1833a2dc1f3fSmrg 	  if (!bitmap_bit_p (stmt_in_all_partitions, i))
1834a2dc1f3fSmrg 	    partition->reduction_p = true;
18358feb0f0bSmrg 	  else
1836a2dc1f3fSmrg 	    has_reduction = true;
1837a2dc1f3fSmrg 	}
18381debfc3dSmrg     }
18391debfc3dSmrg 
18408feb0f0bSmrg   /* Simple workaround to prevent classifying the partition as a builtin
18418feb0f0bSmrg      if it contains any use outside of the loop.  For the case where all
18428feb0f0bSmrg      partitions have the reduction, this simple workaround is delayed
18438feb0f0bSmrg      so that it only affects the last partition.  */
18448feb0f0bSmrg   if (partition->reduction_p)
18458feb0f0bSmrg      return has_reduction;
18468feb0f0bSmrg 
1847a2dc1f3fSmrg   /* Perform general partition disqualification for builtins.  */
1848a2dc1f3fSmrg   if (volatiles_p
1849a2dc1f3fSmrg       || !flag_tree_loop_distribute_patterns)
18508feb0f0bSmrg     return has_reduction;
1851a2dc1f3fSmrg 
1852a2dc1f3fSmrg   /* Find single load/store data references for builtin partition.  */
1853a2dc1f3fSmrg   if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
18548feb0f0bSmrg     return has_reduction;
18558feb0f0bSmrg 
18568feb0f0bSmrg   partition->loc = gimple_location (DR_STMT (single_st));
1857a2dc1f3fSmrg 
1858a2dc1f3fSmrg   /* Classify the builtin kind.  */
1859a2dc1f3fSmrg   if (single_ld == NULL)
1860a2dc1f3fSmrg     classify_builtin_st (loop, partition, single_st);
1861a2dc1f3fSmrg   else
1862a2dc1f3fSmrg     classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
18638feb0f0bSmrg   return has_reduction;
1864a2dc1f3fSmrg }
1865a2dc1f3fSmrg 
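/* Return true if PARTITION1 and PARTITION2 share a memory access,
   either because they contain a common load or store statement, or
   because two of their well-analyzed data references have the same
   base address, offset, init and step.  */
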
18668feb0f0bSmrg bool
18678feb0f0bSmrg loop_distribution::share_memory_accesses (struct graph *rdg,
1868a2dc1f3fSmrg 		       partition *partition1, partition *partition2)
18691debfc3dSmrg {
1870a2dc1f3fSmrg   unsigned i, j;
18711debfc3dSmrg   bitmap_iterator bi, bj;
1872a2dc1f3fSmrg   data_reference_p dr1, dr2;
18731debfc3dSmrg 
18741debfc3dSmrg   /* First check whether in the intersection of the two partitions are
18751debfc3dSmrg      any loads or stores.  Common loads are the situation that happens
18761debfc3dSmrg      most often.  */
18771debfc3dSmrg   EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
18781debfc3dSmrg     if (RDG_MEM_WRITE_STMT (rdg, i)
18791debfc3dSmrg 	|| RDG_MEM_READS_STMT (rdg, i))
18801debfc3dSmrg       return true;
18811debfc3dSmrg 
1882a2dc1f3fSmrg   /* Then check whether the two partitions access the same memory object.  */
1883a2dc1f3fSmrg   EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
18841debfc3dSmrg     {
1885a2dc1f3fSmrg       dr1 = datarefs_vec[i];
1886a2dc1f3fSmrg 
1887a2dc1f3fSmrg       if (!DR_BASE_ADDRESS (dr1)
1888a2dc1f3fSmrg 	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
1889a2dc1f3fSmrg 	continue;
1890a2dc1f3fSmrg 
1891a2dc1f3fSmrg       EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
18921debfc3dSmrg 	{
1893a2dc1f3fSmrg 	  dr2 = datarefs_vec[j];
1894a2dc1f3fSmrg 
1895a2dc1f3fSmrg 	  if (!DR_BASE_ADDRESS (dr2)
1896a2dc1f3fSmrg 	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
1897a2dc1f3fSmrg 	    continue;
1898a2dc1f3fSmrg 
1899a2dc1f3fSmrg 	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
1900a2dc1f3fSmrg 	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
1901a2dc1f3fSmrg 	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
1902a2dc1f3fSmrg 	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
19031debfc3dSmrg 	    return true;
19041debfc3dSmrg 	}
19051debfc3dSmrg     }
19061debfc3dSmrg 
19071debfc3dSmrg   return false;
19081debfc3dSmrg }
19091debfc3dSmrg 
1910a2dc1f3fSmrg /* For each seed statement in STARTING_STMTS, this function builds a
1911a2dc1f3fSmrg    partition for it by adding dependent statements according to RDG.
1912a2dc1f3fSmrg    All partitions are recorded in PARTITIONS.  */
19131debfc3dSmrg 
19148feb0f0bSmrg void
19158feb0f0bSmrg loop_distribution::rdg_build_partitions (struct graph *rdg,
19161debfc3dSmrg 					 vec<gimple *> starting_stmts,
19171debfc3dSmrg 					 vec<partition *> *partitions)
19181debfc3dSmrg {
1919a2dc1f3fSmrg   auto_bitmap processed;
19201debfc3dSmrg   int i;
19211debfc3dSmrg   gimple *stmt;
19221debfc3dSmrg 
19231debfc3dSmrg   FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
19241debfc3dSmrg     {
19251debfc3dSmrg       int v = rdg_vertex_for_stmt (rdg, stmt);
19261debfc3dSmrg 
19271debfc3dSmrg       if (dump_file && (dump_flags & TDF_DETAILS))
19281debfc3dSmrg 	fprintf (dump_file,
19291debfc3dSmrg 		 "ldist asked to generate code for vertex %d\n", v);
19301debfc3dSmrg 
19311debfc3dSmrg       /* If the vertex is already contained in another partition, so
19321debfc3dSmrg          is the partition rooted at it.  */
19331debfc3dSmrg       if (bitmap_bit_p (processed, v))
19341debfc3dSmrg 	continue;
19351debfc3dSmrg 
19361debfc3dSmrg       partition *partition = build_rdg_partition_for_vertex (rdg, v);
19371debfc3dSmrg       bitmap_ior_into (processed, partition->stmts);
19381debfc3dSmrg 
19391debfc3dSmrg       if (dump_file && (dump_flags & TDF_DETAILS))
19401debfc3dSmrg 	{
1941a2dc1f3fSmrg 	  fprintf (dump_file, "ldist creates useful %s partition:\n",
1942a2dc1f3fSmrg 		   partition->type == PTYPE_PARALLEL ? "parallel" : "sequent");
1943a2dc1f3fSmrg 	  bitmap_print (dump_file, partition->stmts, "  ", "\n");
19441debfc3dSmrg 	}
19451debfc3dSmrg 
19461debfc3dSmrg       partitions->safe_push (partition);
19471debfc3dSmrg     }
19481debfc3dSmrg 
19491debfc3dSmrg   /* All vertices should have been assigned to at least one partition now,
19501debfc3dSmrg      other than vertices belonging to dead code.  */
19511debfc3dSmrg }
19521debfc3dSmrg 
19531debfc3dSmrg /* Dump to FILE the PARTITIONS.  */
19541debfc3dSmrg 
19551debfc3dSmrg static void
19561debfc3dSmrg dump_rdg_partitions (FILE *file, vec<partition *> partitions)
19571debfc3dSmrg {
19581debfc3dSmrg   int i;
19591debfc3dSmrg   partition *partition;
19601debfc3dSmrg 
19611debfc3dSmrg   FOR_EACH_VEC_ELT (partitions, i, partition)
19621debfc3dSmrg     debug_bitmap_file (file, partition->stmts);
19631debfc3dSmrg }
19641debfc3dSmrg 
19651debfc3dSmrg /* Debug PARTITIONS.  */
19661debfc3dSmrg extern void debug_rdg_partitions (vec<partition *> );
19671debfc3dSmrg 
19681debfc3dSmrg DEBUG_FUNCTION void
19691debfc3dSmrg debug_rdg_partitions (vec<partition *> partitions)
19701debfc3dSmrg {
19711debfc3dSmrg   dump_rdg_partitions (stderr, partitions);
19721debfc3dSmrg }
19731debfc3dSmrg 
19741debfc3dSmrg /* Returns the number of read and write operations in the RDG.  */
19751debfc3dSmrg 
19761debfc3dSmrg static int
19771debfc3dSmrg number_of_rw_in_rdg (struct graph *rdg)
19781debfc3dSmrg {
19791debfc3dSmrg   int i, res = 0;
19801debfc3dSmrg 
19811debfc3dSmrg   for (i = 0; i < rdg->n_vertices; i++)
19821debfc3dSmrg     {
19831debfc3dSmrg       if (RDG_MEM_WRITE_STMT (rdg, i))
19841debfc3dSmrg 	++res;
19851debfc3dSmrg 
19861debfc3dSmrg       if (RDG_MEM_READS_STMT (rdg, i))
19871debfc3dSmrg 	++res;
19881debfc3dSmrg     }
19891debfc3dSmrg 
19901debfc3dSmrg   return res;
19911debfc3dSmrg }
19921debfc3dSmrg 
19931debfc3dSmrg /* Returns the number of read and write operations in a PARTITION of
19941debfc3dSmrg    the RDG.  */
19951debfc3dSmrg 
19961debfc3dSmrg static int
19971debfc3dSmrg number_of_rw_in_partition (struct graph *rdg, partition *partition)
19981debfc3dSmrg {
19991debfc3dSmrg   int res = 0;
20001debfc3dSmrg   unsigned i;
20011debfc3dSmrg   bitmap_iterator ii;
20021debfc3dSmrg 
20031debfc3dSmrg   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
20041debfc3dSmrg     {
20051debfc3dSmrg       if (RDG_MEM_WRITE_STMT (rdg, i))
20061debfc3dSmrg 	++res;
20071debfc3dSmrg 
20081debfc3dSmrg       if (RDG_MEM_READS_STMT (rdg, i))
20091debfc3dSmrg 	++res;
20101debfc3dSmrg     }
20111debfc3dSmrg 
20121debfc3dSmrg   return res;
20131debfc3dSmrg }
20141debfc3dSmrg 
20151debfc3dSmrg /* Returns true when one of the PARTITIONS contains all the read or
20161debfc3dSmrg    write operations of RDG.  */
20171debfc3dSmrg 
20181debfc3dSmrg static bool
20191debfc3dSmrg partition_contains_all_rw (struct graph *rdg,
20201debfc3dSmrg 			   vec<partition *> partitions)
20211debfc3dSmrg {
20221debfc3dSmrg   int i;
20231debfc3dSmrg   partition *partition;
20241debfc3dSmrg   int nrw = number_of_rw_in_rdg (rdg);
20251debfc3dSmrg 
20261debfc3dSmrg   FOR_EACH_VEC_ELT (partitions, i, partition)
20271debfc3dSmrg     if (nrw == number_of_rw_in_partition (rdg, partition))
20281debfc3dSmrg       return true;
20291debfc3dSmrg 
20301debfc3dSmrg   return false;
20311debfc3dSmrg }
20321debfc3dSmrg 
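/* Compute the dependence direction between the data references in
   bitmaps DRS1 and DRS2 of RDG, merging into the initial direction DIR:
   0 means no dependence, -1 backward, 1 forward, 2 both.  When
   ALIAS_DDRS is not NULL, dependences that a runtime alias check could
   resolve are pushed onto it instead of forcing direction 2.  */
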
20338feb0f0bSmrg int
20348feb0f0bSmrg loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
2035a2dc1f3fSmrg 			 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
20361debfc3dSmrg {
2037a2dc1f3fSmrg   unsigned i, j;
2038a2dc1f3fSmrg   bitmap_iterator bi, bj;
2039a2dc1f3fSmrg   data_reference_p dr1, dr2, saved_dr1;
20401debfc3dSmrg 
20411debfc3dSmrg   /* dependence direction - 0 is no dependence, -1 is back,
20421debfc3dSmrg      1 is forth, 2 is both (we can stop then, merging will occur).  */
2043a2dc1f3fSmrg   EXECUTE_IF_SET_IN_BITMAP (drs1, 0, i, bi)
20441debfc3dSmrg     {
2045a2dc1f3fSmrg       dr1 = datarefs_vec[i];
2046a2dc1f3fSmrg 
2047a2dc1f3fSmrg       EXECUTE_IF_SET_IN_BITMAP (drs2, 0, j, bj)
2048a2dc1f3fSmrg 	{
2049a2dc1f3fSmrg 	  int res, this_dir = 1;
20501debfc3dSmrg 	  ddr_p ddr;
2051a2dc1f3fSmrg 
2052a2dc1f3fSmrg 	  dr2 = datarefs_vec[j];
2053a2dc1f3fSmrg 
2054a2dc1f3fSmrg 	  /* Skip all <read, read> data dependence.  */
2055a2dc1f3fSmrg 	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
2056a2dc1f3fSmrg 	    continue;
2057a2dc1f3fSmrg 
2058a2dc1f3fSmrg 	  saved_dr1 = dr1;
2059a2dc1f3fSmrg 	  /* Re-shuffle data-refs to be in topological order.  */
20601debfc3dSmrg 	  if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
20611debfc3dSmrg 	      > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
20621debfc3dSmrg 	    {
20631debfc3dSmrg 	      std::swap (dr1, dr2);
20641debfc3dSmrg 	      this_dir = -this_dir;
20651debfc3dSmrg 	    }
2066a2dc1f3fSmrg 	  ddr = get_data_dependence (rdg, dr1, dr2);
20671debfc3dSmrg 	  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2068a2dc1f3fSmrg 	    {
2069a2dc1f3fSmrg 	      this_dir = 0;
2070a2dc1f3fSmrg 	      res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1),
2071a2dc1f3fSmrg 					   DR_BASE_ADDRESS (dr2));
2072a2dc1f3fSmrg 	      /* Be conservative.  If data references are not well analyzed,
2073a2dc1f3fSmrg 		 or the two data references have the same base address and
2074a2dc1f3fSmrg 		 offset, add the dependence and consider them to alias each
2075a2dc1f3fSmrg 		 other.  In other words, the dependence cannot be resolved
2076a2dc1f3fSmrg 		 by runtime alias check.  */
2077a2dc1f3fSmrg 	      if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
2078a2dc1f3fSmrg 		  || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
2079a2dc1f3fSmrg 		  || !DR_INIT (dr1) || !DR_INIT (dr2)
2080a2dc1f3fSmrg 		  || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
2081a2dc1f3fSmrg 		  || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
2082a2dc1f3fSmrg 		  || res == 0)
20831debfc3dSmrg 		this_dir = 2;
2084a2dc1f3fSmrg 	      /* Data dependence could be resolved by runtime alias check,
2085a2dc1f3fSmrg 		 record it in ALIAS_DDRS.  */
2086a2dc1f3fSmrg 	      else if (alias_ddrs != NULL)
2087a2dc1f3fSmrg 		alias_ddrs->safe_push (ddr);
2088a2dc1f3fSmrg 	      /* Or simply ignore it.  */
2089a2dc1f3fSmrg 	    }
20901debfc3dSmrg 	  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
20911debfc3dSmrg 	    {
20921debfc3dSmrg 	      if (DDR_REVERSED_P (ddr))
20931debfc3dSmrg 		this_dir = -this_dir;
2094a2dc1f3fSmrg 
20951debfc3dSmrg 	      /* Known dependences can still be unordered throughout the
20968feb0f0bSmrg 		 iteration space, see gcc.dg/tree-ssa/ldist-16.c and
20978feb0f0bSmrg 		 gcc.dg/tree-ssa/pr94969.c.  */
20981debfc3dSmrg 	      if (DDR_NUM_DIST_VECTS (ddr) != 1)
20991debfc3dSmrg 		this_dir = 2;
21001debfc3dSmrg 	      /* If the overlap is exact preserve stmt order.  */
2101a2dc1f3fSmrg 	      else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0),
2102a2dc1f3fSmrg 					    DDR_NB_LOOPS (ddr)))
21031debfc3dSmrg 		;
2104a2dc1f3fSmrg 	      /* Else, as the distance vector is lexicographically positive,
2105a2dc1f3fSmrg 		 swap the dependence direction.  */
21061debfc3dSmrg 	      else
21071debfc3dSmrg 		this_dir = -this_dir;
21081debfc3dSmrg 	    }
21091debfc3dSmrg 	  else
21101debfc3dSmrg 	    this_dir = 0;
21111debfc3dSmrg 	  if (this_dir == 2)
21121debfc3dSmrg 	    return 2;
21131debfc3dSmrg 	  else if (dir == 0)
21141debfc3dSmrg 	    dir = this_dir;
21151debfc3dSmrg 	  else if (this_dir != 0 && dir != this_dir)
21161debfc3dSmrg 	    return 2;
21171debfc3dSmrg 	  /* Shuffle "back" dr1.  */
21181debfc3dSmrg 	  dr1 = saved_dr1;
21191debfc3dSmrg 	}
2120a2dc1f3fSmrg     }
21211debfc3dSmrg   return dir;
21221debfc3dSmrg }
21231debfc3dSmrg 
21241debfc3dSmrg /* Compare postorder number of the partition graph vertices V1 and V2.  */
21251debfc3dSmrg 
21261debfc3dSmrg static int
21271debfc3dSmrg pgcmp (const void *v1_, const void *v2_)
21281debfc3dSmrg {
21291debfc3dSmrg   const vertex *v1 = (const vertex *)v1_;
21301debfc3dSmrg   const vertex *v2 = (const vertex *)v2_;
21311debfc3dSmrg   return v2->post - v1->post;
21321debfc3dSmrg }
21331debfc3dSmrg 
2134a2dc1f3fSmrg /* Data attached to vertices of partition dependence graph.  */
2135a2dc1f3fSmrg struct pg_vdata
2136a2dc1f3fSmrg {
2137a2dc1f3fSmrg   /* ID of the corresponding partition.  */
2138a2dc1f3fSmrg   int id;
2139a2dc1f3fSmrg   /* The partition.  */
2140a2dc1f3fSmrg   struct partition *partition;
2141a2dc1f3fSmrg };
2142a2dc1f3fSmrg 
2143a2dc1f3fSmrg /* Data attached to edges of partition dependence graph.  */
2144a2dc1f3fSmrg struct pg_edata
2145a2dc1f3fSmrg {
2146a2dc1f3fSmrg   /* If the dependence edge can be resolved by runtime alias check,
2147a2dc1f3fSmrg      this vector contains data dependence relations for runtime alias
2148a2dc1f3fSmrg      check.  On the other hand, if the dependence edge is introduced
2149a2dc1f3fSmrg      because of compilation time known data dependence, this vector
2150a2dc1f3fSmrg      contains nothing.  */
2151a2dc1f3fSmrg   vec<ddr_p> alias_ddrs;
2152a2dc1f3fSmrg };
2153a2dc1f3fSmrg 
2154a2dc1f3fSmrg /* Callback data for traversing edges in graph.  */
2155a2dc1f3fSmrg struct pg_edge_callback_data
2156a2dc1f3fSmrg {
2157a2dc1f3fSmrg   /* Bitmap of strongly connected components that should be merged.  */
2158a2dc1f3fSmrg   bitmap sccs_to_merge;
2159a2dc1f3fSmrg   /* Array containing component information for all vertices.  */
2160a2dc1f3fSmrg   int *vertices_component;
2161a2dc1f3fSmrg   /* Vector to record all data dependence relations which are needed
2162a2dc1f3fSmrg      to break strong connected components by runtime alias checks.  */
2163a2dc1f3fSmrg   vec<ddr_p> *alias_ddrs;
2164a2dc1f3fSmrg };
2165a2dc1f3fSmrg 
2166a2dc1f3fSmrg /* Initialize the vertices' data for partition dependence graph PG with
2167a2dc1f3fSmrg    PARTITIONS.  */
2168a2dc1f3fSmrg 
2169a2dc1f3fSmrg static void
2170a2dc1f3fSmrg init_partition_graph_vertices (struct graph *pg,
2171a2dc1f3fSmrg 			       vec<struct partition *> *partitions)
2172a2dc1f3fSmrg {
2173a2dc1f3fSmrg   int i;
2174a2dc1f3fSmrg   partition *partition;
2175a2dc1f3fSmrg   struct pg_vdata *data;
2176a2dc1f3fSmrg 
2177a2dc1f3fSmrg   for (i = 0; partitions->iterate (i, &partition); ++i)
2178a2dc1f3fSmrg     {
2179a2dc1f3fSmrg       data = new pg_vdata;
2180a2dc1f3fSmrg       pg->vertices[i].data = data;
2181a2dc1f3fSmrg       data->id = i;
2182a2dc1f3fSmrg       data->partition = partition;
2183a2dc1f3fSmrg     }
2184a2dc1f3fSmrg }
2185a2dc1f3fSmrg 
2186a2dc1f3fSmrg /* Add edge <I, J> to partition dependence graph PG.  Attach vector of data
2187a2dc1f3fSmrg    dependence relations to the EDGE if DDRS isn't NULL.  */
2188a2dc1f3fSmrg 
2189a2dc1f3fSmrg static void
2190a2dc1f3fSmrg add_partition_graph_edge (struct graph *pg, int i, int j, vec<ddr_p> *ddrs)
2191a2dc1f3fSmrg {
2192a2dc1f3fSmrg   struct graph_edge *e = add_edge (pg, i, j);
2193a2dc1f3fSmrg 
2194a2dc1f3fSmrg   /* If the edge is attached with data dependence relations, it means this
2195a2dc1f3fSmrg      dependence edge can be resolved by runtime alias checks.  */
2196a2dc1f3fSmrg   if (ddrs != NULL)
2197a2dc1f3fSmrg     {
2198a2dc1f3fSmrg       struct pg_edata *data = new pg_edata;
2199a2dc1f3fSmrg 
2200a2dc1f3fSmrg       gcc_assert (ddrs->length () > 0);
2201a2dc1f3fSmrg       e->data = data;
2202a2dc1f3fSmrg       data->alias_ddrs = vNULL;
2203a2dc1f3fSmrg       data->alias_ddrs.safe_splice (*ddrs);
2204a2dc1f3fSmrg     }
2205a2dc1f3fSmrg }
2206a2dc1f3fSmrg 
2207a2dc1f3fSmrg /* Callback function for the graph traversal algorithm.  It returns true
2208a2dc1f3fSmrg    if edge E should be skipped when traversing the graph.  */
2209a2dc1f3fSmrg 
2210a2dc1f3fSmrg static bool
2211a2dc1f3fSmrg pg_skip_alias_edge (struct graph_edge *e)
2212a2dc1f3fSmrg {
2213a2dc1f3fSmrg   struct pg_edata *data = (struct pg_edata *)e->data;
2214a2dc1f3fSmrg   return (data != NULL && data->alias_ddrs.length () > 0);
2215a2dc1f3fSmrg }
2216a2dc1f3fSmrg 
2217a2dc1f3fSmrg /* Callback function freeing data attached to edge E of graph.  */
2218a2dc1f3fSmrg 
2219a2dc1f3fSmrg static void
2220a2dc1f3fSmrg free_partition_graph_edata_cb (struct graph *, struct graph_edge *e, void *)
2221a2dc1f3fSmrg {
2222a2dc1f3fSmrg   if (e->data != NULL)
2223a2dc1f3fSmrg     {
2224a2dc1f3fSmrg       struct pg_edata *data = (struct pg_edata *)e->data;
2225a2dc1f3fSmrg       data->alias_ddrs.release ();
2226a2dc1f3fSmrg       delete data;
2227a2dc1f3fSmrg     }
2228a2dc1f3fSmrg }
2229a2dc1f3fSmrg 
2230a2dc1f3fSmrg /* Free data attached to the vertices of partition dependence graph PG.  */
2231a2dc1f3fSmrg 
2232a2dc1f3fSmrg static void
2233a2dc1f3fSmrg free_partition_graph_vdata (struct graph *pg)
2234a2dc1f3fSmrg {
2235a2dc1f3fSmrg   int i;
2236a2dc1f3fSmrg   struct pg_vdata *data;
2237a2dc1f3fSmrg 
2238a2dc1f3fSmrg   for (i = 0; i < pg->n_vertices; ++i)
2239a2dc1f3fSmrg     {
2240a2dc1f3fSmrg       data = (struct pg_vdata *)pg->vertices[i].data;
2241a2dc1f3fSmrg       delete data;
2242a2dc1f3fSmrg     }
2243a2dc1f3fSmrg }
2244a2dc1f3fSmrg 
2245a2dc1f3fSmrg /* Build and return the partition dependence graph for PARTITIONS.  RDG is
2246a2dc1f3fSmrg    the reduced dependence graph for the loop to be distributed.  If
2247a2dc1f3fSmrg    IGNORE_ALIAS_P is true, data dependence caused by possible aliasing
2248a2dc1f3fSmrg    between references is ignored, as if it doesn't exist at all; otherwise
2249a2dc1f3fSmrg    all dependences are considered.  */
2250a2dc1f3fSmrg 
22518feb0f0bSmrg struct graph *
22528feb0f0bSmrg loop_distribution::build_partition_graph (struct graph *rdg,
2253a2dc1f3fSmrg 					  vec<struct partition *> *partitions,
2254a2dc1f3fSmrg 					  bool ignore_alias_p)
2255a2dc1f3fSmrg {
2256a2dc1f3fSmrg   int i, j;
2257a2dc1f3fSmrg   struct partition *partition1, *partition2;
2258a2dc1f3fSmrg   graph *pg = new_graph (partitions->length ());
2259a2dc1f3fSmrg   auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p;
2260a2dc1f3fSmrg 
2261a2dc1f3fSmrg   alias_ddrs_p = ignore_alias_p ? NULL : &alias_ddrs;
2262a2dc1f3fSmrg 
2263a2dc1f3fSmrg   init_partition_graph_vertices (pg, partitions);
2264a2dc1f3fSmrg 
2265a2dc1f3fSmrg   for (i = 0; partitions->iterate (i, &partition1); ++i)
2266a2dc1f3fSmrg     {
2267a2dc1f3fSmrg       for (j = i + 1; partitions->iterate (j, &partition2); ++j)
2268a2dc1f3fSmrg 	{
2269a2dc1f3fSmrg 	  /* dependence direction - 0 is no dependence, -1 is back,
2270a2dc1f3fSmrg 	     1 is forth, 2 is both (we can stop then, merging will occur).  */
2271a2dc1f3fSmrg 	  int dir = 0;
2272a2dc1f3fSmrg 
2273a2dc1f3fSmrg 	  /* If the first partition has a reduction, add a back edge; if the
2274a2dc1f3fSmrg 	     second partition has a reduction, add a forward edge.  This makes
2275a2dc1f3fSmrg 	     sure that the reduction partition will be sorted as the last one.  */
2276a2dc1f3fSmrg 	  if (partition_reduction_p (partition1))
2277a2dc1f3fSmrg 	    dir = -1;
2278a2dc1f3fSmrg 	  else if (partition_reduction_p (partition2))
2279a2dc1f3fSmrg 	    dir = 1;
2280a2dc1f3fSmrg 
2281a2dc1f3fSmrg 	  /* Cleanup the temporary vector.  */
2282a2dc1f3fSmrg 	  alias_ddrs.truncate (0);
2283a2dc1f3fSmrg 
2284a2dc1f3fSmrg 	  dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs,
2285a2dc1f3fSmrg 					 partition2->datarefs, alias_ddrs_p);
2286a2dc1f3fSmrg 
2287a2dc1f3fSmrg 	  /* Add an edge to the partition graph if there exists a dependence.
2288a2dc1f3fSmrg 	     There are two types of edges.  One type of edge is caused by a
2289a2dc1f3fSmrg 	     compilation time known dependence; this type cannot be resolved
2290a2dc1f3fSmrg 	     by runtime alias check.  The other type can be resolved by
2291a2dc1f3fSmrg 	     runtime alias check.  */
2292a2dc1f3fSmrg 	  if (dir == 1 || dir == 2
2293a2dc1f3fSmrg 	      || alias_ddrs.length () > 0)
2294a2dc1f3fSmrg 	    {
2295a2dc1f3fSmrg 	      /* Attach data dependence relations to edge that can be resolved
2296a2dc1f3fSmrg 		 by runtime alias check.  */
2297a2dc1f3fSmrg 	      bool alias_edge_p = (dir != 1 && dir != 2);
2298a2dc1f3fSmrg 	      add_partition_graph_edge (pg, i, j,
2299a2dc1f3fSmrg 					(alias_edge_p) ? &alias_ddrs : NULL);
2300a2dc1f3fSmrg 	    }
2301a2dc1f3fSmrg 	  if (dir == -1 || dir == 2
2302a2dc1f3fSmrg 	      || alias_ddrs.length () > 0)
2303a2dc1f3fSmrg 	    {
2304a2dc1f3fSmrg 	      /* Attach data dependence relations to edge that can be resolved
2305a2dc1f3fSmrg 		 by runtime alias check.  */
2306a2dc1f3fSmrg 	      bool alias_edge_p = (dir != -1 && dir != 2);
2307a2dc1f3fSmrg 	      add_partition_graph_edge (pg, j, i,
2308a2dc1f3fSmrg 					(alias_edge_p) ? &alias_ddrs : NULL);
2309a2dc1f3fSmrg 	    }
2310a2dc1f3fSmrg 	}
2311a2dc1f3fSmrg     }
2312a2dc1f3fSmrg   return pg;
2313a2dc1f3fSmrg }
2314a2dc1f3fSmrg 
2315a2dc1f3fSmrg /* Sort partitions in PG in descending post order and store them in
2316a2dc1f3fSmrg    PARTITIONS.  */
2317a2dc1f3fSmrg 
2318a2dc1f3fSmrg static void
2319a2dc1f3fSmrg sort_partitions_by_post_order (struct graph *pg,
2320a2dc1f3fSmrg 			       vec<struct partition *> *partitions)
2321a2dc1f3fSmrg {
2322a2dc1f3fSmrg   int i;
2323a2dc1f3fSmrg   struct pg_vdata *data;
2324a2dc1f3fSmrg 
2325a2dc1f3fSmrg   /* Now order the remaining nodes in descending postorder.  */
2326a2dc1f3fSmrg   qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
2327a2dc1f3fSmrg   partitions->truncate (0);
2328a2dc1f3fSmrg   for (i = 0; i < pg->n_vertices; ++i)
2329a2dc1f3fSmrg     {
2330a2dc1f3fSmrg       data = (struct pg_vdata *)pg->vertices[i].data;
2331a2dc1f3fSmrg       if (data->partition)
2332a2dc1f3fSmrg 	partitions->safe_push (data->partition);
2333a2dc1f3fSmrg     }
2334a2dc1f3fSmrg }
2335a2dc1f3fSmrg 
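/* Fuse together the PARTITIONS that form a strongly connected component
   of the partition dependence graph built from RDG, since a dependence
   cycle prevents distributing them.  If IGNORE_ALIAS_P is true, the
   graph is built ignoring dependences that runtime alias checks could
   resolve.  */
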
23368feb0f0bSmrg void
23378feb0f0bSmrg loop_distribution::merge_dep_scc_partitions (struct graph *rdg,
2338a2dc1f3fSmrg 					     vec<struct partition *> *partitions,
2339a2dc1f3fSmrg 					     bool ignore_alias_p)
2340a2dc1f3fSmrg {
2341a2dc1f3fSmrg   struct partition *partition1, *partition2;
2342a2dc1f3fSmrg   struct pg_vdata *data;
2343a2dc1f3fSmrg   graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p);
2344a2dc1f3fSmrg   int i, j, num_sccs = graphds_scc (pg, NULL);
2345a2dc1f3fSmrg 
2346a2dc1f3fSmrg   /* A strongly connected component means a dependence cycle, so we cannot
2347a2dc1f3fSmrg      distribute the partitions in it.  Fuse them together.  */
2348a2dc1f3fSmrg   if ((unsigned) num_sccs < partitions->length ())
2349a2dc1f3fSmrg     {
2350a2dc1f3fSmrg       for (i = 0; i < num_sccs; ++i)
2351a2dc1f3fSmrg 	{
2352a2dc1f3fSmrg 	  for (j = 0; partitions->iterate (j, &partition1); ++j)
2353a2dc1f3fSmrg 	    if (pg->vertices[j].component == i)
2354a2dc1f3fSmrg 	      break;
2355a2dc1f3fSmrg 	  for (j = j + 1; partitions->iterate (j, &partition2); ++j)
2356a2dc1f3fSmrg 	    if (pg->vertices[j].component == i)
2357a2dc1f3fSmrg 	      {
2358a2dc1f3fSmrg 		partition_merge_into (NULL, partition1,
2359a2dc1f3fSmrg 				      partition2, FUSE_SAME_SCC);
2360a2dc1f3fSmrg 		partition1->type = PTYPE_SEQUENTIAL;
2361a2dc1f3fSmrg 		(*partitions)[j] = NULL;
2362a2dc1f3fSmrg 		partition_free (partition2);
2363a2dc1f3fSmrg 		data = (struct pg_vdata *)pg->vertices[j].data;
2364a2dc1f3fSmrg 		data->partition = NULL;
2365a2dc1f3fSmrg 	      }
2366a2dc1f3fSmrg 	}
2367a2dc1f3fSmrg     }
2368a2dc1f3fSmrg 
2369a2dc1f3fSmrg   sort_partitions_by_post_order (pg, partitions);
2370a2dc1f3fSmrg   gcc_assert (partitions->length () == (unsigned)num_sccs);
2371a2dc1f3fSmrg   free_partition_graph_vdata (pg);
2372a2dc1f3fSmrg   free_graph (pg);
2373a2dc1f3fSmrg }
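
/* A sketch of why SCCs must be fused (hypothetical example, not from the
   original source):

     |for (i = 1; i < n; i++)
     |  {
     |    a[i] = b[i - 1] + 1;	<- seed partition 1
     |    b[i] = a[i] + 2;	<- seed partition 2
     |  }

   Each partition depends on the other across iterations, so the two
   vertices form an SCC in the partition graph; neither loop could run to
   completion before the other, hence they are merged back into a single
   sequential partition.  */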
2374a2dc1f3fSmrg 
2375a2dc1f3fSmrg /* Callback function for traversing edge E in graph G.  DATA is private
2376a2dc1f3fSmrg    callback data.  */
2377a2dc1f3fSmrg 
2378a2dc1f3fSmrg static void
2379a2dc1f3fSmrg pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
2380a2dc1f3fSmrg {
2381a2dc1f3fSmrg   int i, j, component;
2382a2dc1f3fSmrg   struct pg_edge_callback_data *cbdata;
2383a2dc1f3fSmrg   struct pg_edata *edata = (struct pg_edata *) e->data;
2384a2dc1f3fSmrg 
2385a2dc1f3fSmrg   /* If the edge doesn't have attached data dependences, it represents
2386a2dc1f3fSmrg      compilation-time-known dependences.  This kind of dependence cannot
2387a2dc1f3fSmrg      be resolved by a runtime alias check.  */
2388a2dc1f3fSmrg   if (edata == NULL || edata->alias_ddrs.length () == 0)
2389a2dc1f3fSmrg     return;
2390a2dc1f3fSmrg 
2391a2dc1f3fSmrg   cbdata = (struct pg_edge_callback_data *) data;
2392a2dc1f3fSmrg   i = e->src;
2393a2dc1f3fSmrg   j = e->dest;
2394a2dc1f3fSmrg   component = cbdata->vertices_component[i];
2395a2dc1f3fSmrg   /* Vertices are topologically sorted according to compilation-time-known
2396a2dc1f3fSmrg      dependences, so we can break strongly connected components by removing
2397a2dc1f3fSmrg      edges of the opposite direction, i.e., edges pointing from a vertex
2398a2dc1f3fSmrg      with a smaller post number to a vertex with a larger post
2399a2dc1f3fSmrg      number.  */
2400a2dc1f3fSmrg   if (g->vertices[i].post < g->vertices[j].post
2401a2dc1f3fSmrg       /* We only need to remove edges connecting vertices in the same
2402a2dc1f3fSmrg 	 strong connected component to break it.  */
2403a2dc1f3fSmrg       && component == cbdata->vertices_component[j]
2404a2dc1f3fSmrg       /* Check if we want to break the strong connected component or not.  */
2405a2dc1f3fSmrg       && !bitmap_bit_p (cbdata->sccs_to_merge, component))
2406a2dc1f3fSmrg     cbdata->alias_ddrs->safe_splice (edata->alias_ddrs);
2407a2dc1f3fSmrg }
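
/* A small worked example (hypothetical): suppose P0 -> P1 is a
   compilation-time-known dependence and P1 -> P0 is an alias dependence,
   so {P0, P1} form an SCC.  After the second SCC pass that skips alias
   edges, P0 receives a larger post number than P1.  The alias edge
   P1 -> P0 then points from the smaller to the larger post number, so it
   is the edge collected above for a runtime alias check, which breaks
   the cycle.  */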
2408a2dc1f3fSmrg 
2409*23f5f463Smrg /* Callback function for traversing edge E.  DATA is private
2410*23f5f463Smrg    callback data.  */
2411*23f5f463Smrg 
2412*23f5f463Smrg static void
2413*23f5f463Smrg pg_unmark_merged_alias_ddrs (struct graph *, struct graph_edge *e, void *data)
2414*23f5f463Smrg {
2415*23f5f463Smrg   int i, j, component;
2416*23f5f463Smrg   struct pg_edge_callback_data *cbdata;
2417*23f5f463Smrg   struct pg_edata *edata = (struct pg_edata *) e->data;
2418*23f5f463Smrg 
2419*23f5f463Smrg   if (edata == NULL || edata->alias_ddrs.length () == 0)
2420*23f5f463Smrg     return;
2421*23f5f463Smrg 
2422*23f5f463Smrg   cbdata = (struct pg_edge_callback_data *) data;
2423*23f5f463Smrg   i = e->src;
2424*23f5f463Smrg   j = e->dest;
2425*23f5f463Smrg   component = cbdata->vertices_component[i];
2426*23f5f463Smrg   /* Release alias ddrs on edges inside SCCs we are going to merge;
   no runtime check is needed for them.  */
2427*23f5f463Smrg   if (component == cbdata->vertices_component[j]
2428*23f5f463Smrg       && bitmap_bit_p (cbdata->sccs_to_merge, component))
2429*23f5f463Smrg     {
2430*23f5f463Smrg       edata->alias_ddrs.release ();
2431*23f5f463Smrg       delete edata;
2432*23f5f463Smrg       e->data = NULL;
2433*23f5f463Smrg     }
2434*23f5f463Smrg }
2435*23f5f463Smrg 
2436a2dc1f3fSmrg /* This is the main function for breaking strongly connected components
2437a2dc1f3fSmrg    in PARTITIONS, given the reduced dependence graph RDG.  Store data
2438a2dc1f3fSmrg    dependence relations for runtime alias checks in ALIAS_DDRS.  */
24398feb0f0bSmrg void
24408feb0f0bSmrg loop_distribution::break_alias_scc_partitions (struct graph *rdg,
2441a2dc1f3fSmrg 					       vec<struct partition *> *partitions,
2442a2dc1f3fSmrg 					       vec<ddr_p> *alias_ddrs)
2443a2dc1f3fSmrg {
24448feb0f0bSmrg   int i, j, k, num_sccs, num_sccs_no_alias = 0;
2445a2dc1f3fSmrg   /* Build partition dependence graph.  */
2446a2dc1f3fSmrg   graph *pg = build_partition_graph (rdg, partitions, false);
2447a2dc1f3fSmrg 
2448a2dc1f3fSmrg   alias_ddrs->truncate (0);
2449a2dc1f3fSmrg   /* Find strongly connected components in the graph, with all dependence
2450a2dc1f3fSmrg      edges considered.  */
2451a2dc1f3fSmrg   num_sccs = graphds_scc (pg, NULL);
2452a2dc1f3fSmrg   /* All SCCs can now be broken by runtime alias checks, because SCCs caused
2453a2dc1f3fSmrg      by compilation-time-known dependences were merged before this function.  */
2454a2dc1f3fSmrg   if ((unsigned) num_sccs < partitions->length ())
2455a2dc1f3fSmrg     {
2456a2dc1f3fSmrg       struct pg_edge_callback_data cbdata;
2457a2dc1f3fSmrg       auto_bitmap sccs_to_merge;
2458a2dc1f3fSmrg       auto_vec<enum partition_type> scc_types;
2459a2dc1f3fSmrg       struct partition *partition, *first;
2460a2dc1f3fSmrg 
2461a2dc1f3fSmrg       /* If all partitions in an SCC have the same type, we can simply merge
2462a2dc1f3fSmrg 	 the SCC.  This loop finds such SCCs and records them in a bitmap.  */
2463a2dc1f3fSmrg       bitmap_set_range (sccs_to_merge, 0, (unsigned) num_sccs);
2464a2dc1f3fSmrg       for (i = 0; i < num_sccs; ++i)
2465a2dc1f3fSmrg 	{
2466a2dc1f3fSmrg 	  for (j = 0; partitions->iterate (j, &first); ++j)
2467a2dc1f3fSmrg 	    if (pg->vertices[j].component == i)
2468a2dc1f3fSmrg 	      break;
2469c0a68be4Smrg 
2470c0a68be4Smrg 	  bool same_type = true, all_builtins = partition_builtin_p (first);
2471a2dc1f3fSmrg 	  for (++j; partitions->iterate (j, &partition); ++j)
2472a2dc1f3fSmrg 	    {
2473a2dc1f3fSmrg 	      if (pg->vertices[j].component != i)
2474a2dc1f3fSmrg 		continue;
2475a2dc1f3fSmrg 
2476a2dc1f3fSmrg 	      if (first->type != partition->type)
2477a2dc1f3fSmrg 		{
2478c0a68be4Smrg 		  same_type = false;
2479a2dc1f3fSmrg 		  break;
2480a2dc1f3fSmrg 		}
2481c0a68be4Smrg 	      all_builtins &= partition_builtin_p (partition);
2482a2dc1f3fSmrg 	    }
2483c0a68be4Smrg 	  /* Merge an SCC if all of its partitions have the same type, even
2484c0a68be4Smrg 	     though the resulting partition is sequential, because the
2485c0a68be4Smrg 	     vectorizer can do a better runtime alias check.  One exception
2486c0a68be4Smrg 	     is when all partitions in the SCC are builtins.  */
2487c0a68be4Smrg 	  if (!same_type || all_builtins)
2488c0a68be4Smrg 	    bitmap_clear_bit (sccs_to_merge, i);
2489a2dc1f3fSmrg 	}
2490a2dc1f3fSmrg 
2491a2dc1f3fSmrg       /* Initialize callback data for traversing.  */
2492a2dc1f3fSmrg       cbdata.sccs_to_merge = sccs_to_merge;
2493a2dc1f3fSmrg       cbdata.alias_ddrs = alias_ddrs;
2494a2dc1f3fSmrg       cbdata.vertices_component = XNEWVEC (int, pg->n_vertices);
2495a2dc1f3fSmrg       /* Record the component information, which will be corrupted by the
2496a2dc1f3fSmrg 	 next SCC-finding call on the graph.  */
2497a2dc1f3fSmrg       for (i = 0; i < pg->n_vertices; ++i)
2498a2dc1f3fSmrg 	cbdata.vertices_component[i] = pg->vertices[i].component;
2499a2dc1f3fSmrg 
2500a2dc1f3fSmrg       /* Collect data dependences for runtime alias checks to break SCCs.  */
2501a2dc1f3fSmrg       if (bitmap_count_bits (sccs_to_merge) != (unsigned) num_sccs)
2502a2dc1f3fSmrg 	{
2503*23f5f463Smrg 	  /* For SCCs we want to merge, clear all alias_ddrs for edges
2504*23f5f463Smrg 	     inside the component.  */
2505*23f5f463Smrg 	  for_each_edge (pg, pg_unmark_merged_alias_ddrs, &cbdata);
25068feb0f0bSmrg 
2507a2dc1f3fSmrg 	  /* Run SCC finding algorithm again, with alias dependence edges
2508a2dc1f3fSmrg 	     skipped.  This is to topologically sort partitions according to
2509a2dc1f3fSmrg 	     compilation time known dependence.  Note the topological order
2510a2dc1f3fSmrg 	     is stored in the form of pg's post order number.  */
2511a2dc1f3fSmrg 	  num_sccs_no_alias = graphds_scc (pg, NULL, pg_skip_alias_edge);
2512*23f5f463Smrg 	  /* We cannot assert partitions->length () == num_sccs_no_alias
2513*23f5f463Smrg 	     since we are not ignoring alias edges in cycles we are
2514*23f5f463Smrg 	     going to merge.  That's required to compute correct postorder.  */
2515a2dc1f3fSmrg 	  /* With topological order, we can construct two subgraphs L and R.
2516a2dc1f3fSmrg 	     L contains edge <x, y> where x < y in terms of post order, while
2517a2dc1f3fSmrg 	     R contains edge <x, y> where x > y.  Edges for compilation time
2518a2dc1f3fSmrg 	     known dependence all fall in R, so we break SCCs by removing all
2519a2dc1f3fSmrg 	     (alias) edges in subgraph L.  */
2520a2dc1f3fSmrg 	  for_each_edge (pg, pg_collect_alias_ddrs, &cbdata);
2521a2dc1f3fSmrg 	}
2522a2dc1f3fSmrg 
2523a2dc1f3fSmrg       /* For each SCC that doesn't need to be broken, merge it.  */
2524a2dc1f3fSmrg       for (i = 0; i < num_sccs; ++i)
2525a2dc1f3fSmrg 	{
2526a2dc1f3fSmrg 	  if (!bitmap_bit_p (sccs_to_merge, i))
2527a2dc1f3fSmrg 	    continue;
2528a2dc1f3fSmrg 
2529a2dc1f3fSmrg 	  for (j = 0; partitions->iterate (j, &first); ++j)
2530a2dc1f3fSmrg 	    if (cbdata.vertices_component[j] == i)
2531a2dc1f3fSmrg 	      break;
2532a2dc1f3fSmrg 	  for (k = j + 1; partitions->iterate (k, &partition); ++k)
2533a2dc1f3fSmrg 	    {
2534a2dc1f3fSmrg 	      struct pg_vdata *data;
2535a2dc1f3fSmrg 
2536a2dc1f3fSmrg 	      if (cbdata.vertices_component[k] != i)
2537a2dc1f3fSmrg 		continue;
2538a2dc1f3fSmrg 
2539a2dc1f3fSmrg 	      partition_merge_into (NULL, first, partition, FUSE_SAME_SCC);
2540a2dc1f3fSmrg 	      (*partitions)[k] = NULL;
2541a2dc1f3fSmrg 	      partition_free (partition);
2542a2dc1f3fSmrg 	      data = (struct pg_vdata *)pg->vertices[k].data;
2543a2dc1f3fSmrg 	      gcc_assert (data->id == k);
2544a2dc1f3fSmrg 	      data->partition = NULL;
2545a2dc1f3fSmrg 	      /* The result partition of merged SCC must be sequential.  */
2546a2dc1f3fSmrg 	      first->type = PTYPE_SEQUENTIAL;
2547a2dc1f3fSmrg 	    }
2548a2dc1f3fSmrg 	}
2549*23f5f463Smrg       /* If the reduction partition's SCC is broken by runtime alias checks,
2550*23f5f463Smrg 	 force a negative post order on it to make sure it will be scheduled
2551*23f5f463Smrg 	 last.  */
25528feb0f0bSmrg       if (num_sccs_no_alias > 0)
25538feb0f0bSmrg 	{
25548feb0f0bSmrg 	  j = -1;
25558feb0f0bSmrg 	  for (i = 0; i < pg->n_vertices; ++i)
25568feb0f0bSmrg 	    {
25578feb0f0bSmrg 	      struct pg_vdata *data = (struct pg_vdata *)pg->vertices[i].data;
25588feb0f0bSmrg 	      if (data->partition && partition_reduction_p (data->partition))
25598feb0f0bSmrg 		{
25608feb0f0bSmrg 		  gcc_assert (j == -1);
25618feb0f0bSmrg 		  j = i;
25628feb0f0bSmrg 		}
25638feb0f0bSmrg 	    }
25648feb0f0bSmrg 	  if (j >= 0)
25658feb0f0bSmrg 	    pg->vertices[j].post = -1;
25668feb0f0bSmrg 	}
25678feb0f0bSmrg 
25688feb0f0bSmrg       free (cbdata.vertices_component);
2569a2dc1f3fSmrg     }
2570a2dc1f3fSmrg 
2571a2dc1f3fSmrg   sort_partitions_by_post_order (pg, partitions);
2572a2dc1f3fSmrg   free_partition_graph_vdata (pg);
2573a2dc1f3fSmrg   for_each_edge (pg, free_partition_graph_edata_cb, NULL);
2574a2dc1f3fSmrg   free_graph (pg);
2575a2dc1f3fSmrg 
2576a2dc1f3fSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
2577a2dc1f3fSmrg     {
2578a2dc1f3fSmrg       fprintf (dump_file, "Possible alias data dependence to break:\n");
2579a2dc1f3fSmrg       dump_data_dependence_relations (dump_file, *alias_ddrs);
2580a2dc1f3fSmrg     }
2581a2dc1f3fSmrg }
2582a2dc1f3fSmrg 
2583a2dc1f3fSmrg /* Compute and return an expression whose value is the segment length which
2584a2dc1f3fSmrg    will be accessed by DR in NITERS iterations.  */
2585a2dc1f3fSmrg 
2586a2dc1f3fSmrg static tree
2587a2dc1f3fSmrg data_ref_segment_size (struct data_reference *dr, tree niters)
2588a2dc1f3fSmrg {
2589a2dc1f3fSmrg   niters = size_binop (MINUS_EXPR,
2590a2dc1f3fSmrg 		       fold_convert (sizetype, niters),
2591a2dc1f3fSmrg 		       size_one_node);
2592a2dc1f3fSmrg   return size_binop (MULT_EXPR,
2593a2dc1f3fSmrg 		     fold_convert (sizetype, DR_STEP (dr)),
2594a2dc1f3fSmrg 		     fold_convert (sizetype, niters));
2595a2dc1f3fSmrg }
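
/* For instance (hypothetical numbers), with DR_STEP of 4 bytes and
   NITERS = 100, the expression built above evaluates to
   (100 - 1) * 4 = 396 bytes, the distance from the first access of DR
   to its last one.  */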
2596a2dc1f3fSmrg 
2597a2dc1f3fSmrg /* Return true if LOOP's latch is dominated by statement for data reference
2598a2dc1f3fSmrg    DR.  */
2599a2dc1f3fSmrg 
2600a2dc1f3fSmrg static inline bool
26018feb0f0bSmrg latch_dominated_by_data_ref (class loop *loop, data_reference *dr)
2602a2dc1f3fSmrg {
2603a2dc1f3fSmrg   return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
2604a2dc1f3fSmrg 			 gimple_bb (DR_STMT (dr)));
2605a2dc1f3fSmrg }
2606a2dc1f3fSmrg 
2607a2dc1f3fSmrg /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
2608a2dc1f3fSmrg    data dependence relations ALIAS_DDRS.  */
2609a2dc1f3fSmrg 
2610a2dc1f3fSmrg static void
26118feb0f0bSmrg compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
2612a2dc1f3fSmrg 			   vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
2613a2dc1f3fSmrg {
2614a2dc1f3fSmrg   unsigned int i;
2615a2dc1f3fSmrg   unsigned HOST_WIDE_INT factor = 1;
2616a2dc1f3fSmrg   tree niters_plus_one, niters = number_of_latch_executions (loop);
2617a2dc1f3fSmrg 
2618a2dc1f3fSmrg   gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
2619a2dc1f3fSmrg   niters = fold_convert (sizetype, niters);
2620a2dc1f3fSmrg   niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);
2621a2dc1f3fSmrg 
2622a2dc1f3fSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
2623a2dc1f3fSmrg     fprintf (dump_file, "Creating alias check pairs:\n");
2624a2dc1f3fSmrg 
2625a2dc1f3fSmrg   /* Iterate all data dependence relations and compute alias check pairs.  */
2626a2dc1f3fSmrg   for (i = 0; i < alias_ddrs->length (); i++)
2627a2dc1f3fSmrg     {
2628a2dc1f3fSmrg       ddr_p ddr = (*alias_ddrs)[i];
2629a2dc1f3fSmrg       struct data_reference *dr_a = DDR_A (ddr);
2630a2dc1f3fSmrg       struct data_reference *dr_b = DDR_B (ddr);
2631a2dc1f3fSmrg       tree seg_length_a, seg_length_b;
2632a2dc1f3fSmrg 
2633a2dc1f3fSmrg       if (latch_dominated_by_data_ref (loop, dr_a))
2634a2dc1f3fSmrg 	seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
2635a2dc1f3fSmrg       else
2636a2dc1f3fSmrg 	seg_length_a = data_ref_segment_size (dr_a, niters);
2637a2dc1f3fSmrg 
2638a2dc1f3fSmrg       if (latch_dominated_by_data_ref (loop, dr_b))
2639a2dc1f3fSmrg 	seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
2640a2dc1f3fSmrg       else
2641a2dc1f3fSmrg 	seg_length_b = data_ref_segment_size (dr_b, niters);
2642a2dc1f3fSmrg 
2643a2dc1f3fSmrg       unsigned HOST_WIDE_INT access_size_a
2644a2dc1f3fSmrg 	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
2645a2dc1f3fSmrg       unsigned HOST_WIDE_INT access_size_b
2646a2dc1f3fSmrg 	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
2647a2dc1f3fSmrg       unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
2648a2dc1f3fSmrg       unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
2649a2dc1f3fSmrg 
2650a2dc1f3fSmrg       dr_with_seg_len_pair_t dr_with_seg_len_pair
2651a2dc1f3fSmrg 	(dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
26528feb0f0bSmrg 	 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
26538feb0f0bSmrg 	 /* ??? Would WELL_ORDERED be safe?  */
26548feb0f0bSmrg 	 dr_with_seg_len_pair_t::REORDERED);
2655a2dc1f3fSmrg 
2656a2dc1f3fSmrg       comp_alias_pairs->safe_push (dr_with_seg_len_pair);
2657a2dc1f3fSmrg     }
2658a2dc1f3fSmrg 
2659a2dc1f3fSmrg   if (tree_fits_uhwi_p (niters))
2660a2dc1f3fSmrg     factor = tree_to_uhwi (niters);
2661a2dc1f3fSmrg 
2662a2dc1f3fSmrg   /* Prune alias check pairs.  */
2663a2dc1f3fSmrg   prune_runtime_alias_test_list (comp_alias_pairs, factor);
2664a2dc1f3fSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
2665a2dc1f3fSmrg     fprintf (dump_file,
2666a2dc1f3fSmrg 	     "Improved number of alias checks from %d to %d\n",
2667a2dc1f3fSmrg 	     alias_ddrs->length (), comp_alias_pairs->length ());
2668a2dc1f3fSmrg }
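
/* Conceptually, each pair built above later expands to a segment-overlap
   test.  A simplified sketch (illustrative only) of the condition for
   references based at P and Q with segment lengths LEN_P and LEN_Q:

     |(P + LEN_P <= Q) || (Q + LEN_Q <= P)

   i.e. the distributed version of the loop is only entered when the two
   accessed segments are disjoint.  */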
2669a2dc1f3fSmrg 
2670a2dc1f3fSmrg /* Given data dependence relations in ALIAS_DDRS, generate runtime alias
2671a2dc1f3fSmrg    checks and version LOOP under condition of these runtime alias checks.  */
2672a2dc1f3fSmrg 
2673a2dc1f3fSmrg static void
2674c0a68be4Smrg version_loop_by_alias_check (vec<struct partition *> *partitions,
26758feb0f0bSmrg 			     class loop *loop, vec<ddr_p> *alias_ddrs)
2676a2dc1f3fSmrg {
2677a2dc1f3fSmrg   profile_probability prob;
2678a2dc1f3fSmrg   basic_block cond_bb;
26798feb0f0bSmrg   class loop *nloop;
2680a2dc1f3fSmrg   tree lhs, arg0, cond_expr = NULL_TREE;
2681a2dc1f3fSmrg   gimple_seq cond_stmts = NULL;
2682a2dc1f3fSmrg   gimple *call_stmt = NULL;
2683a2dc1f3fSmrg   auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs;
2684a2dc1f3fSmrg 
2685a2dc1f3fSmrg   /* Generate code for runtime alias checks if necessary.  */
2686a2dc1f3fSmrg   gcc_assert (alias_ddrs->length () > 0);
2687a2dc1f3fSmrg 
2688a2dc1f3fSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
2689a2dc1f3fSmrg     fprintf (dump_file,
2690a2dc1f3fSmrg 	     "Version loop <%d> with runtime alias check\n", loop->num);
2691a2dc1f3fSmrg 
2692a2dc1f3fSmrg   compute_alias_check_pairs (loop, alias_ddrs, &comp_alias_pairs);
2693a2dc1f3fSmrg   create_runtime_alias_checks (loop, &comp_alias_pairs, &cond_expr);
2694a2dc1f3fSmrg   cond_expr = force_gimple_operand_1 (cond_expr, &cond_stmts,
2695a2dc1f3fSmrg 				      is_gimple_val, NULL_TREE);
2696a2dc1f3fSmrg 
2697a2dc1f3fSmrg   /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
2698c0a68be4Smrg   bool cancelable_p = flag_tree_loop_vectorize;
2699c0a68be4Smrg   if (cancelable_p)
2700a2dc1f3fSmrg     {
2701c0a68be4Smrg       unsigned i = 0;
2702c0a68be4Smrg       struct partition *partition;
2703c0a68be4Smrg       for (; partitions->iterate (i, &partition); ++i)
2704c0a68be4Smrg 	if (!partition_builtin_p (partition))
2705c0a68be4Smrg 	  break;
2706c0a68be4Smrg 
2707c0a68be4Smrg      /* If all partitions are builtins, distributing the loop is profitable
2708c0a68be4Smrg 	and we don't want to cancel the runtime alias checks.  */
2709c0a68be4Smrg       if (i == partitions->length ())
2710c0a68be4Smrg 	cancelable_p = false;
2711c0a68be4Smrg     }
2712c0a68be4Smrg 
2713c0a68be4Smrg   /* Generate internal function call for loop distribution alias check if the
2714c0a68be4Smrg      runtime alias check should be cancelable.  */
2715c0a68be4Smrg   if (cancelable_p)
2716c0a68be4Smrg     {
2717a2dc1f3fSmrg       call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
2718a2dc1f3fSmrg 					      2, NULL_TREE, cond_expr);
2719a2dc1f3fSmrg       lhs = make_ssa_name (boolean_type_node);
2720a2dc1f3fSmrg       gimple_call_set_lhs (call_stmt, lhs);
2721a2dc1f3fSmrg     }
2722a2dc1f3fSmrg   else
2723a2dc1f3fSmrg     lhs = cond_expr;
2724a2dc1f3fSmrg 
2725a2dc1f3fSmrg   prob = profile_probability::guessed_always ().apply_scale (9, 10);
2726a2dc1f3fSmrg   initialize_original_copy_tables ();
2727a2dc1f3fSmrg   nloop = loop_version (loop, lhs, &cond_bb, prob, prob.invert (),
2728a2dc1f3fSmrg 			prob, prob.invert (), true);
2729a2dc1f3fSmrg   free_original_copy_tables ();
2730a2dc1f3fSmrg   /* Record the original loop number in newly generated loops.  In case of
2731a2dc1f3fSmrg      distribution, the original loop will be distributed and the new loop
2732a2dc1f3fSmrg      is kept.  */
2733a2dc1f3fSmrg   loop->orig_loop_num = nloop->num;
2734a2dc1f3fSmrg   nloop->orig_loop_num = nloop->num;
2735a2dc1f3fSmrg   nloop->dont_vectorize = true;
2736a2dc1f3fSmrg   nloop->force_vectorize = false;
2737a2dc1f3fSmrg 
2738a2dc1f3fSmrg   if (call_stmt)
2739a2dc1f3fSmrg     {
2740a2dc1f3fSmrg       /* Record new loop's num in IFN_LOOP_DIST_ALIAS because the original
2741a2dc1f3fSmrg 	 loop could be destroyed.  */
2742a2dc1f3fSmrg       arg0 = build_int_cst (integer_type_node, loop->orig_loop_num);
2743a2dc1f3fSmrg       gimple_call_set_arg (call_stmt, 0, arg0);
2744a2dc1f3fSmrg       gimple_seq_add_stmt_without_update (&cond_stmts, call_stmt);
2745a2dc1f3fSmrg     }
2746a2dc1f3fSmrg 
2747a2dc1f3fSmrg   if (cond_stmts)
2748a2dc1f3fSmrg     {
2749a2dc1f3fSmrg       gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
2750a2dc1f3fSmrg       gsi_insert_seq_before (&cond_gsi, cond_stmts, GSI_SAME_STMT);
2751a2dc1f3fSmrg     }
2752a2dc1f3fSmrg   update_ssa (TODO_update_ssa);
2753a2dc1f3fSmrg }
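
/* A simplified sketch of the resulting CFG (illustrative only):

     |if (.LOOP_DIST_ALIAS (new_loop_num, <alias check>))  <- cancelable case
     |  LOOP	<- to be distributed, orig_loop_num recorded
     |else
     |  NLOOP	<- unchanged copy, dont_vectorize set

   When the check is not cancelable, the plain alias-check condition is
   used instead of the IFN_LOOP_DIST_ALIAS call; the vectorizer later
   folds the call so that only one loop version survives.  */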
2754a2dc1f3fSmrg 
2755a2dc1f3fSmrg /* Return true if loop versioning is needed to distribute PARTITIONS.
2756a2dc1f3fSmrg    ALIAS_DDRS are data dependence relations for runtime alias check.  */
2757a2dc1f3fSmrg 
2758a2dc1f3fSmrg static inline bool
2759a2dc1f3fSmrg version_for_distribution_p (vec<struct partition *> *partitions,
2760a2dc1f3fSmrg 			    vec<ddr_p> *alias_ddrs)
2761a2dc1f3fSmrg {
2762a2dc1f3fSmrg   /* No need to version loop if we have only one partition.  */
2763a2dc1f3fSmrg   if (partitions->length () == 1)
2764a2dc1f3fSmrg     return false;
2765a2dc1f3fSmrg 
2766a2dc1f3fSmrg   /* Need to version loop if runtime alias check is necessary.  */
2767a2dc1f3fSmrg   return (alias_ddrs->length () > 0);
2768a2dc1f3fSmrg }
2769a2dc1f3fSmrg 
2770a2dc1f3fSmrg /* Compare base offset of builtin mem* partitions P1 and P2.  */
2771a2dc1f3fSmrg 
2772c0a68be4Smrg static int
2773c0a68be4Smrg offset_cmp (const void *vp1, const void *vp2)
2774a2dc1f3fSmrg {
2775c0a68be4Smrg   struct partition *p1 = *(struct partition *const *) vp1;
2776c0a68be4Smrg   struct partition *p2 = *(struct partition *const *) vp2;
2777c0a68be4Smrg   unsigned HOST_WIDE_INT o1 = p1->builtin->dst_base_offset;
2778c0a68be4Smrg   unsigned HOST_WIDE_INT o2 = p2->builtin->dst_base_offset;
2779c0a68be4Smrg   return (o2 < o1) - (o1 < o2);
2780a2dc1f3fSmrg }
2781a2dc1f3fSmrg 
2782a2dc1f3fSmrg /* Fuse adjacent memset builtin PARTITIONS if possible.  This is a
2783a2dc1f3fSmrg    special-case optimization transforming code like:
2784a2dc1f3fSmrg 
2785a2dc1f3fSmrg      __builtin_memset (&obj, 0, 100);
2786a2dc1f3fSmrg      _1 = &obj + 100;
2787a2dc1f3fSmrg      __builtin_memset (_1, 0, 200);
2788a2dc1f3fSmrg      _2 = &obj + 300;
2789a2dc1f3fSmrg      __builtin_memset (_2, 0, 100);
2790a2dc1f3fSmrg 
2791a2dc1f3fSmrg    into:
2792a2dc1f3fSmrg 
2793a2dc1f3fSmrg      __builtin_memset (&obj, 0, 400);
2794a2dc1f3fSmrg 
2795a2dc1f3fSmrg    Note that we don't have dependence information between different
2796a2dc1f3fSmrg    partitions at this point; as a result, we can't handle nonadjacent
2797a2dc1f3fSmrg    memset builtin partitions, since a dependence might be broken.  */
2798a2dc1f3fSmrg 
2799a2dc1f3fSmrg static void
2800a2dc1f3fSmrg fuse_memset_builtins (vec<struct partition *> *partitions)
2801a2dc1f3fSmrg {
2802a2dc1f3fSmrg   unsigned i, j;
2803a2dc1f3fSmrg   struct partition *part1, *part2;
2804a2dc1f3fSmrg   tree rhs1, rhs2;
2805a2dc1f3fSmrg 
2806a2dc1f3fSmrg   for (i = 0; partitions->iterate (i, &part1);)
2807a2dc1f3fSmrg     {
2808a2dc1f3fSmrg       if (part1->kind != PKIND_MEMSET)
2809a2dc1f3fSmrg 	{
2810a2dc1f3fSmrg 	  i++;
2811a2dc1f3fSmrg 	  continue;
2812a2dc1f3fSmrg 	}
2813a2dc1f3fSmrg 
2814a2dc1f3fSmrg       /* Find sub-array of memset builtins of the same base.  Index range
2815a2dc1f3fSmrg 	 of the sub-array is [i, j) with "j > i".  */
2816a2dc1f3fSmrg       for (j = i + 1; partitions->iterate (j, &part2); ++j)
2817a2dc1f3fSmrg 	{
2818a2dc1f3fSmrg 	  if (part2->kind != PKIND_MEMSET
2819a2dc1f3fSmrg 	      || !operand_equal_p (part1->builtin->dst_base_base,
2820a2dc1f3fSmrg 				   part2->builtin->dst_base_base, 0))
2821a2dc1f3fSmrg 	    break;
2822a2dc1f3fSmrg 
2823a2dc1f3fSmrg 	  /* Memset calls setting different values can't be merged.  */
2824a2dc1f3fSmrg 	  rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
2825a2dc1f3fSmrg 	  rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
2826a2dc1f3fSmrg 	  if (!operand_equal_p (rhs1, rhs2, 0))
2827a2dc1f3fSmrg 	    break;
2828a2dc1f3fSmrg 	}
2829a2dc1f3fSmrg 
2830a2dc1f3fSmrg       /* Stable sort is required in order to avoid breaking dependence.  */
2831c0a68be4Smrg       gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i],
2832c0a68be4Smrg 		      offset_cmp);
2833a2dc1f3fSmrg       /* Continue with next partition.  */
2834a2dc1f3fSmrg       i = j;
2835a2dc1f3fSmrg     }
2836a2dc1f3fSmrg 
2837a2dc1f3fSmrg   /* Merge all consecutive memset builtin partitions.  */
2838a2dc1f3fSmrg   for (i = 0; i < partitions->length () - 1;)
2839a2dc1f3fSmrg     {
2840a2dc1f3fSmrg       part1 = (*partitions)[i];
2841a2dc1f3fSmrg       if (part1->kind != PKIND_MEMSET)
2842a2dc1f3fSmrg 	{
2843a2dc1f3fSmrg 	  i++;
2844a2dc1f3fSmrg 	  continue;
2845a2dc1f3fSmrg 	}
2846a2dc1f3fSmrg 
2847a2dc1f3fSmrg       part2 = (*partitions)[i + 1];
2848a2dc1f3fSmrg       /* Only merge memset partitions of the same base and with constant
2849a2dc1f3fSmrg 	 access sizes.  */
2850a2dc1f3fSmrg       if (part2->kind != PKIND_MEMSET
2851a2dc1f3fSmrg 	  || TREE_CODE (part1->builtin->size) != INTEGER_CST
2852a2dc1f3fSmrg 	  || TREE_CODE (part2->builtin->size) != INTEGER_CST
2853a2dc1f3fSmrg 	  || !operand_equal_p (part1->builtin->dst_base_base,
2854a2dc1f3fSmrg 			       part2->builtin->dst_base_base, 0))
2855a2dc1f3fSmrg 	{
2856a2dc1f3fSmrg 	  i++;
2857a2dc1f3fSmrg 	  continue;
2858a2dc1f3fSmrg 	}
2859a2dc1f3fSmrg       rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
2860a2dc1f3fSmrg       rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
2861a2dc1f3fSmrg       int bytev1 = const_with_all_bytes_same (rhs1);
2862a2dc1f3fSmrg       int bytev2 = const_with_all_bytes_same (rhs2);
2863a2dc1f3fSmrg       /* Only merge memset partitions of the same value.  */
2864a2dc1f3fSmrg       if (bytev1 != bytev2 || bytev1 == -1)
2865a2dc1f3fSmrg 	{
2866a2dc1f3fSmrg 	  i++;
2867a2dc1f3fSmrg 	  continue;
2868a2dc1f3fSmrg 	}
2869a2dc1f3fSmrg       wide_int end1 = wi::add (part1->builtin->dst_base_offset,
2870a2dc1f3fSmrg 			       wi::to_wide (part1->builtin->size));
2871a2dc1f3fSmrg       /* Only merge adjacent memset partitions.  */
2872a2dc1f3fSmrg       if (wi::ne_p (end1, part2->builtin->dst_base_offset))
2873a2dc1f3fSmrg 	{
2874a2dc1f3fSmrg 	  i++;
2875a2dc1f3fSmrg 	  continue;
2876a2dc1f3fSmrg 	}
2877a2dc1f3fSmrg       /* Merge partitions[i] and partitions[i+1].  */
2878a2dc1f3fSmrg       part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
2879a2dc1f3fSmrg 					  part1->builtin->size,
2880a2dc1f3fSmrg 					  part2->builtin->size);
2881a2dc1f3fSmrg       partition_free (part2);
2882a2dc1f3fSmrg       partitions->ordered_remove (i + 1);
2883a2dc1f3fSmrg     }
2884a2dc1f3fSmrg }
2885a2dc1f3fSmrg 
28868feb0f0bSmrg void
28878feb0f0bSmrg loop_distribution::finalize_partitions (class loop *loop,
28888feb0f0bSmrg 					vec<struct partition *> *partitions,
2889a2dc1f3fSmrg 					vec<ddr_p> *alias_ddrs)
2890a2dc1f3fSmrg {
2891a2dc1f3fSmrg   unsigned i;
2892a2dc1f3fSmrg   struct partition *partition, *a;
2893a2dc1f3fSmrg 
2894a2dc1f3fSmrg   if (partitions->length () == 1
2895a2dc1f3fSmrg       || alias_ddrs->length () > 0)
2896a2dc1f3fSmrg     return;
2897a2dc1f3fSmrg 
2898a2dc1f3fSmrg   unsigned num_builtin = 0, num_normal = 0, num_partial_memset = 0;
2899a2dc1f3fSmrg   bool same_type_p = true;
2900a2dc1f3fSmrg   enum partition_type type = ((*partitions)[0])->type;
2901a2dc1f3fSmrg   for (i = 0; partitions->iterate (i, &partition); ++i)
2902a2dc1f3fSmrg     {
2903a2dc1f3fSmrg       same_type_p &= (type == partition->type);
2904a2dc1f3fSmrg       if (partition_builtin_p (partition))
2905a2dc1f3fSmrg 	{
2906a2dc1f3fSmrg 	  num_builtin++;
2907a2dc1f3fSmrg 	  continue;
2908a2dc1f3fSmrg 	}
2909a2dc1f3fSmrg       num_normal++;
2910a2dc1f3fSmrg       if (partition->kind == PKIND_PARTIAL_MEMSET)
2911a2dc1f3fSmrg 	num_partial_memset++;
2912a2dc1f3fSmrg     }
2913a2dc1f3fSmrg 
2914a2dc1f3fSmrg   /* Don't distribute the current loop into too many loops, given that we
2915a2dc1f3fSmrg      don't have a memory stream cost model.  Be even more conservative in
2916a2dc1f3fSmrg      the case of loop nest distribution.  */
2917a2dc1f3fSmrg   if ((same_type_p && num_builtin == 0
2918a2dc1f3fSmrg        && (loop->inner == NULL || num_normal != 2 || num_partial_memset != 1))
2919a2dc1f3fSmrg       || (loop->inner != NULL
2920a2dc1f3fSmrg 	  && i >= NUM_PARTITION_THRESHOLD && num_normal > 1)
2921a2dc1f3fSmrg       || (loop->inner == NULL
2922a2dc1f3fSmrg 	  && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
2923a2dc1f3fSmrg     {
2924a2dc1f3fSmrg       a = (*partitions)[0];
2925a2dc1f3fSmrg       for (i = 1; partitions->iterate (i, &partition); ++i)
2926a2dc1f3fSmrg 	{
2927a2dc1f3fSmrg 	  partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
2928a2dc1f3fSmrg 	  partition_free (partition);
2929a2dc1f3fSmrg 	}
2930a2dc1f3fSmrg       partitions->truncate (1);
2931a2dc1f3fSmrg     }
2932a2dc1f3fSmrg 
2933a2dc1f3fSmrg   /* Fuse memset builtins if possible.  */
2934a2dc1f3fSmrg   if (partitions->length () > 1)
2935a2dc1f3fSmrg     fuse_memset_builtins (partitions);
2936a2dc1f3fSmrg }
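
/* For instance (illustrative): a loop that splits into one memset builtin
   partition plus one normal partition is kept distributed, while a loop
   that would split into several normal partitions of the same type and no
   builtins is fused back into a single loop, since without a memory
   stream cost model there is no evidence distribution would pay off.  */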
2937a2dc1f3fSmrg 
2938a2dc1f3fSmrg /* Distributes the code from LOOP in such a way that producer statements
2939a2dc1f3fSmrg    are placed before consumer statements.  Tries to separate only the
2940a2dc1f3fSmrg    statements from STMTS into separate loops.  Returns the number of
2941a2dc1f3fSmrg    distributed loops.  Set NB_CALLS to number of generated builtin calls.
2942a2dc1f3fSmrg    Set *DESTROY_P to whether LOOP needs to be destroyed.  */
29431debfc3dSmrg 
29448feb0f0bSmrg int
29458feb0f0bSmrg loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
29468feb0f0bSmrg 		 control_dependences *cd, int *nb_calls, bool *destroy_p,
29478feb0f0bSmrg 		 bool only_patterns_p)
29481debfc3dSmrg {
2949a2dc1f3fSmrg   ddrs_table = new hash_table<ddr_hasher> (389);
29501debfc3dSmrg   struct graph *rdg;
29511debfc3dSmrg   partition *partition;
29521debfc3dSmrg   int i, nbp;
29531debfc3dSmrg 
29541debfc3dSmrg   *destroy_p = false;
29551debfc3dSmrg   *nb_calls = 0;
2956a2dc1f3fSmrg   loop_nest.create (0);
29571debfc3dSmrg   if (!find_loop_nest (loop, &loop_nest))
2958a2dc1f3fSmrg     {
2959a2dc1f3fSmrg       loop_nest.release ();
2960a2dc1f3fSmrg       delete ddrs_table;
29611debfc3dSmrg       return 0;
2962a2dc1f3fSmrg     }
29631debfc3dSmrg 
2964a2dc1f3fSmrg   datarefs_vec.create (20);
2965c0a68be4Smrg   has_nonaddressable_dataref_p = false;
2966a2dc1f3fSmrg   rdg = build_rdg (loop, cd);
29671debfc3dSmrg   if (!rdg)
29681debfc3dSmrg     {
29691debfc3dSmrg       if (dump_file && (dump_flags & TDF_DETAILS))
29701debfc3dSmrg 	fprintf (dump_file,
29711debfc3dSmrg 		 "Loop %d not distributed: failed to build the RDG.\n",
29721debfc3dSmrg 		 loop->num);
29731debfc3dSmrg 
2974a2dc1f3fSmrg       loop_nest.release ();
2975a2dc1f3fSmrg       free_data_refs (datarefs_vec);
2976a2dc1f3fSmrg       delete ddrs_table;
29771debfc3dSmrg       return 0;
29781debfc3dSmrg     }
29791debfc3dSmrg 
2980a2dc1f3fSmrg   if (datarefs_vec.length () > MAX_DATAREFS_NUM)
2981a2dc1f3fSmrg     {
2982a2dc1f3fSmrg       if (dump_file && (dump_flags & TDF_DETAILS))
2983a2dc1f3fSmrg 	fprintf (dump_file,
2984a2dc1f3fSmrg 		 "Loop %d not distributed: too many memory references.\n",
2985a2dc1f3fSmrg 		 loop->num);
2986a2dc1f3fSmrg 
2987a2dc1f3fSmrg       free_rdg (rdg);
2988a2dc1f3fSmrg       loop_nest.release ();
2989a2dc1f3fSmrg       free_data_refs (datarefs_vec);
2990a2dc1f3fSmrg       delete ddrs_table;
2991a2dc1f3fSmrg       return 0;
2992a2dc1f3fSmrg     }
2993a2dc1f3fSmrg 
2994a2dc1f3fSmrg   data_reference_p dref;
2995a2dc1f3fSmrg   for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
2996a2dc1f3fSmrg     dref->aux = (void *) (uintptr_t) i;
2997a2dc1f3fSmrg 
29981debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
29991debfc3dSmrg     dump_rdg (dump_file, rdg);
30001debfc3dSmrg 
30011debfc3dSmrg   auto_vec<struct partition *, 3> partitions;
30021debfc3dSmrg   rdg_build_partitions (rdg, stmts, &partitions);
30031debfc3dSmrg 
3004a2dc1f3fSmrg   auto_vec<ddr_p> alias_ddrs;
3005a2dc1f3fSmrg 
3006a2dc1f3fSmrg   auto_bitmap stmt_in_all_partitions;
3007a2dc1f3fSmrg   bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
3008a2dc1f3fSmrg   for (i = 1; partitions.iterate (i, &partition); ++i)
3009a2dc1f3fSmrg     bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
3010a2dc1f3fSmrg 
30118feb0f0bSmrg   bool any_builtin = false;
30128feb0f0bSmrg   bool reduction_in_all = false;
30131debfc3dSmrg   FOR_EACH_VEC_ELT (partitions, i, partition)
30141debfc3dSmrg     {
30158feb0f0bSmrg       reduction_in_all
30168feb0f0bSmrg 	|= classify_partition (loop, rdg, partition, stmt_in_all_partitions);
30171debfc3dSmrg       any_builtin |= partition_builtin_p (partition);
30181debfc3dSmrg     }
30191debfc3dSmrg 
30201debfc3dSmrg   /* If we are only distributing patterns but did not detect any,
30211debfc3dSmrg      simply bail out.  */
30228feb0f0bSmrg   if (only_patterns_p
30231debfc3dSmrg       && !any_builtin)
30241debfc3dSmrg     {
30251debfc3dSmrg       nbp = 0;
30261debfc3dSmrg       goto ldist_done;
30271debfc3dSmrg     }
30281debfc3dSmrg 
30291debfc3dSmrg   /* If we are only distributing patterns fuse all partitions that
30301debfc3dSmrg      were not classified as builtins.  This also avoids chopping
30311debfc3dSmrg      a loop into pieces, separated by builtin calls.  That is, we
30321debfc3dSmrg      only want no or a single loop body remaining.  */
30331debfc3dSmrg   struct partition *into;
30348feb0f0bSmrg   if (only_patterns_p)
30351debfc3dSmrg     {
30361debfc3dSmrg       for (i = 0; partitions.iterate (i, &into); ++i)
30371debfc3dSmrg 	if (!partition_builtin_p (into))
30381debfc3dSmrg 	  break;
30391debfc3dSmrg       for (++i; partitions.iterate (i, &partition); ++i)
30401debfc3dSmrg 	if (!partition_builtin_p (partition))
30411debfc3dSmrg 	  {
3042a2dc1f3fSmrg 	    partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN);
30431debfc3dSmrg 	    partitions.unordered_remove (i);
30441debfc3dSmrg 	    partition_free (partition);
30451debfc3dSmrg 	    i--;
30461debfc3dSmrg 	  }
30471debfc3dSmrg     }
30481debfc3dSmrg 
30491debfc3dSmrg   /* Due to limitations in the transform phase we have to fuse all
30501debfc3dSmrg      reduction partitions into the last partition so the existing
30511debfc3dSmrg      loop will contain all loop-closed PHI nodes.  */
30521debfc3dSmrg   for (i = 0; partitions.iterate (i, &into); ++i)
30531debfc3dSmrg     if (partition_reduction_p (into))
30541debfc3dSmrg       break;
30551debfc3dSmrg   for (i = i + 1; partitions.iterate (i, &partition); ++i)
30561debfc3dSmrg     if (partition_reduction_p (partition))
30571debfc3dSmrg       {
3058a2dc1f3fSmrg 	partition_merge_into (rdg, into, partition, FUSE_REDUCTION);
30591debfc3dSmrg 	partitions.unordered_remove (i);
30601debfc3dSmrg 	partition_free (partition);
30611debfc3dSmrg 	i--;
30621debfc3dSmrg       }
30631debfc3dSmrg 
30641debfc3dSmrg   /* Apply our simple cost model - fuse partitions with similar
30651debfc3dSmrg      memory accesses.  */
30661debfc3dSmrg   for (i = 0; partitions.iterate (i, &into); ++i)
30671debfc3dSmrg     {
30681debfc3dSmrg       bool changed = false;
3069a2dc1f3fSmrg       if (partition_builtin_p (into) || into->kind == PKIND_PARTIAL_MEMSET)
30701debfc3dSmrg 	continue;
30711debfc3dSmrg       for (int j = i + 1;
30721debfc3dSmrg 	   partitions.iterate (j, &partition); ++j)
30731debfc3dSmrg 	{
3074a2dc1f3fSmrg 	  if (share_memory_accesses (rdg, into, partition))
30751debfc3dSmrg 	    {
3076a2dc1f3fSmrg 	      partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
30771debfc3dSmrg 	      partitions.unordered_remove (j);
30781debfc3dSmrg 	      partition_free (partition);
30791debfc3dSmrg 	      j--;
30801debfc3dSmrg 	      changed = true;
30811debfc3dSmrg 	    }
30821debfc3dSmrg 	}
30831debfc3dSmrg       /* If in step 1 we fused 0 1 2 into 0,2 1 because 0 and 2 have
30841debfc3dSmrg 	 similar accesses, and 1 and 2 have similar accesses but 0 and 1
30851debfc3dSmrg 	 do not, then in the next iteration we would fail to consider
30861debfc3dSmrg 	 merging 1 into 0,2.  So try again if we did any merging into 0.  */
30871debfc3dSmrg       if (changed)
30881debfc3dSmrg 	i--;
30891debfc3dSmrg     }
30901debfc3dSmrg 
30918feb0f0bSmrg   /* Put a non-builtin partition last if we need to preserve a reduction.
30928feb0f0bSmrg      ???  This is a workaround that makes sort_partitions_by_post_order do
30938feb0f0bSmrg      the correct thing while in reality it should sort each component
30948feb0f0bSmrg      separately and then put the component with a reduction or a non-builtin
30958feb0f0bSmrg      last.  */
30968feb0f0bSmrg   if (reduction_in_all
30978feb0f0bSmrg       && partition_builtin_p (partitions.last()))
30988feb0f0bSmrg     FOR_EACH_VEC_ELT (partitions, i, partition)
30998feb0f0bSmrg       if (!partition_builtin_p (partition))
31008feb0f0bSmrg 	{
31018feb0f0bSmrg 	  partitions.unordered_remove (i);
31028feb0f0bSmrg 	  partitions.quick_push (partition);
31038feb0f0bSmrg 	  break;
31048feb0f0bSmrg 	}
31058feb0f0bSmrg 
3106a2dc1f3fSmrg   /* Build the partition dependence graph and fuse partitions in each
3107a2dc1f3fSmrg      strongly connected component.  */
31081debfc3dSmrg   if (partitions.length () > 1)
31091debfc3dSmrg     {
3110a2dc1f3fSmrg       /* Don't support loop nest distribution under runtime alias checks,
3111c0a68be4Smrg 	 since they're not likely to enable many vectorization opportunities.
3112c0a68be4Smrg 	 Also avoid it if the loop has any data reference that may not be
3113c0a68be4Smrg 	 addressable, since the alias check needs to take and compare addresses.  */
3114c0a68be4Smrg       if (loop->inner || has_nonaddressable_dataref_p)
3115a2dc1f3fSmrg 	merge_dep_scc_partitions (rdg, &partitions, false);
31161debfc3dSmrg       else
31171debfc3dSmrg 	{
3118a2dc1f3fSmrg 	  merge_dep_scc_partitions (rdg, &partitions, true);
3119a2dc1f3fSmrg 	  if (partitions.length () > 1)
3120a2dc1f3fSmrg 	    break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
31211debfc3dSmrg 	}
31221debfc3dSmrg     }
31231debfc3dSmrg 
3124a2dc1f3fSmrg   finalize_partitions (loop, &partitions, &alias_ddrs);
31251debfc3dSmrg 
31268feb0f0bSmrg   /* If there is a reduction in all partitions make sure the last one
31278feb0f0bSmrg      is not classified for builtin code generation.  */
31288feb0f0bSmrg   if (reduction_in_all)
31298feb0f0bSmrg     {
31308feb0f0bSmrg       partition = partitions.last ();
31318feb0f0bSmrg       if (only_patterns_p
31328feb0f0bSmrg 	  && partition_builtin_p (partition)
31338feb0f0bSmrg 	  && !partition_builtin_p (partitions[0]))
31348feb0f0bSmrg 	{
31358feb0f0bSmrg 	  nbp = 0;
31368feb0f0bSmrg 	  goto ldist_done;
31378feb0f0bSmrg 	}
31388feb0f0bSmrg       partition->kind = PKIND_NORMAL;
31398feb0f0bSmrg     }
31408feb0f0bSmrg 
31411debfc3dSmrg   nbp = partitions.length ();
31421debfc3dSmrg   if (nbp == 0
31431debfc3dSmrg       || (nbp == 1 && !partition_builtin_p (partitions[0]))
31441debfc3dSmrg       || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
31451debfc3dSmrg     {
31461debfc3dSmrg       nbp = 0;
31471debfc3dSmrg       goto ldist_done;
31481debfc3dSmrg     }
31491debfc3dSmrg 
3150a2dc1f3fSmrg   if (version_for_distribution_p (&partitions, &alias_ddrs))
3151c0a68be4Smrg     version_loop_by_alias_check (&partitions, loop, &alias_ddrs);
3152a2dc1f3fSmrg 
31531debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
3154a2dc1f3fSmrg     {
3155a2dc1f3fSmrg       fprintf (dump_file,
3156a2dc1f3fSmrg 	       "distribute loop <%d> into partitions:\n", loop->num);
31571debfc3dSmrg       dump_rdg_partitions (dump_file, partitions);
3158a2dc1f3fSmrg     }
31591debfc3dSmrg 
31601debfc3dSmrg   FOR_EACH_VEC_ELT (partitions, i, partition)
31611debfc3dSmrg     {
31621debfc3dSmrg       if (partition_builtin_p (partition))
31631debfc3dSmrg 	(*nb_calls)++;
31641debfc3dSmrg       *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
31651debfc3dSmrg     }
31661debfc3dSmrg 
31671debfc3dSmrg  ldist_done:
3168a2dc1f3fSmrg   loop_nest.release ();
3169a2dc1f3fSmrg   free_data_refs (datarefs_vec);
3170a2dc1f3fSmrg   for (hash_table<ddr_hasher>::iterator iter = ddrs_table->begin ();
3171a2dc1f3fSmrg        iter != ddrs_table->end (); ++iter)
3172a2dc1f3fSmrg     {
3173a2dc1f3fSmrg       free_dependence_relation (*iter);
3174a2dc1f3fSmrg       *iter = NULL;
3175a2dc1f3fSmrg     }
3176a2dc1f3fSmrg   delete ddrs_table;
31771debfc3dSmrg 
31781debfc3dSmrg   FOR_EACH_VEC_ELT (partitions, i, partition)
31791debfc3dSmrg     partition_free (partition);
31801debfc3dSmrg 
31811debfc3dSmrg   free_rdg (rdg);
31821debfc3dSmrg   return nbp - *nb_calls;
31831debfc3dSmrg }
31841debfc3dSmrg 
31858feb0f0bSmrg 
31868feb0f0bSmrg void loop_distribution::bb_top_order_init (void)
31878feb0f0bSmrg {
31888feb0f0bSmrg   int rpo_num;
31898feb0f0bSmrg   int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
31908feb0f0bSmrg   edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
31918feb0f0bSmrg   bitmap exit_bbs = BITMAP_ALLOC (NULL);
31928feb0f0bSmrg 
31938feb0f0bSmrg   bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
31948feb0f0bSmrg   bb_top_order_index_size = last_basic_block_for_fn (cfun);
31958feb0f0bSmrg 
31968feb0f0bSmrg   entry->flags &= ~EDGE_DFS_BACK;
31978feb0f0bSmrg   bitmap_set_bit (exit_bbs, EXIT_BLOCK);
31988feb0f0bSmrg   rpo_num = rev_post_order_and_mark_dfs_back_seme (cfun, entry, exit_bbs, true,
31998feb0f0bSmrg 						   rpo, NULL);
32008feb0f0bSmrg   BITMAP_FREE (exit_bbs);
32018feb0f0bSmrg 
32028feb0f0bSmrg   for (int i = 0; i < rpo_num; i++)
32038feb0f0bSmrg     bb_top_order_index[rpo[i]] = i;
32048feb0f0bSmrg 
32058feb0f0bSmrg   free (rpo);
32068feb0f0bSmrg }
32078feb0f0bSmrg 
32088feb0f0bSmrg void loop_distribution::bb_top_order_destroy ()
32098feb0f0bSmrg {
32108feb0f0bSmrg   free (bb_top_order_index);
32118feb0f0bSmrg   bb_top_order_index = NULL;
32128feb0f0bSmrg   bb_top_order_index_size = 0;
32138feb0f0bSmrg }
32148feb0f0bSmrg 
32158feb0f0bSmrg 
32168feb0f0bSmrg /* Given LOOP, this function records seed statements for distribution in
32178feb0f0bSmrg    WORK_LIST.  Return false if there is nothing for distribution.  */
32188feb0f0bSmrg 
32198feb0f0bSmrg static bool
32208feb0f0bSmrg find_seed_stmts_for_distribution (class loop *loop, vec<gimple *> *work_list)
32218feb0f0bSmrg {
32228feb0f0bSmrg   basic_block *bbs = get_loop_body_in_dom_order (loop);
32238feb0f0bSmrg 
32248feb0f0bSmrg   /* Initialize the worklist with stmts we seed the partitions with.  */
32258feb0f0bSmrg   for (unsigned i = 0; i < loop->num_nodes; ++i)
32268feb0f0bSmrg     {
32278feb0f0bSmrg       /* In irreducible sub-regions we don't know how to redirect
32288feb0f0bSmrg 	 conditions, so fail.  See PR100492.  */
32298feb0f0bSmrg       if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
32308feb0f0bSmrg 	{
32318feb0f0bSmrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
32328feb0f0bSmrg 	    fprintf (dump_file, "loop %d contains an irreducible region.\n",
32338feb0f0bSmrg 		     loop->num);
32348feb0f0bSmrg 	  work_list->truncate (0);
32358feb0f0bSmrg 	  break;
32368feb0f0bSmrg 	}
32378feb0f0bSmrg       for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
32388feb0f0bSmrg 	   !gsi_end_p (gsi); gsi_next (&gsi))
32398feb0f0bSmrg 	{
32408feb0f0bSmrg 	  gphi *phi = gsi.phi ();
32418feb0f0bSmrg 	  if (virtual_operand_p (gimple_phi_result (phi)))
32428feb0f0bSmrg 	    continue;
32438feb0f0bSmrg 	  /* Distribute stmts which have defs that are used outside of
32448feb0f0bSmrg 	     the loop.  */
32458feb0f0bSmrg 	  if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
32468feb0f0bSmrg 	    continue;
32478feb0f0bSmrg 	  work_list->safe_push (phi);
32488feb0f0bSmrg 	}
32498feb0f0bSmrg       for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
32508feb0f0bSmrg 	   !gsi_end_p (gsi); gsi_next (&gsi))
32518feb0f0bSmrg 	{
32528feb0f0bSmrg 	  gimple *stmt = gsi_stmt (gsi);
32538feb0f0bSmrg 
32548feb0f0bSmrg 	  /* Ignore clobbers, they do not have true side effects.  */
32558feb0f0bSmrg 	  if (gimple_clobber_p (stmt))
32568feb0f0bSmrg 	    continue;
32578feb0f0bSmrg 
32588feb0f0bSmrg 	  /* If there is a stmt with side-effects bail out - we
32598feb0f0bSmrg 	     cannot and should not distribute this loop.  */
32608feb0f0bSmrg 	  if (gimple_has_side_effects (stmt))
32618feb0f0bSmrg 	    {
32628feb0f0bSmrg 	      free (bbs);
32638feb0f0bSmrg 	      return false;
32648feb0f0bSmrg 	    }
32658feb0f0bSmrg 
32668feb0f0bSmrg 	  /* Distribute stmts which have defs that are used outside of
32678feb0f0bSmrg 	     the loop.  */
32688feb0f0bSmrg 	  if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
32698feb0f0bSmrg 	    ;
32708feb0f0bSmrg 	  /* Otherwise only distribute stores for now.  */
32718feb0f0bSmrg 	  else if (!gimple_vdef (stmt))
32728feb0f0bSmrg 	    continue;
32738feb0f0bSmrg 
32748feb0f0bSmrg 	  work_list->safe_push (stmt);
32758feb0f0bSmrg 	}
32768feb0f0bSmrg     }
32778feb0f0bSmrg   free (bbs);
32788feb0f0bSmrg   return work_list->length () > 0;
32798feb0f0bSmrg }
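
/* Hypothetical example of the seeds collected above:

     |s = 0;
     |for (i = 0; i < n; i++)
     |  {
     |    a[i] = b[i];	<- seed: stores to memory
     |    s = s + b[i];	<- seed: s is used after the loop
     |  }
     |... = s;

   Both statements inside the loop end up on WORK_LIST.  */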
32808feb0f0bSmrg 
32818feb0f0bSmrg /* Given innermost LOOP, return the outermost enclosing loop that forms a
32828feb0f0bSmrg    perfect loop nest.  */
32838feb0f0bSmrg 
32848feb0f0bSmrg static class loop *
32858feb0f0bSmrg prepare_perfect_loop_nest (class loop *loop)
32868feb0f0bSmrg {
32878feb0f0bSmrg   class loop *outer = loop_outer (loop);
32888feb0f0bSmrg   tree niters = number_of_latch_executions (loop);
32898feb0f0bSmrg 
32908feb0f0bSmrg   /* TODO: We only support distribution of the innermost 3-level loop nest
32918feb0f0bSmrg      for now, because of compilation-time issues.  This should be relaxed
32928feb0f0bSmrg      in the future.  Note we only allow 3-level loop nest distribution
32938feb0f0bSmrg      when parallelizing loops.  */
32948feb0f0bSmrg   while ((loop->inner == NULL
32958feb0f0bSmrg 	  || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1))
32968feb0f0bSmrg 	 && loop_outer (outer)
32978feb0f0bSmrg 	 && outer->inner == loop && loop->next == NULL
32988feb0f0bSmrg 	 && single_exit (outer)
32998feb0f0bSmrg 	 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
33008feb0f0bSmrg 	 && (niters = number_of_latch_executions (outer)) != NULL_TREE
33018feb0f0bSmrg 	 && niters != chrec_dont_know)
33028feb0f0bSmrg     {
33038feb0f0bSmrg       loop = outer;
33048feb0f0bSmrg       outer = loop_outer (loop);
33058feb0f0bSmrg     }
33068feb0f0bSmrg 
33078feb0f0bSmrg   return loop;
33088feb0f0bSmrg }
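
/* For example (illustrative), given the innermost J loop of

     |for (i = 0; i < n; i++)	<- single exit, known niters
     |  for (j = 0; j < m; j++)
     |    a[i][j] = 0.0;

   this returns the outer I loop, because the two loops form a perfect
   nest: the outer loop body consists of nothing but the inner loop.  */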
33098feb0f0bSmrg 
33108feb0f0bSmrg 
33118feb0f0bSmrg unsigned int
33128feb0f0bSmrg loop_distribution::execute (function *fun)
33138feb0f0bSmrg {
33148feb0f0bSmrg   class loop *loop;
33158feb0f0bSmrg   bool changed = false;
33168feb0f0bSmrg   basic_block bb;
33178feb0f0bSmrg   control_dependences *cd = NULL;
33188feb0f0bSmrg   auto_vec<loop_p> loops_to_be_destroyed;
33198feb0f0bSmrg 
33208feb0f0bSmrg   if (number_of_loops (fun) <= 1)
33218feb0f0bSmrg     return 0;
33228feb0f0bSmrg 
33238feb0f0bSmrg   bb_top_order_init ();
33248feb0f0bSmrg 
33258feb0f0bSmrg   FOR_ALL_BB_FN (bb, fun)
33268feb0f0bSmrg     {
33278feb0f0bSmrg       gimple_stmt_iterator gsi;
33288feb0f0bSmrg       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
33298feb0f0bSmrg 	gimple_set_uid (gsi_stmt (gsi), -1);
33308feb0f0bSmrg       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
33318feb0f0bSmrg 	gimple_set_uid (gsi_stmt (gsi), -1);
33328feb0f0bSmrg     }
33338feb0f0bSmrg 
33348feb0f0bSmrg   /* We can at the moment only distribute non-nested loops, thus restrict
33358feb0f0bSmrg      walking to innermost loops.  */
33368feb0f0bSmrg   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
33378feb0f0bSmrg     {
33388feb0f0bSmrg       /* Don't distribute loops with multiple exit edges, or cold loops
33398feb0f0bSmrg          when not doing pattern detection.  */
33408feb0f0bSmrg       if (!single_exit (loop)
33418feb0f0bSmrg 	  || (!flag_tree_loop_distribute_patterns
33428feb0f0bSmrg 	      && !optimize_loop_for_speed_p (loop)))
33438feb0f0bSmrg 	continue;
33448feb0f0bSmrg 
33458feb0f0bSmrg       /* Don't distribute loop if niters is unknown.  */
33468feb0f0bSmrg       tree niters = number_of_latch_executions (loop);
33478feb0f0bSmrg       if (niters == NULL_TREE || niters == chrec_dont_know)
33488feb0f0bSmrg 	continue;
33498feb0f0bSmrg 
33508feb0f0bSmrg       /* Get the perfect loop nest for distribution.  */
33518feb0f0bSmrg       loop = prepare_perfect_loop_nest (loop);
33528feb0f0bSmrg       for (; loop; loop = loop->inner)
33538feb0f0bSmrg 	{
33548feb0f0bSmrg 	  auto_vec<gimple *> work_list;
33558feb0f0bSmrg 	  if (!find_seed_stmts_for_distribution (loop, &work_list))
33568feb0f0bSmrg 	    break;
33578feb0f0bSmrg 
33588feb0f0bSmrg 	  const char *str = loop->inner ? " nest" : "";
33598feb0f0bSmrg 	  dump_user_location_t loc = find_loop_location (loop);
33608feb0f0bSmrg 	  if (!cd)
33618feb0f0bSmrg 	    {
33628feb0f0bSmrg 	      calculate_dominance_info (CDI_DOMINATORS);
33638feb0f0bSmrg 	      calculate_dominance_info (CDI_POST_DOMINATORS);
33648feb0f0bSmrg 	      cd = new control_dependences ();
33658feb0f0bSmrg 	      free_dominance_info (CDI_POST_DOMINATORS);
33668feb0f0bSmrg 	    }
33678feb0f0bSmrg 
33688feb0f0bSmrg 	  bool destroy_p;
33698feb0f0bSmrg 	  int nb_generated_loops, nb_generated_calls;
33708feb0f0bSmrg 	  nb_generated_loops
33718feb0f0bSmrg 	    = distribute_loop (loop, work_list, cd, &nb_generated_calls,
33728feb0f0bSmrg 			       &destroy_p, (!optimize_loop_for_speed_p (loop)
33738feb0f0bSmrg 					    || !flag_tree_loop_distribution));
33748feb0f0bSmrg 	  if (destroy_p)
33758feb0f0bSmrg 	    loops_to_be_destroyed.safe_push (loop);
33768feb0f0bSmrg 
33778feb0f0bSmrg 	  if (nb_generated_loops + nb_generated_calls > 0)
33788feb0f0bSmrg 	    {
33798feb0f0bSmrg 	      changed = true;
33808feb0f0bSmrg 	      if (dump_enabled_p ())
33818feb0f0bSmrg 		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
33828feb0f0bSmrg 				 loc, "Loop%s %d distributed: split to %d loops "
33838feb0f0bSmrg 				 "and %d library calls.\n", str, loop->num,
33848feb0f0bSmrg 				 nb_generated_loops, nb_generated_calls);
33858feb0f0bSmrg 
33868feb0f0bSmrg 	      break;
33878feb0f0bSmrg 	    }
33888feb0f0bSmrg 
33898feb0f0bSmrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
33908feb0f0bSmrg 	    fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
33918feb0f0bSmrg 	}
33928feb0f0bSmrg     }
33938feb0f0bSmrg 
33948feb0f0bSmrg   if (cd)
33958feb0f0bSmrg     delete cd;
33968feb0f0bSmrg 
33978feb0f0bSmrg   if (bb_top_order_index != NULL)
33988feb0f0bSmrg     bb_top_order_destroy ();
33998feb0f0bSmrg 
34008feb0f0bSmrg   if (changed)
34018feb0f0bSmrg     {
34038feb0f0bSmrg       /* Destroy loop bodies that could not be reused.  Do this late, as we
34048feb0f0bSmrg 	 can otherwise end up referring to stale data in control dependences.  */
34048feb0f0bSmrg       unsigned i;
34058feb0f0bSmrg       FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
34068feb0f0bSmrg 	destroy_loop (loop);
34078feb0f0bSmrg 
34088feb0f0bSmrg       /* Cached scalar evolutions now may refer to wrong or non-existing
34098feb0f0bSmrg 	 loops.  */
34108feb0f0bSmrg       scev_reset_htab ();
34118feb0f0bSmrg       mark_virtual_operands_for_renaming (fun);
34128feb0f0bSmrg       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
34138feb0f0bSmrg     }
34148feb0f0bSmrg 
34158feb0f0bSmrg   checking_verify_loop_structure ();
34168feb0f0bSmrg 
34178feb0f0bSmrg   return changed ? TODO_cleanup_cfg : 0;
34188feb0f0bSmrg }
34198feb0f0bSmrg 
34208feb0f0bSmrg 
34211debfc3dSmrg /* Distribute all loops in the current function.  */
34221debfc3dSmrg 
34231debfc3dSmrg namespace {
34241debfc3dSmrg 
34251debfc3dSmrg const pass_data pass_data_loop_distribution =
34261debfc3dSmrg {
34271debfc3dSmrg   GIMPLE_PASS, /* type */
34281debfc3dSmrg   "ldist", /* name */
34291debfc3dSmrg   OPTGROUP_LOOP, /* optinfo_flags */
34301debfc3dSmrg   TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
34311debfc3dSmrg   ( PROP_cfg | PROP_ssa ), /* properties_required */
34321debfc3dSmrg   0, /* properties_provided */
34331debfc3dSmrg   0, /* properties_destroyed */
34341debfc3dSmrg   0, /* todo_flags_start */
34351debfc3dSmrg   0, /* todo_flags_finish */
34361debfc3dSmrg };
34371debfc3dSmrg 
34381debfc3dSmrg class pass_loop_distribution : public gimple_opt_pass
34391debfc3dSmrg {
34401debfc3dSmrg public:
34411debfc3dSmrg   pass_loop_distribution (gcc::context *ctxt)
34421debfc3dSmrg     : gimple_opt_pass (pass_data_loop_distribution, ctxt)
34431debfc3dSmrg   {}
34441debfc3dSmrg 
34451debfc3dSmrg   /* opt_pass methods: */
34461debfc3dSmrg   virtual bool gate (function *)
34471debfc3dSmrg     {
34481debfc3dSmrg       return flag_tree_loop_distribution
34491debfc3dSmrg 	|| flag_tree_loop_distribute_patterns;
34501debfc3dSmrg     }
34511debfc3dSmrg 
34521debfc3dSmrg   virtual unsigned int execute (function *);
34531debfc3dSmrg 
34541debfc3dSmrg }; // class pass_loop_distribution
34551debfc3dSmrg 
34561debfc3dSmrg unsigned int
34571debfc3dSmrg pass_loop_distribution::execute (function *fun)
34581debfc3dSmrg {
34598feb0f0bSmrg   return loop_distribution ().execute (fun);
34601debfc3dSmrg }
34611debfc3dSmrg 
34621debfc3dSmrg } // anon namespace
34631debfc3dSmrg 
34641debfc3dSmrg gimple_opt_pass *
34651debfc3dSmrg make_pass_loop_distribution (gcc::context *ctxt)
34661debfc3dSmrg {
34671debfc3dSmrg   return new pass_loop_distribution (ctxt);
34681debfc3dSmrg }