xref: /dflybsd-src/contrib/gcc-8.0/gcc/tree-loop-distribution.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
138fd1498Szrj /* Loop distribution.
238fd1498Szrj    Copyright (C) 2006-2018 Free Software Foundation, Inc.
338fd1498Szrj    Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
438fd1498Szrj    and Sebastian Pop <sebastian.pop@amd.com>.
538fd1498Szrj 
638fd1498Szrj This file is part of GCC.
738fd1498Szrj 
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it
938fd1498Szrj under the terms of the GNU General Public License as published by the
1038fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
1138fd1498Szrj later version.
1238fd1498Szrj 
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1438fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1538fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1638fd1498Szrj for more details.
1738fd1498Szrj 
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3.  If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>.  */
2138fd1498Szrj 
2238fd1498Szrj /* This pass performs loop distribution: for example, the loop
2338fd1498Szrj 
2438fd1498Szrj    |DO I = 2, N
2538fd1498Szrj    |    A(I) = B(I) + C
2638fd1498Szrj    |    D(I) = A(I-1)*E
2738fd1498Szrj    |ENDDO
2838fd1498Szrj 
2938fd1498Szrj    is transformed to
3038fd1498Szrj 
3138fd1498Szrj    |DOALL I = 2, N
3238fd1498Szrj    |   A(I) = B(I) + C
3338fd1498Szrj    |ENDDO
3438fd1498Szrj    |
3538fd1498Szrj    |DOALL I = 2, N
3638fd1498Szrj    |   D(I) = A(I-1)*E
3738fd1498Szrj    |ENDDO
3838fd1498Szrj 
3938fd1498Szrj    Loop distribution is the dual of loop fusion.  It separates statements
4038fd1498Szrj    of a loop (or loop nest) into multiple loops (or loop nests) with the
4138fd1498Szrj    same loop header.  The major goal is to separate statements which may
4238fd1498Szrj    be vectorized from those that can't.  This pass implements distribution
4338fd1498Szrj    in the following steps:
4438fd1498Szrj 
     1) Seed partitions with specific type statements.  For now we support
	two types of seed statements: statement defining variable used
	outside of loop; statement storing to memory.
4838fd1498Szrj      2) Build reduced dependence graph (RDG) for loop to be distributed.
4938fd1498Szrj 	The vertices (RDG:V) model all statements in the loop and the edges
5038fd1498Szrj 	(RDG:E) model flow and control dependencies between statements.
5138fd1498Szrj      3) Apart from RDG, compute data dependencies between memory references.
5238fd1498Szrj      4) Starting from seed statement, build up partition by adding depended
5338fd1498Szrj 	statements according to RDG's dependence information.  Partition is
	classified as parallel type if it can be executed in parallel; or as
5538fd1498Szrj 	sequential type if it can't.  Parallel type partition is further
5638fd1498Szrj 	classified as different builtin kinds if it can be implemented as
5738fd1498Szrj 	builtin function calls.
5838fd1498Szrj      5) Build partition dependence graph (PG) based on data dependencies.
5938fd1498Szrj 	The vertices (PG:V) model all partitions and the edges (PG:E) model
6038fd1498Szrj 	all data dependencies between every partitions pair.  In general,
6138fd1498Szrj 	data dependence is either compilation time known or unknown.  In C
	family languages, there exist quite a number of compilation time
	unknown dependencies because of possible alias relations of data
	references.
6438fd1498Szrj 	We categorize PG's edge to two types: "true" edge that represents
6538fd1498Szrj 	compilation time known data dependencies; "alias" edge for all other
6638fd1498Szrj 	data dependencies.
6738fd1498Szrj      6) Traverse subgraph of PG as if all "alias" edges don't exist.  Merge
6838fd1498Szrj 	partitions in each strong connected component (SCC) correspondingly.
6938fd1498Szrj 	Build new PG for merged partitions.
7038fd1498Szrj      7) Traverse PG again and this time with both "true" and "alias" edges
7138fd1498Szrj 	included.  We try to break SCCs by removing some edges.  Because
7238fd1498Szrj 	SCCs by "true" edges are all fused in step 6), we can break SCCs
7338fd1498Szrj 	by removing some "alias" edges.  It's NP-hard to choose optimal
7438fd1498Szrj 	edge set, fortunately simple approximation is good enough for us
7538fd1498Szrj 	given the small problem scale.
7638fd1498Szrj      8) Collect all data dependencies of the removed "alias" edges.  Create
7738fd1498Szrj 	runtime alias checks for collected data dependencies.
7838fd1498Szrj      9) Version loop under the condition of runtime alias checks.  Given
7938fd1498Szrj 	loop distribution generally introduces additional overhead, it is
8038fd1498Szrj 	only useful if vectorization is achieved in distributed loop.  We
8138fd1498Szrj 	version loop with internal function call IFN_LOOP_DIST_ALIAS.  If
8238fd1498Szrj 	no distributed loop can be vectorized, we simply remove distributed
8338fd1498Szrj 	loops and recover to the original one.
8438fd1498Szrj 
8538fd1498Szrj    TODO:
8638fd1498Szrj      1) We only distribute innermost two-level loop nest now.  We should
8738fd1498Szrj 	extend it for arbitrary loop nests in the future.
8838fd1498Szrj      2) We only fuse partitions in SCC now.  A better fusion algorithm is
8938fd1498Szrj 	desired to minimize loop overhead, maximize parallelism and maximize
9038fd1498Szrj 	data reuse.  */
9138fd1498Szrj 
9238fd1498Szrj #include "config.h"
9338fd1498Szrj #define INCLUDE_ALGORITHM /* stable_sort */
9438fd1498Szrj #include "system.h"
9538fd1498Szrj #include "coretypes.h"
9638fd1498Szrj #include "backend.h"
9738fd1498Szrj #include "tree.h"
9838fd1498Szrj #include "gimple.h"
9938fd1498Szrj #include "cfghooks.h"
10038fd1498Szrj #include "tree-pass.h"
10138fd1498Szrj #include "ssa.h"
10238fd1498Szrj #include "gimple-pretty-print.h"
10338fd1498Szrj #include "fold-const.h"
10438fd1498Szrj #include "cfganal.h"
10538fd1498Szrj #include "gimple-iterator.h"
10638fd1498Szrj #include "gimplify-me.h"
10738fd1498Szrj #include "stor-layout.h"
10838fd1498Szrj #include "tree-cfg.h"
10938fd1498Szrj #include "tree-ssa-loop-manip.h"
11038fd1498Szrj #include "tree-ssa-loop-ivopts.h"
11138fd1498Szrj #include "tree-ssa-loop.h"
11238fd1498Szrj #include "tree-into-ssa.h"
11338fd1498Szrj #include "tree-ssa.h"
11438fd1498Szrj #include "cfgloop.h"
11538fd1498Szrj #include "tree-scalar-evolution.h"
11638fd1498Szrj #include "params.h"
11738fd1498Szrj #include "tree-vectorizer.h"
118*58e805e6Szrj #include "tree-eh.h"
11938fd1498Szrj 
12038fd1498Szrj 
/* Upper bound on the number of data references considered for data
   dependence analysis, taken from the
   PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS --param setting.  */
#define MAX_DATAREFS_NUM \
	((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))

/* Threshold controlling number of distributed partitions.  Given it may
   be unnecessary if a memory stream cost model is invented in the future,
   we define it as a temporary macro, rather than a parameter.  */
#define NUM_PARTITION_THRESHOLD (4)
12838fd1498Szrj 
/* Hashtable helpers.  */

/* Hasher for entries of DDRS_TABLE below; a data dependence relation is
   keyed by the (ordered) pair of data references it relates -- see the
   hash and equal implementations that follow.  */
struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation>
{
  static inline hashval_t hash (const data_dependence_relation *);
  static inline bool equal (const data_dependence_relation *,
			    const data_dependence_relation *);
};
13738fd1498Szrj 
/* Hash function for data dependence: combine the addresses of the two
   data references related by DDR.  Consistent with ddr_hasher::equal.  */

inline hashval_t
ddr_hasher::hash (const data_dependence_relation *ddr)
{
  inchash::hash h;
  h.add_ptr (DDR_A (ddr));
  h.add_ptr (DDR_B (ddr));
  return h.end ();
}
14838fd1498Szrj 
/* Hash table equality function for data dependence: DDR1 and DDR2 are
   equal iff they relate the same ordered pair of data references.  */

inline bool
ddr_hasher::equal (const data_dependence_relation *ddr1,
		   const data_dependence_relation *ddr2)
{
  return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
}
15738fd1498Szrj 
/* The loop (nest) to be distributed.  */
static vec<loop_p> loop_nest;

/* Vector of data references in the loop to be distributed.  */
static vec<data_reference_p> datarefs_vec;

/* The index of a data reference within DATAREFS_VEC is cached in the
   reference's aux field; this retrieves it.  */
#define DR_INDEX(dr)      ((uintptr_t) (dr)->aux)

/* Hash table for data dependence relations in the loop to be
   distributed, keyed by DDR_A/DDR_B (see ddr_hasher).  */
static hash_table<ddr_hasher> *ddrs_table;
16938fd1498Szrj 
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

/* Accessors for the rdg_vertex hanging off a generic graph vertex V via
   its data pointer, and for vertex number I of graph RDG.  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
19438fd1498Szrj 
/* Data dependence type.  The character values are used directly when
   dumping edges.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

/* Access the dependence type of RDG edge E through its data pointer.  */
#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
21538fd1498Szrj 
/* Dump vertex I in RDG to FILE: its index, r/w memory flags, the vertex
   numbers of its predecessors and successors, then the statement itself.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
24238fd1498Szrj 
/* Call dump_rdg_vertex on stderr; convenience entry point for use from
   a debugger.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
25038fd1498Szrj 
/* Dump the reduced dependence graph RDG to FILE, one vertex at a time
   in index order.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
26138fd1498Szrj 
/* Call dump_rdg on stderr; convenience entry point for use from a
   debugger.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
26938fd1498Szrj 
/* Write RDG to FILE in GraphViz "dot" syntax: one node per statement
   (filled green for memory reads, red for memory writes) and one arc
   per dependence edge.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Make the pretty printer emit directly into FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* These are the most common dependences: print them without
		a label to keep the graph readable.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

	   case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}
31838fd1498Szrj 
31938fd1498Szrj /* Display the Reduced Dependence Graph using dotty.  */
32038fd1498Szrj 
32138fd1498Szrj DEBUG_FUNCTION void
dot_rdg(struct graph * rdg)32238fd1498Szrj dot_rdg (struct graph *rdg)
32338fd1498Szrj {
32438fd1498Szrj   /* When debugging, you may want to enable the following code.  */
32538fd1498Szrj #ifdef HAVE_POPEN
32638fd1498Szrj   FILE *file = popen ("dot -Tx11", "w");
32738fd1498Szrj   if (!file)
32838fd1498Szrj     return;
32938fd1498Szrj   dot_rdg_1 (file, rdg);
33038fd1498Szrj   fflush (file);
33138fd1498Szrj   close (fileno (file));
33238fd1498Szrj   pclose (file);
33338fd1498Szrj #else
33438fd1498Szrj   dot_rdg_1 (stderr, rdg);
33538fd1498Szrj #endif
33638fd1498Szrj }
33738fd1498Szrj 
33838fd1498Szrj /* Returns the index of STMT in RDG.  */
33938fd1498Szrj 
34038fd1498Szrj static int
rdg_vertex_for_stmt(struct graph * rdg ATTRIBUTE_UNUSED,gimple * stmt)34138fd1498Szrj rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
34238fd1498Szrj {
34338fd1498Szrj   int index = gimple_uid (stmt);
34438fd1498Szrj   gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
34538fd1498Szrj   return index;
34638fd1498Szrj }
34738fd1498Szrj 
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  Each edge is a flow (RAW) dependence from
   the defining statement to a using statement inside the RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      /* Uses in statements that have no RDG vertex are ignored.  */
      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
37038fd1498Szrj 
/* Creates an edge for the control dependences of BB to the vertex V.
   CD supplies, per basic block, the set of CFG edges the block is
   control dependent on; for each such edge an RDG edge is added from
   the controlling statement's vertex to V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  /* Controlling statements without an RDG vertex are ignored.  */
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
39738fd1498Szrj 
/* Creates the flow (data) dependence edges of the reduced dependence
   graph RDG: for every scalar defined by each statement, add an edge
   to every statement that uses it.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
41238fd1498Szrj 
/* Creates the control dependence edges of the reduced dependence graph
   RDG from CD, for LOOP.  A PHI node depends on the control dependences
   of the source blocks of its incoming edges (restricted to LOOP); any
   other statement depends on those of its own block.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
43538fd1498Szrj 
/* Build the vertices of the reduced dependence graph RDG from STMTS of
   LOOP: one vertex per statement, with the statement's uid recording
   its vertex index.  Data references found in each statement are
   appended to the global DATAREFS_VEC and also recorded on the vertex.
   Return false if data reference analysis fails for some statement.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHI nodes carry no memory references; nothing to collect.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this statement's data references begin in the
	 global vector so only the new entries are scanned below.  */
      unsigned drp = datarefs_vec.length ();
      if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
	return false;
      for (unsigned j = drp; j < datarefs_vec.length (); ++j)
	{
	  data_reference_p dr = datarefs_vec[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
47538fd1498Szrj 
/* Array mapping basic block's index to its topological order.  */
static int *bb_top_order_index;
/* And size of the array (number of entries indexed by bb->index).  */
static int bb_top_order_index_size;
48038fd1498Szrj 
48138fd1498Szrj /* If X has a smaller topological sort number than Y, returns -1;
48238fd1498Szrj    if greater, returns 1.  */
48338fd1498Szrj 
48438fd1498Szrj static int
bb_top_order_cmp(const void * x,const void * y)48538fd1498Szrj bb_top_order_cmp (const void *x, const void *y)
48638fd1498Szrj {
48738fd1498Szrj   basic_block bb1 = *(const basic_block *) x;
48838fd1498Szrj   basic_block bb2 = *(const basic_block *) y;
48938fd1498Szrj 
49038fd1498Szrj   gcc_assert (bb1->index < bb_top_order_index_size
49138fd1498Szrj 	      && bb2->index < bb_top_order_index_size);
49238fd1498Szrj   gcc_assert (bb1 == bb2
49338fd1498Szrj 	      || bb_top_order_index[bb1->index]
49438fd1498Szrj 		 != bb_top_order_index[bb2->index]);
49538fd1498Szrj 
49638fd1498Szrj   return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
49738fd1498Szrj }
49838fd1498Szrj 
/* Initialize STMTS with all the statements of LOOP.  We use topological
   order to discover all statements.  The order is important because
   generate_loops_for_partition is using the same traversal for identifying
   statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* PHI nodes first; virtual PHIs are skipped.  */
      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      /* Then the block body, ignoring labels and debug statements.  */
      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
53038fd1498Szrj 
/* Free the reduced dependence graph RDG: edge payloads, vertex payloads
   (resetting each statement's uid to -1), and the graph itself.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      /* A vertex may lack data if create_rdg_vertices bailed out before
	 reaching it.  */
      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  (RDGV_DATAREFS (v)).release ();
	  free (v->data);
	}
    }

  free_graph (rdg);
}
55638fd1498Szrj 
/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
   LOOP, and one edge per flow dependence or control dependence from control
   dependence CD.  During visiting each statement, data references are also
   collected and recorded in global data DATAREFS_VEC.  Returns NULL if
   data reference analysis fails for some statement.  */

static struct graph *
build_rdg (struct loop *loop, control_dependences *cd)
{
  struct graph *rdg;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop, &stmts);
  rdg = new_graph (stmts.length ());
  if (!create_rdg_vertices (rdg, stmts, loop))
    {
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop);

  return rdg;
}
58438fd1498Szrj 
58538fd1498Szrj 
/* Kind of distributed loop.  */
enum partition_kind {
    PKIND_NORMAL,
    /* Partial memset stands for a partition that can be distributed into
       a loop of memset calls, rather than a single memset call.  It's
       handled just like a normal partition, i.e., distributed as a
       separate loop, no memset call is generated.

       Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
       loop nest as deep as possible.  As a result, parloop achieves better
       parallelization by parallelizing deeper loop nest.  This hack should
       be unnecessary and removed once distributed memset can be understood
       and analyzed in data reference analysis.  See PR82604 for more.  */
    PKIND_PARTIAL_MEMSET,
    PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

/* Type of distributed loop.  */
enum partition_type {
    /* The distributed loop can be executed in parallel.  */
    PTYPE_PARALLEL = 0,
    /* The distributed loop has to be executed sequentially.  */
    PTYPE_SEQUENTIAL
};
61038fd1498Szrj 
/* Builtin info for loop distribution.  */
struct builtin_info
{
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p dst_dr;
  data_reference_p src_dr;
  /* Base address and size of memory objects operated by the builtin.  Note
     both dest and source memory objects must have the same size.  */
  tree dst_base;
  tree src_base;
  tree size;
  /* Base and offset part of dst_base after stripping constant offset.  This
     is only used in memset builtin distribution for now.  */
  tree dst_base_base;
  unsigned HOST_WIDE_INT dst_base_offset;
};

/* Partition for loop distribution.  */
struct partition
{
  /* Statements of the partition.  */
  bitmap stmts;
  /* True if the partition defines variable which is used outside of loop.  */
  bool reduction_p;
  /* How this partition will be code generated (normal loop or builtin).  */
  enum partition_kind kind;
  /* Whether the partition can execute in parallel or must be sequential.  */
  enum partition_type type;
  /* Data references in the partition.  */
  bitmap datarefs;
  /* Information of builtin partition; NULL for normal partitions.  */
  struct builtin_info *builtin;
};
64238fd1498Szrj 
64338fd1498Szrj 
64438fd1498Szrj /* Allocate and initialize a partition from BITMAP.  */
64538fd1498Szrj 
64638fd1498Szrj static partition *
partition_alloc(void)64738fd1498Szrj partition_alloc (void)
64838fd1498Szrj {
64938fd1498Szrj   partition *partition = XCNEW (struct partition);
65038fd1498Szrj   partition->stmts = BITMAP_ALLOC (NULL);
65138fd1498Szrj   partition->reduction_p = false;
65238fd1498Szrj   partition->kind = PKIND_NORMAL;
65338fd1498Szrj   partition->datarefs = BITMAP_ALLOC (NULL);
65438fd1498Szrj   return partition;
65538fd1498Szrj }
65638fd1498Szrj 
65738fd1498Szrj /* Free PARTITION.  */
65838fd1498Szrj 
65938fd1498Szrj static void
partition_free(partition * partition)66038fd1498Szrj partition_free (partition *partition)
66138fd1498Szrj {
66238fd1498Szrj   BITMAP_FREE (partition->stmts);
66338fd1498Szrj   BITMAP_FREE (partition->datarefs);
66438fd1498Szrj   if (partition->builtin)
66538fd1498Szrj     free (partition->builtin);
66638fd1498Szrj 
66738fd1498Szrj   free (partition);
66838fd1498Szrj }
66938fd1498Szrj 
67038fd1498Szrj /* Returns true if the partition can be generated as a builtin.  */
67138fd1498Szrj 
67238fd1498Szrj static bool
partition_builtin_p(partition * partition)67338fd1498Szrj partition_builtin_p (partition *partition)
67438fd1498Szrj {
67538fd1498Szrj   return partition->kind > PKIND_PARTIAL_MEMSET;
67638fd1498Szrj }
67738fd1498Szrj 
/* Returns true if the partition contains a reduction, i.e. defines a
   variable used outside of the loop.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}
68538fd1498Szrj 
/* Partitions are fused because of different reasons.  Keep this enum in
   sync with the fuse_message[] table below, which is indexed by it.  */
enum fuse_type
{
  FUSE_NON_BUILTIN = 0,		/* Both partitions are non-builtins.  */
  FUSE_REDUCTION = 1,		/* A partition contains a reduction.  */
  FUSE_SHARE_REF = 2,		/* Partitions share a memory reference.  */
  FUSE_SAME_SCC = 3,		/* Partitions are in one dependence SCC.  */
  FUSE_FINALIZE = 4		/* Fused back because distributing the
				   loop would be pointless.  */
};
69538fd1498Szrj 
/* Description on different fusing reason.  Indexed by enum fuse_type;
   keep the entries in the same order as the enumerators.  */
static const char *fuse_message[] = {
  "they are non-builtins",
  "they have reductions",
  "they have shared memory refs",
  "they are in the same dependence scc",
  "there is no point to distribute loop"};
70338fd1498Szrj 
/* Forward declaration; the definition follows the dependence-analysis
   helpers further below.  */
static void
update_type_for_merge (struct graph *, partition *, partition *);
70638fd1498Szrj 
/* Merge PARTITION into the partition DEST.  RDG is the reduced dependence
   graph and we update type for result partition if it is non-NULL.  */

static void
partition_merge_into (struct graph *rdg, partition *dest,
		      partition *partition, enum fuse_type ft)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]);
      fprintf (dump_file, "  Part 1: ");
      dump_bitmap (dump_file, dest->stmts);
      fprintf (dump_file, "  Part 2: ");
      dump_bitmap (dump_file, partition->stmts);
    }

  /* The fused partition is no longer a recognized builtin pattern.  */
  dest->kind = PKIND_NORMAL;
  /* A parallel partition fused with a sequential one must become
     sequential; the converse degradation never happens here.  */
  if (dest->type == PTYPE_PARALLEL)
    dest->type = partition->type;

  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;

  /* Further check if any data dependence prevents us from executing the
     new partition parallelly.  Note DEST's dataref bitmap is merged only
     after this call, so the check runs against the two original dataref
     sets.  */
  if (dest->type == PTYPE_PARALLEL && rdg != NULL)
    update_type_for_merge (rdg, dest, partition);

  bitmap_ior_into (dest->datarefs, partition->datarefs);
}
73838fd1498Szrj 
73938fd1498Szrj 
74038fd1498Szrj /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
74138fd1498Szrj    the LOOP.  */
74238fd1498Szrj 
74338fd1498Szrj static bool
ssa_name_has_uses_outside_loop_p(tree def,loop_p loop)74438fd1498Szrj ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
74538fd1498Szrj {
74638fd1498Szrj   imm_use_iterator imm_iter;
74738fd1498Szrj   use_operand_p use_p;
74838fd1498Szrj 
74938fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
75038fd1498Szrj     {
75138fd1498Szrj       if (is_gimple_debug (USE_STMT (use_p)))
75238fd1498Szrj 	continue;
75338fd1498Szrj 
75438fd1498Szrj       basic_block use_bb = gimple_bb (USE_STMT (use_p));
75538fd1498Szrj       if (!flow_bb_inside_loop_p (loop, use_bb))
75638fd1498Szrj 	return true;
75738fd1498Szrj     }
75838fd1498Szrj 
75938fd1498Szrj   return false;
76038fd1498Szrj }
76138fd1498Szrj 
76238fd1498Szrj /* Returns true when STMT defines a scalar variable used after the
76338fd1498Szrj    loop LOOP.  */
76438fd1498Szrj 
76538fd1498Szrj static bool
stmt_has_scalar_dependences_outside_loop(loop_p loop,gimple * stmt)76638fd1498Szrj stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
76738fd1498Szrj {
76838fd1498Szrj   def_operand_p def_p;
76938fd1498Szrj   ssa_op_iter op_iter;
77038fd1498Szrj 
77138fd1498Szrj   if (gimple_code (stmt) == GIMPLE_PHI)
77238fd1498Szrj     return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
77338fd1498Szrj 
77438fd1498Szrj   FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
77538fd1498Szrj     if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
77638fd1498Szrj       return true;
77738fd1498Szrj 
77838fd1498Szrj   return false;
77938fd1498Szrj }
78038fd1498Szrj 
78138fd1498Szrj /* Return a copy of LOOP placed before LOOP.  */
78238fd1498Szrj 
78338fd1498Szrj static struct loop *
copy_loop_before(struct loop * loop)78438fd1498Szrj copy_loop_before (struct loop *loop)
78538fd1498Szrj {
78638fd1498Szrj   struct loop *res;
78738fd1498Szrj   edge preheader = loop_preheader_edge (loop);
78838fd1498Szrj 
78938fd1498Szrj   initialize_original_copy_tables ();
79038fd1498Szrj   res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
79138fd1498Szrj   gcc_assert (res != NULL);
79238fd1498Szrj   free_original_copy_tables ();
79338fd1498Szrj   delete_update_ssa ();
79438fd1498Szrj 
79538fd1498Szrj   return res;
79638fd1498Szrj }
79738fd1498Szrj 
79838fd1498Szrj /* Creates an empty basic block after LOOP.  */
79938fd1498Szrj 
80038fd1498Szrj static void
create_bb_after_loop(struct loop * loop)80138fd1498Szrj create_bb_after_loop (struct loop *loop)
80238fd1498Szrj {
80338fd1498Szrj   edge exit = single_exit (loop);
80438fd1498Szrj 
80538fd1498Szrj   if (!exit)
80638fd1498Szrj     return;
80738fd1498Szrj 
80838fd1498Szrj   split_edge (exit);
80938fd1498Szrj }
81038fd1498Szrj 
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a copy placed before LOOP; carry over the original loop
	 number so the copy stays associated with the versioned loop.  */
      int orig_loop_num = loop->orig_loop_num;
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      loop->orig_loop_num = orig_loop_num;
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }
  else
    {
      /* Origin number is set to the new versioned loop's num.  */
      gcc_assert (loop->orig_loop_num != loop->num);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset debug uses of PHIs and statements that are about
     to be removed, so debug binds do not keep released SSA names.  */
  if (MAY_HAVE_DEBUG_BIND_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove everything not in PARTITION.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      edge inner_exit = NULL;

      /* BB belongs to an inner loop of the distributed loop nest.  */
      if (loop != bb->loop_father)
	inner_exit = single_exit (bb->loop_father);

      /* Drop unwanted non-virtual PHIs; keep virtual ones for now.  */
      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* In distribution of loop nest, if bb is inner loop's exit_bb,
		 we choose its exit edge/path in order to avoid generating
		 infinite loop.  For all other cases, we choose an arbitrary
		 path through the empty CFG part that this unnecessary
		 control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  if (inner_exit && inner_exit->flags & EDGE_TRUE_VALUE)
		    gimple_cond_make_true (cond_stmt);
		  else
		    gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  /* Force the switch to an arbitrary case (label 1).  */
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  /* Plain statement: remove it and release its defs.  */
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
92538fd1498Szrj 
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  /* Integer zero and an empty CONSTRUCTOR are all-zero bytes.  */
  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  /* +0.0 only if both parts recurse to all-zero bytes.  */
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  {
	    /* All encoded elements must recurse to all-zero bytes;
	       a nonzero result (including -1 for -0.0) disqualifies.  */
	    unsigned int count = vector_cst_encoded_nelts (val);
	    unsigned int j;
	    for (j = 0; j < count; ++j)
	      if (const_with_all_bytes_same (VECTOR_CST_ENCODED_ELT (val, j)))
		break;
	    if (j == count)
	      return 0;
	    break;
	  }
	default:
	  /* Fall through to the byte-encoding check below.  */
	  break;
	}
    }

  /* The native encoding below assumes 8-bit bytes.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  /* Encode VAL into its target byte representation and require every
     byte to equal the first.  */
  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
98638fd1498Szrj 
/* Generate a call to memset for PARTITION in LOOP, using the builtin
   info (base address, size, source store) recorded on the partition.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  tree mem, fn, nb_bytes;
  tree val;
  struct builtin_info *builtin = partition->builtin;
  gimple *fn_call;

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Gimplify the byte count and destination address into operands
     valid before the loop.  */
  nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = builtin->dst_base;
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (DR_STMT (builtin->dst_dr));
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* Convert the stored value to the int argument memset expects.  */
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
103838fd1498Szrj 
/* Generate a call to memcpy (or memmove, when source and destination
   may overlap) for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *fn_call;
  tree dest, src, fn, nb_bytes;
  enum built_in_function kind;
  struct builtin_info *builtin = partition->builtin;

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Gimplify the byte count into an operand valid before the loop.  */
  nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = builtin->dst_base;
  src = builtin->src_base;
  /* memcpy requires non-overlapping regions; fall back to memmove
     when the pointers may alias.  */
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
108038fd1498Szrj 
/* Remove and destroy the loop LOOP: redirect its single exit to the
   preheader's source block, delete all of its basic blocks, and repair
   dominators afterwards.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop entirely: the exit edge now runs straight from the
     block before the loop to the block after it.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  /* Delete the loop body in reverse dominance order, so dominated
     blocks go before their dominators.  */
  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  /* DEST lost predecessors; recompute its immediate dominator.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
113338fd1498Szrj 
113438fd1498Szrj /* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */
113538fd1498Szrj 
113638fd1498Szrj static bool
generate_code_for_partition(struct loop * loop,partition * partition,bool copy_p)113738fd1498Szrj generate_code_for_partition (struct loop *loop,
113838fd1498Szrj 			     partition *partition, bool copy_p)
113938fd1498Szrj {
114038fd1498Szrj   switch (partition->kind)
114138fd1498Szrj     {
114238fd1498Szrj     case PKIND_NORMAL:
114338fd1498Szrj     case PKIND_PARTIAL_MEMSET:
114438fd1498Szrj       /* Reductions all have to be in the last partition.  */
114538fd1498Szrj       gcc_assert (!partition_reduction_p (partition)
114638fd1498Szrj 		  || !copy_p);
114738fd1498Szrj       generate_loops_for_partition (loop, partition, copy_p);
114838fd1498Szrj       return false;
114938fd1498Szrj 
115038fd1498Szrj     case PKIND_MEMSET:
115138fd1498Szrj       generate_memset_builtin (loop, partition);
115238fd1498Szrj       break;
115338fd1498Szrj 
115438fd1498Szrj     case PKIND_MEMCPY:
115538fd1498Szrj     case PKIND_MEMMOVE:
115638fd1498Szrj       generate_memcpy_builtin (loop, partition);
115738fd1498Szrj       break;
115838fd1498Szrj 
115938fd1498Szrj     default:
116038fd1498Szrj       gcc_unreachable ();
116138fd1498Szrj     }
116238fd1498Szrj 
116338fd1498Szrj   /* Common tail for partitions we turn into a call.  If this was the last
116438fd1498Szrj      partition for which we generate code, we have to destroy the loop.  */
116538fd1498Szrj   if (!copy_p)
116638fd1498Szrj     return true;
116738fd1498Szrj   return false;
116838fd1498Szrj }
116938fd1498Szrj 
/* Return data dependence relation for data references A and B.  The two
   data references must be in lexicographic order wrto reduced dependence
   graph RDG.  We firstly try to find ddr from global ddr hash table.  If
   it doesn't exist, compute the ddr and cache it.  */

static data_dependence_relation *
get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
{
  struct data_dependence_relation ent, **slot;
  struct data_dependence_relation *ddr;

  /* At least one side must be a write, and A must not come after B in
     the RDG vertex order (the cache is keyed on the ordered pair).  */
  gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b));
  gcc_assert (rdg_vertex_for_stmt (rdg, DR_STMT (a))
	      <= rdg_vertex_for_stmt (rdg, DR_STMT (b)));
  ent.a = a;
  ent.b = b;
  slot = ddrs_table->find_slot (&ent, INSERT);
  if (*slot == NULL)
    {
      /* Cache miss: compute the dependence once and remember it.  */
      ddr = initialize_data_dependence_relation (a, b, loop_nest);
      compute_affine_dependence (ddr, loop_nest[0]);
      *slot = ddr;
    }

  return *slot;
}
119638fd1498Szrj 
/* In reduced dependence graph RDG for loop distribution, return true if
   dependence between references DR1 and DR2 leads to a dependence cycle
   and such dependence cycle can't be resolved by runtime alias check.  */

static bool
data_dep_in_cycle_p (struct graph *rdg,
		     data_reference_p dr1, data_reference_p dr2)
{
  struct data_dependence_relation *ddr;

  /* Re-shuffle data-refs to be in topological order.  */
  if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
      > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
    std::swap (dr1, dr2);

  ddr = get_data_dependence (rdg, dr1, dr2);

  /* In case of no data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;
  /* For unknown data dependence or known data dependence which can't be
     expressed in classic distance vector, we check if it can be resolved
     by runtime alias check.  If yes, we still consider data dependence
     as won't introduce data dependence cycle.  */
  else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
	   || DDR_NUM_DIST_VECTS (ddr) == 0)
    return !runtime_alias_check_p (ddr, NULL, true);
  /* Multiple distance vectors: conservatively assume a cycle.  */
  else if (DDR_NUM_DIST_VECTS (ddr) > 1)
    return true;
  /* A reversed dependence or a zero distance in the innermost loop does
     not create a cycle.  */
  else if (DDR_REVERSED_P (ddr)
	   || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
    return false;

  return true;
}
123238fd1498Szrj 
/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
   PARTITION1's type after merging PARTITION2 into PARTITION1.  The two
   arguments may be the same partition, in which case every unordered
   pair of its own data references is checked once.  */

static void
update_type_for_merge (struct graph *rdg,
		       partition *partition1, partition *partition2)
{
  unsigned i, j;
  bitmap_iterator bi, bj;
  data_reference_p dr1, dr2;

  EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
    {
      /* For the self-check case, start past I to avoid checking the
	 same pair twice (or a ref against itself).  */
      unsigned start = (partition1 == partition2) ? i + 1 : 0;

      dr1 = datarefs_vec[i];
      EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, start, j, bj)
	{
	  dr2 = datarefs_vec[j];
	  /* Read-read pairs never create a dependence.  */
	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
	    continue;

	  /* Partition can only be executed sequentially if there is any
	     data dependence cycle.  */
	  if (data_dep_in_cycle_p (rdg, dr1, dr2))
	    {
	      partition1->type = PTYPE_SEQUENTIAL;
	      return;
	    }
	}
    }
}
126538fd1498Szrj 
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc ();
  auto_vec<int, 3> nodes;
  unsigned i, j;
  int x;
  data_reference_p dr;

  /* Collect V and everything it transitively depends on.  */
  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);

      /* Record every data reference of the collected statements.  */
      for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j)
	{
	  unsigned idx = (unsigned) DR_INDEX (dr);
	  gcc_assert (idx < datarefs_vec.length ());

	  /* Partition can only be executed sequentially if there is any
	     unknown data reference.  */
	  if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr)
	      || !DR_INIT (dr) || !DR_STEP (dr))
	    partition->type = PTYPE_SEQUENTIAL;

	  bitmap_set_bit (partition->datarefs, idx);
	}
    }

  /* Already known sequential; no need for the dependence check.  */
  if (partition->type == PTYPE_SEQUENTIAL)
    return partition;

  /* Further check if any data dependence prevents us from executing the
     partition parallelly.  */
  update_type_for_merge (rdg, partition, partition);

  return partition;
}
130838fd1498Szrj 
/* Given PARTITION of LOOP and RDG, record single load/store data references
   for builtin partition in SRC_DR/DST_DR, return false if there is no such
   data references.  On success *DST_DR is the single store and *SRC_DR is
   the single load or NULL (a store-only, memset-like partition).  */

static bool
find_single_drs (struct loop *loop, struct graph *rdg, partition *partition,
		 data_reference_p *dst_dr, data_reference_p *src_dr)
{
  unsigned i;
  data_reference_p single_ld = NULL, single_st = NULL;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return false;

      /* But exactly one store and/or load.  */
      for (unsigned j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  tree type = TREE_TYPE (DR_REF (dr));

	  /* The memset, memcpy and memmove library calls are only
	     able to deal with generic address space.  */
	  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
	    return false;

	  if (DR_IS_READ (dr))
	    {
	      if (single_ld != NULL)
		return false;
	      single_ld = dr;
	    }
	  else
	    {
	      if (single_st != NULL)
		return false;
	      single_st = dr;
	    }
	}
    }

  /* A builtin partition must at least have a store.  */
  if (!single_st)
    return false;

  /* Bail out if this is a bitfield memory reference.  */
  if (TREE_CODE (DR_REF (single_st)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st), 1)))
    return false;

  /* Data reference must be executed exactly once per iteration of each
     loop in the loop nest.  We only need to check dominance information
     against the outermost one in a perfect loop nest because a bb can't
     dominate outermost loop's latch without dominating inner loop's.  */
  basic_block bb_st = gimple_bb (DR_STMT (single_st));
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_st))
    return false;

  if (single_ld)
    {
      gimple *store = DR_STMT (single_st), *load = DR_STMT (single_ld);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return false;

      /* Bail out if this is a bitfield memory reference.  */
      if (TREE_CODE (DR_REF (single_ld)) == COMPONENT_REF
	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_ld), 1)))
	return false;

      /* Load and store must be in the same loop nest.  */
      basic_block bb_ld = gimple_bb (DR_STMT (single_ld));
      if (bb_st->loop_father != bb_ld->loop_father)
	return false;

      /* Data reference must be executed exactly once per iteration.
	 Same as single_st, we only need to check against the outermost
	 loop.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_ld))
	return false;

      /* Load and store must be executed under the same condition: both
	 must dominate the exit source block, or neither does.  */
      edge e = single_exit (bb_st->loop_father);
      bool dom_ld = dominated_by_p (CDI_DOMINATORS, e->src, bb_ld);
      bool dom_st = dominated_by_p (CDI_DOMINATORS, e->src, bb_st);
      if (dom_ld != dom_st)
	return false;
    }

  *src_dr = single_ld;
  *dst_dr = single_st;
  return true;
}
141338fd1498Szrj 
/* Given data reference DR in LOOP_NEST, this function checks the enclosing
   loops from inner to outer to see if loop's step equals to access size at
   each level of loop.  Return 2 if we can prove this at all level loops;
   record access base and size in BASE and SIZE; save loop's step at each
   level of loop in STEPS if it is not null.  For example:

     int arr[100][100][100];
     for (i = 0; i < 100; i++)       ;steps[2] = 40000
       for (j = 100; j > 0; j--)     ;steps[1] = -400
	 for (k = 0; k < 100; k++)   ;steps[0] = 4
	   arr[i][j - 1][k] = 0;     ;base = &arr, size = 4000000

   Return 1 if we can prove the equality at the innermost loop, but not all
   level loops.  In this case, no information is recorded.

   Return 0 if no equality can be proven at any level loops.

   Note on early returns: STEPS may already contain entries for inner loops
   when a value < 2 is returned; callers must only rely on STEPS (and BASE
   and SIZE) when 2 is returned.  */

static int
compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base,
		      tree *size, vec<tree> *steps = NULL)
{
  location_t loc = gimple_location (DR_STMT (dr));
  basic_block bb = gimple_bb (DR_STMT (dr));
  struct loop *loop = bb->loop_father;
  tree ref = DR_REF (dr);
  /* Start from the address of the single access and the size of one
     element; each iteration widens ACCESS_SIZE to cover one more loop
     level and rewinds ACCESS_BASE accordingly.  */
  tree access_base = build_fold_addr_expr (ref);
  tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
  int res = 0;

  /* Walk the nest from the innermost loop containing DR out to LOOP_NEST.  */
  do {
      tree scev_fn = analyze_scalar_evolution (loop, access_base);
      if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
	return res;

      /* The initial value at this level must itself be loop-invariant.  */
      access_base = CHREC_LEFT (scev_fn);
      if (tree_contains_chrecs (access_base, NULL))
	return res;

      tree scev_step = CHREC_RIGHT (scev_fn);
      /* Only support constant steps.  */
      if (TREE_CODE (scev_step) != INTEGER_CST)
	return res;

      enum ev_direction access_dir = scev_direction (scev_fn);
      if (access_dir == EV_DIR_UNKNOWN)
	return res;

      /* Record the (signed) step before normalizing it below.  */
      if (steps != NULL)
	steps->safe_push (scev_step);

      scev_step = fold_convert_loc (loc, sizetype, scev_step);
      /* Compute absolute value of scev step.  */
      if (access_dir == EV_DIR_DECREASES)
	scev_step = fold_build1_loc (loc, NEGATE_EXPR, sizetype, scev_step);

      /* At each level of loop, scev step must equal to access size.  In other
	 words, DR must access consecutive memory between loop iterations.  */
      if (!operand_equal_p (scev_step, access_size, 0))
	return res;

      /* Access stride can be computed for data reference at least for the
	 innermost loop.  */
      res = 1;

      /* Compute DR's execution times in loop.  If DR's block dominates the
	 single exit's source block, it also runs on the final iteration, so
	 add one to the latch execution count.  */
      tree niters = number_of_latch_executions (loop);
      niters = fold_convert_loc (loc, sizetype, niters);
      if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, bb))
	niters = size_binop_loc (loc, PLUS_EXPR, niters, size_one_node);

      /* Compute DR's overall access size in loop.  */
      access_size = fold_build2_loc (loc, MULT_EXPR, sizetype,
				     niters, scev_step);
      /* Adjust base address in case of negative step.  */
      if (access_dir == EV_DIR_DECREASES)
	{
	  tree adj = fold_build2_loc (loc, MINUS_EXPR, sizetype,
				      scev_step, access_size);
	  access_base = fold_build_pointer_plus_loc (loc, access_base, adj);
	}
  } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);

  *base = access_base;
  *size = access_size;
  /* Access stride can be computed for data reference at each level loop.  */
  return 2;
}
150138fd1498Szrj 
150238fd1498Szrj /* Allocate and return builtin struct.  Record information like DST_DR,
150338fd1498Szrj    SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct.  */
150438fd1498Szrj 
150538fd1498Szrj static struct builtin_info *
alloc_builtin(data_reference_p dst_dr,data_reference_p src_dr,tree dst_base,tree src_base,tree size)150638fd1498Szrj alloc_builtin (data_reference_p dst_dr, data_reference_p src_dr,
150738fd1498Szrj 	       tree dst_base, tree src_base, tree size)
150838fd1498Szrj {
150938fd1498Szrj   struct builtin_info *builtin = XNEW (struct builtin_info);
151038fd1498Szrj   builtin->dst_dr = dst_dr;
151138fd1498Szrj   builtin->src_dr = src_dr;
151238fd1498Szrj   builtin->dst_base = dst_base;
151338fd1498Szrj   builtin->src_base = src_base;
151438fd1498Szrj   builtin->size = size;
151538fd1498Szrj   return builtin;
151638fd1498Szrj }
151738fd1498Szrj 
151838fd1498Szrj /* Given data reference DR in loop nest LOOP, classify if it forms builtin
151938fd1498Szrj    memset call.  */
152038fd1498Szrj 
152138fd1498Szrj static void
classify_builtin_st(loop_p loop,partition * partition,data_reference_p dr)152238fd1498Szrj classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
152338fd1498Szrj {
152438fd1498Szrj   gimple *stmt = DR_STMT (dr);
152538fd1498Szrj   tree base, size, rhs = gimple_assign_rhs1 (stmt);
152638fd1498Szrj 
152738fd1498Szrj   if (const_with_all_bytes_same (rhs) == -1
152838fd1498Szrj       && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
152938fd1498Szrj 	  || (TYPE_MODE (TREE_TYPE (rhs))
153038fd1498Szrj 	      != TYPE_MODE (unsigned_char_type_node))))
153138fd1498Szrj     return;
153238fd1498Szrj 
153338fd1498Szrj   if (TREE_CODE (rhs) == SSA_NAME
153438fd1498Szrj       && !SSA_NAME_IS_DEFAULT_DEF (rhs)
153538fd1498Szrj       && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
153638fd1498Szrj     return;
153738fd1498Szrj 
153838fd1498Szrj   int res = compute_access_range (loop, dr, &base, &size);
153938fd1498Szrj   if (res == 0)
154038fd1498Szrj     return;
154138fd1498Szrj   if (res == 1)
154238fd1498Szrj     {
154338fd1498Szrj       partition->kind = PKIND_PARTIAL_MEMSET;
154438fd1498Szrj       return;
154538fd1498Szrj     }
154638fd1498Szrj 
154738fd1498Szrj   poly_uint64 base_offset;
154838fd1498Szrj   unsigned HOST_WIDE_INT const_base_offset;
154938fd1498Szrj   tree base_base = strip_offset (base, &base_offset);
155038fd1498Szrj   if (!base_offset.is_constant (&const_base_offset))
155138fd1498Szrj     return;
155238fd1498Szrj 
155338fd1498Szrj   struct builtin_info *builtin;
155438fd1498Szrj   builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
155538fd1498Szrj   builtin->dst_base_base = base_base;
155638fd1498Szrj   builtin->dst_base_offset = const_base_offset;
155738fd1498Szrj   partition->builtin = builtin;
155838fd1498Szrj   partition->kind = PKIND_MEMSET;
155938fd1498Szrj }
156038fd1498Szrj 
/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
   if it forms builtin memcpy or memmove call; if so record the result in
   PARTITION.  */

static void
classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
		       data_reference_p dst_dr, data_reference_p src_dr)
{
  tree base, size, src_base, src_size;
  auto_vec<tree> dst_steps, src_steps;

  /* Compute access range of both load and store.  Both must be consecutive
     at every level of the loop nest (return value 2).  */
  int res = compute_access_range (loop, dst_dr, &base, &size, &dst_steps);
  if (res != 2)
    return;
  res = compute_access_range (loop, src_dr, &src_base, &src_size, &src_steps);
  if (res != 2)
    return;

  /* They must have the same access size.  */
  if (!operand_equal_p (size, src_size, 0))
    return;

  /* Load and store in loop nest must access memory in the same way, i.e.,
     they must have the same steps in each loop of the nest.  */
  if (dst_steps.length () != src_steps.length ())
    return;
  for (unsigned i = 0; i < dst_steps.length (); ++i)
    if (!operand_equal_p (dst_steps[i], src_steps[i], 0))
      return;

  /* Now check if there is a dependence.  */
  ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);

  /* Classify as memcpy if no dependence between load and store.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
      partition->kind = PKIND_MEMCPY;
      return;
    }

  /* Can't do memmove in case of unknown dependence or dependence without
     classical distance vector.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
      || DDR_NUM_DIST_VECTS (ddr) == 0)
    return;

  unsigned i;
  lambda_vector dist_v;
  int num_lev = (DDR_LOOP_NEST (ddr)).length ();
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      unsigned dep_lev = dependence_level (dist_v, num_lev);
      /* Can't do memmove if load depends on store.  memmove only handles
	 the case where the source is read before it is overwritten.  */
      if (dep_lev > 0 && dist_v[dep_lev - 1] > 0 && !DDR_REVERSED_P (ddr))
	return;
    }

  partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
  partition->kind = PKIND_MEMMOVE;
  return;
}
162338fd1498Szrj 
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  Bitmap
   STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition,
		    bitmap stmt_in_all_partitions)
{
  bitmap_iterator bi;
  unsigned i;
  data_reference_p single_ld = NULL, single_st = NULL;
  bool volatiles_p = false, has_reduction = false;

  /* Scan all statements of the partition, noting volatile operations and
     reduction-like statements (scalar uses outside of the loop).  */
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt is not included by all partitions and there are uses
	 outside of the loop, then mark the partition as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  /* Due to limitation in the transform phase we have to fuse all
	     reduction partitions.  As a result, this could cancel valid
	     loop distribution especially for loops whose induction variable
	     is used outside of the loop.  To workaround this issue, we skip
	     marking the partition as reduction if the reduction stmt belongs
	     to all partitions.  In such case, reduction will be computed
	     correctly no matter how partitions are fused/distributed.  */
	  if (!bitmap_bit_p (stmt_in_all_partitions, i))
	    {
	      partition->reduction_p = true;
	      return;
	    }
	  has_reduction = true;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      /* Simple workaround to prevent classifying the partition as builtin
	 if it contains any use outside of loop.  */
      || has_reduction
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Find single load/store data references for builtin partition.  */
  if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
    return;

  /* Classify the builtin kind: store only is a memset candidate, a
     load/store pair is a memcpy/memmove candidate.  */
  if (single_ld == NULL)
    classify_builtin_st (loop, partition, single_st);
  else
    classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
}
168238fd1498Szrj 
168338fd1498Szrj /* Returns true when PARTITION1 and PARTITION2 access the same memory
168438fd1498Szrj    object in RDG.  */
168538fd1498Szrj 
168638fd1498Szrj static bool
share_memory_accesses(struct graph * rdg,partition * partition1,partition * partition2)168738fd1498Szrj share_memory_accesses (struct graph *rdg,
168838fd1498Szrj 		       partition *partition1, partition *partition2)
168938fd1498Szrj {
169038fd1498Szrj   unsigned i, j;
169138fd1498Szrj   bitmap_iterator bi, bj;
169238fd1498Szrj   data_reference_p dr1, dr2;
169338fd1498Szrj 
169438fd1498Szrj   /* First check whether in the intersection of the two partitions are
169538fd1498Szrj      any loads or stores.  Common loads are the situation that happens
169638fd1498Szrj      most often.  */
169738fd1498Szrj   EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
169838fd1498Szrj     if (RDG_MEM_WRITE_STMT (rdg, i)
169938fd1498Szrj 	|| RDG_MEM_READS_STMT (rdg, i))
170038fd1498Szrj       return true;
170138fd1498Szrj 
170238fd1498Szrj   /* Then check whether the two partitions access the same memory object.  */
170338fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
170438fd1498Szrj     {
170538fd1498Szrj       dr1 = datarefs_vec[i];
170638fd1498Szrj 
170738fd1498Szrj       if (!DR_BASE_ADDRESS (dr1)
170838fd1498Szrj 	  || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
170938fd1498Szrj 	continue;
171038fd1498Szrj 
171138fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
171238fd1498Szrj 	{
171338fd1498Szrj 	  dr2 = datarefs_vec[j];
171438fd1498Szrj 
171538fd1498Szrj 	  if (!DR_BASE_ADDRESS (dr2)
171638fd1498Szrj 	      || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
171738fd1498Szrj 	    continue;
171838fd1498Szrj 
171938fd1498Szrj 	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
172038fd1498Szrj 	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
172138fd1498Szrj 	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
172238fd1498Szrj 	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
172338fd1498Szrj 	    return true;
172438fd1498Szrj 	}
172538fd1498Szrj     }
172638fd1498Szrj 
172738fd1498Szrj   return false;
172838fd1498Szrj }
172938fd1498Szrj 
173038fd1498Szrj /* For each seed statement in STARTING_STMTS, this function builds
173138fd1498Szrj    partition for it by adding depended statements according to RDG.
173238fd1498Szrj    All partitions are recorded in PARTITIONS.  */
173338fd1498Szrj 
173438fd1498Szrj static void
rdg_build_partitions(struct graph * rdg,vec<gimple * > starting_stmts,vec<partition * > * partitions)173538fd1498Szrj rdg_build_partitions (struct graph *rdg,
173638fd1498Szrj 		      vec<gimple *> starting_stmts,
173738fd1498Szrj 		      vec<partition *> *partitions)
173838fd1498Szrj {
173938fd1498Szrj   auto_bitmap processed;
174038fd1498Szrj   int i;
174138fd1498Szrj   gimple *stmt;
174238fd1498Szrj 
174338fd1498Szrj   FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
174438fd1498Szrj     {
174538fd1498Szrj       int v = rdg_vertex_for_stmt (rdg, stmt);
174638fd1498Szrj 
174738fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
174838fd1498Szrj 	fprintf (dump_file,
174938fd1498Szrj 		 "ldist asked to generate code for vertex %d\n", v);
175038fd1498Szrj 
175138fd1498Szrj       /* If the vertex is already contained in another partition so
175238fd1498Szrj          is the partition rooted at it.  */
175338fd1498Szrj       if (bitmap_bit_p (processed, v))
175438fd1498Szrj 	continue;
175538fd1498Szrj 
175638fd1498Szrj       partition *partition = build_rdg_partition_for_vertex (rdg, v);
175738fd1498Szrj       bitmap_ior_into (processed, partition->stmts);
175838fd1498Szrj 
175938fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
176038fd1498Szrj 	{
176138fd1498Szrj 	  fprintf (dump_file, "ldist creates useful %s partition:\n",
176238fd1498Szrj 		   partition->type == PTYPE_PARALLEL ? "parallel" : "sequent");
176338fd1498Szrj 	  bitmap_print (dump_file, partition->stmts, "  ", "\n");
176438fd1498Szrj 	}
176538fd1498Szrj 
176638fd1498Szrj       partitions->safe_push (partition);
176738fd1498Szrj     }
176838fd1498Szrj 
176938fd1498Szrj   /* All vertices should have been assigned to at least one partition now,
177038fd1498Szrj      other than vertices belonging to dead code.  */
177138fd1498Szrj }
177238fd1498Szrj 
177338fd1498Szrj /* Dump to FILE the PARTITIONS.  */
177438fd1498Szrj 
177538fd1498Szrj static void
dump_rdg_partitions(FILE * file,vec<partition * > partitions)177638fd1498Szrj dump_rdg_partitions (FILE *file, vec<partition *> partitions)
177738fd1498Szrj {
177838fd1498Szrj   int i;
177938fd1498Szrj   partition *partition;
178038fd1498Szrj 
178138fd1498Szrj   FOR_EACH_VEC_ELT (partitions, i, partition)
178238fd1498Szrj     debug_bitmap_file (file, partition->stmts);
178338fd1498Szrj }
178438fd1498Szrj 
/* Debug PARTITIONS by dumping them to stderr.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
179338fd1498Szrj 
179438fd1498Szrj /* Returns the number of read and write operations in the RDG.  */
179538fd1498Szrj 
179638fd1498Szrj static int
number_of_rw_in_rdg(struct graph * rdg)179738fd1498Szrj number_of_rw_in_rdg (struct graph *rdg)
179838fd1498Szrj {
179938fd1498Szrj   int i, res = 0;
180038fd1498Szrj 
180138fd1498Szrj   for (i = 0; i < rdg->n_vertices; i++)
180238fd1498Szrj     {
180338fd1498Szrj       if (RDG_MEM_WRITE_STMT (rdg, i))
180438fd1498Szrj 	++res;
180538fd1498Szrj 
180638fd1498Szrj       if (RDG_MEM_READS_STMT (rdg, i))
180738fd1498Szrj 	++res;
180838fd1498Szrj     }
180938fd1498Szrj 
181038fd1498Szrj   return res;
181138fd1498Szrj }
181238fd1498Szrj 
181338fd1498Szrj /* Returns the number of read and write operations in a PARTITION of
181438fd1498Szrj    the RDG.  */
181538fd1498Szrj 
181638fd1498Szrj static int
number_of_rw_in_partition(struct graph * rdg,partition * partition)181738fd1498Szrj number_of_rw_in_partition (struct graph *rdg, partition *partition)
181838fd1498Szrj {
181938fd1498Szrj   int res = 0;
182038fd1498Szrj   unsigned i;
182138fd1498Szrj   bitmap_iterator ii;
182238fd1498Szrj 
182338fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
182438fd1498Szrj     {
182538fd1498Szrj       if (RDG_MEM_WRITE_STMT (rdg, i))
182638fd1498Szrj 	++res;
182738fd1498Szrj 
182838fd1498Szrj       if (RDG_MEM_READS_STMT (rdg, i))
182938fd1498Szrj 	++res;
183038fd1498Szrj     }
183138fd1498Szrj 
183238fd1498Szrj   return res;
183338fd1498Szrj }
183438fd1498Szrj 
183538fd1498Szrj /* Returns true when one of the PARTITIONS contains all the read or
183638fd1498Szrj    write operations of RDG.  */
183738fd1498Szrj 
183838fd1498Szrj static bool
partition_contains_all_rw(struct graph * rdg,vec<partition * > partitions)183938fd1498Szrj partition_contains_all_rw (struct graph *rdg,
184038fd1498Szrj 			   vec<partition *> partitions)
184138fd1498Szrj {
184238fd1498Szrj   int i;
184338fd1498Szrj   partition *partition;
184438fd1498Szrj   int nrw = number_of_rw_in_rdg (rdg);
184538fd1498Szrj 
184638fd1498Szrj   FOR_EACH_VEC_ELT (partitions, i, partition)
184738fd1498Szrj     if (nrw == number_of_rw_in_partition (rdg, partition))
184838fd1498Szrj       return true;
184938fd1498Szrj 
185038fd1498Szrj   return false;
185138fd1498Szrj }
185238fd1498Szrj 
/* Compute partition dependence created by the data references in DRS1
   and DRS2, modify and return DIR according to that.  If ALIAS_DDRS is
   not NULL, we record dependence introduced by possible alias between
   two data references in ALIAS_DDRS; otherwise, we simply ignore such
   dependence as if it doesn't exist at all.  */

static int
pg_add_dependence_edges (struct graph *rdg, int dir,
			 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
{
  unsigned i, j;
  bitmap_iterator bi, bj;
  data_reference_p dr1, dr2, saved_dr1;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  EXECUTE_IF_SET_IN_BITMAP (drs1, 0, i, bi)
    {
      dr1 = datarefs_vec[i];

      EXECUTE_IF_SET_IN_BITMAP (drs2, 0, j, bj)
	{
	  int res, this_dir = 1;
	  ddr_p ddr;

	  dr2 = datarefs_vec[j];

	  /* Skip all <read, read> data dependence.  */
	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
	    continue;

	  saved_dr1 = dr1;
	  /* Re-shuffle data-refs to be in topological order; compensate by
	     flipping the direction.  */
	  if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	      > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	    {
	      std::swap (dr1, dr2);
	      this_dir = -this_dir;
	    }
	  ddr = get_data_dependence (rdg, dr1, dr2);
	  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	    {
	      this_dir = 0;
	      res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1),
					   DR_BASE_ADDRESS (dr2));
	      /* Be conservative.  If data references are not well analyzed,
		 or the two data references have the same base address and
		 offset, add dependence and consider it alias to each other.
		 In other words, the dependence can not be resolved by
		 runtime alias check.  */
	      if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
		  || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
		  || !DR_INIT (dr1) || !DR_INIT (dr2)
		  || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
		  || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
		  || res == 0)
		this_dir = 2;
	      /* Data dependence could be resolved by runtime alias check,
		 record it in ALIAS_DDRS.  */
	      else if (alias_ddrs != NULL)
		alias_ddrs->safe_push (ddr);
	      /* Or simply ignore it.  */
	    }
	  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	    {
	      /* Known dependence: adjust direction by whether the DDR was
		 computed with its references reversed.  */
	      if (DDR_REVERSED_P (ddr))
		this_dir = -this_dir;

	      /* Known dependences can still be unordered throughout the
		 iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	      if (DDR_NUM_DIST_VECTS (ddr) != 1)
		this_dir = 2;
	      /* If the overlap is exact preserve stmt order.  */
	      else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0),
					    DDR_NB_LOOPS (ddr)))
		;
	      /* Else as the distance vector is lexicographic positive swap
		 the dependence direction.  */
	      else
		this_dir = -this_dir;
	    }
	  else
	    this_dir = 0;
	  /* Fold THIS_DIR into the accumulated direction; conflicting
	     directions mean the partitions must be merged (2).  */
	  if (this_dir == 2)
	    return 2;
	  else if (dir == 0)
	    dir = this_dir;
	  else if (this_dir != 0 && dir != this_dir)
	    return 2;
	  /* Shuffle "back" dr1.  */
	  dr1 = saved_dr1;
	}
    }
  return dir;
}
194838fd1498Szrj 
194938fd1498Szrj /* Compare postorder number of the partition graph vertices V1 and V2.  */
195038fd1498Szrj 
195138fd1498Szrj static int
pgcmp(const void * v1_,const void * v2_)195238fd1498Szrj pgcmp (const void *v1_, const void *v2_)
195338fd1498Szrj {
195438fd1498Szrj   const vertex *v1 = (const vertex *)v1_;
195538fd1498Szrj   const vertex *v2 = (const vertex *)v2_;
195638fd1498Szrj   return v2->post - v1->post;
195738fd1498Szrj }
195838fd1498Szrj 
/* Data attached to vertices of partition dependence graph.  */
struct pg_vdata
{
  /* ID of the corresponding partition.  */
  int id;
  /* The partition.  */
  struct partition *partition;
};

/* Data attached to edges of partition dependence graph.  */
struct pg_edata
{
  /* If the dependence edge can be resolved by runtime alias check,
     this vector contains data dependence relations for runtime alias
     check.  On the other hand, if the dependence edge is introduced
     because of compilation time known data dependence, this vector
     contains nothing.  */
  vec<ddr_p> alias_ddrs;
};

/* Callback data for traversing edges in graph.  */
struct pg_edge_callback_data
{
  /* Bitmap of strongly connected components that should be merged.  */
  bitmap sccs_to_merge;
  /* Array containing component information for all vertices.  */
  int *vertices_component;
  /* Vector to record all data dependence relations which are needed
     to break strong connected components by runtime alias checks.  */
  vec<ddr_p> *alias_ddrs;
};
199038fd1498Szrj 
199138fd1498Szrj /* Initialize vertice's data for partition dependence graph PG with
199238fd1498Szrj    PARTITIONS.  */
199338fd1498Szrj 
199438fd1498Szrj static void
init_partition_graph_vertices(struct graph * pg,vec<struct partition * > * partitions)199538fd1498Szrj init_partition_graph_vertices (struct graph *pg,
199638fd1498Szrj 			       vec<struct partition *> *partitions)
199738fd1498Szrj {
199838fd1498Szrj   int i;
199938fd1498Szrj   partition *partition;
200038fd1498Szrj   struct pg_vdata *data;
200138fd1498Szrj 
200238fd1498Szrj   for (i = 0; partitions->iterate (i, &partition); ++i)
200338fd1498Szrj     {
200438fd1498Szrj       data = new pg_vdata;
200538fd1498Szrj       pg->vertices[i].data = data;
200638fd1498Szrj       data->id = i;
200738fd1498Szrj       data->partition = partition;
200838fd1498Szrj     }
200938fd1498Szrj }
201038fd1498Szrj 
201138fd1498Szrj /* Add edge <I, J> to partition dependence graph PG.  Attach vector of data
201238fd1498Szrj    dependence relations to the EDGE if DDRS isn't NULL.  */
201338fd1498Szrj 
201438fd1498Szrj static void
add_partition_graph_edge(struct graph * pg,int i,int j,vec<ddr_p> * ddrs)201538fd1498Szrj add_partition_graph_edge (struct graph *pg, int i, int j, vec<ddr_p> *ddrs)
201638fd1498Szrj {
201738fd1498Szrj   struct graph_edge *e = add_edge (pg, i, j);
201838fd1498Szrj 
201938fd1498Szrj   /* If the edge is attached with data dependence relations, it means this
202038fd1498Szrj      dependence edge can be resolved by runtime alias checks.  */
202138fd1498Szrj   if (ddrs != NULL)
202238fd1498Szrj     {
202338fd1498Szrj       struct pg_edata *data = new pg_edata;
202438fd1498Szrj 
202538fd1498Szrj       gcc_assert (ddrs->length () > 0);
202638fd1498Szrj       e->data = data;
202738fd1498Szrj       data->alias_ddrs = vNULL;
202838fd1498Szrj       data->alias_ddrs.safe_splice (*ddrs);
202938fd1498Szrj     }
203038fd1498Szrj }
203138fd1498Szrj 
203238fd1498Szrj /* Callback function for graph travesal algorithm.  It returns true
203338fd1498Szrj    if edge E should skipped when traversing the graph.  */
203438fd1498Szrj 
203538fd1498Szrj static bool
pg_skip_alias_edge(struct graph_edge * e)203638fd1498Szrj pg_skip_alias_edge (struct graph_edge *e)
203738fd1498Szrj {
203838fd1498Szrj   struct pg_edata *data = (struct pg_edata *)e->data;
203938fd1498Szrj   return (data != NULL && data->alias_ddrs.length () > 0);
204038fd1498Szrj }
204138fd1498Szrj 
204238fd1498Szrj /* Callback function freeing data attached to edge E of graph.  */
204338fd1498Szrj 
204438fd1498Szrj static void
free_partition_graph_edata_cb(struct graph *,struct graph_edge * e,void *)204538fd1498Szrj free_partition_graph_edata_cb (struct graph *, struct graph_edge *e, void *)
204638fd1498Szrj {
204738fd1498Szrj   if (e->data != NULL)
204838fd1498Szrj     {
204938fd1498Szrj       struct pg_edata *data = (struct pg_edata *)e->data;
205038fd1498Szrj       data->alias_ddrs.release ();
205138fd1498Szrj       delete data;
205238fd1498Szrj     }
205338fd1498Szrj }
205438fd1498Szrj 
205538fd1498Szrj /* Free data attached to vertice of partition dependence graph PG.  */
205638fd1498Szrj 
205738fd1498Szrj static void
free_partition_graph_vdata(struct graph * pg)205838fd1498Szrj free_partition_graph_vdata (struct graph *pg)
205938fd1498Szrj {
206038fd1498Szrj   int i;
206138fd1498Szrj   struct pg_vdata *data;
206238fd1498Szrj 
206338fd1498Szrj   for (i = 0; i < pg->n_vertices; ++i)
206438fd1498Szrj     {
206538fd1498Szrj       data = (struct pg_vdata *)pg->vertices[i].data;
206638fd1498Szrj       delete data;
206738fd1498Szrj     }
206838fd1498Szrj }
206938fd1498Szrj 
/* Build and return partition dependence graph for PARTITIONS.  RDG is
   reduced dependence graph for the loop to be distributed.  If IGNORE_ALIAS_P
   is true, data dependence caused by possible alias between references
   is ignored, as if it doesn't exist at all; otherwise all dependences
   are considered.  */

static struct graph *
build_partition_graph (struct graph *rdg,
		       vec<struct partition *> *partitions,
		       bool ignore_alias_p)
{
  int i, j;
  struct partition *partition1, *partition2;
  graph *pg = new_graph (partitions->length ());
  auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p;

  /* Pass NULL when alias is ignored so pg_add_dependence_edges won't
     collect alias ddrs at all.  */
  alias_ddrs_p = ignore_alias_p ? NULL : &alias_ddrs;

  init_partition_graph_vertices (pg, partitions);

  /* Examine each unordered pair of partitions exactly once.  */
  for (i = 0; partitions->iterate (i, &partition1); ++i)
    {
      for (j = i + 1; partitions->iterate (j, &partition2); ++j)
	{
	  /* dependence direction - 0 is no dependence, -1 is back,
	     1 is forth, 2 is both (we can stop then, merging will occur).  */
	  int dir = 0;

	  /* If the first partition has reduction, add back edge; if the
	     second partition has reduction, add forth edge.  This makes
	     sure that reduction partition will be sorted as the last one.  */
	  if (partition_reduction_p (partition1))
	    dir = -1;
	  else if (partition_reduction_p (partition2))
	    dir = 1;

	  /* Cleanup the temporary vector.  */
	  alias_ddrs.truncate (0);

	  dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs,
					 partition2->datarefs, alias_ddrs_p);

	  /* Add edge to partition graph if there exists dependence.  There
	     are two types of edges.  One type edge is caused by compilation
	     time known dependence, this type can not be resolved by runtime
	     alias check.  The other type can be resolved by runtime alias
	     check.  */
	  if (dir == 1 || dir == 2
	      || alias_ddrs.length () > 0)
	    {
	      /* Attach data dependence relations to edge that can be resolved
		 by runtime alias check.  */
	      bool alias_edge_p = (dir != 1 && dir != 2);
	      add_partition_graph_edge (pg, i, j,
					(alias_edge_p) ? &alias_ddrs : NULL);
	    }
	  if (dir == -1 || dir == 2
	      || alias_ddrs.length () > 0)
	    {
	      /* Attach data dependence relations to edge that can be resolved
		 by runtime alias check.  */
	      bool alias_edge_p = (dir != -1 && dir != 2);
	      add_partition_graph_edge (pg, j, i,
					(alias_edge_p) ? &alias_ddrs : NULL);
	    }
	}
    }
  return pg;
}
213938fd1498Szrj 
214038fd1498Szrj /* Sort partitions in PG in descending post order and store them in
214138fd1498Szrj    PARTITIONS.  */
214238fd1498Szrj 
214338fd1498Szrj static void
sort_partitions_by_post_order(struct graph * pg,vec<struct partition * > * partitions)214438fd1498Szrj sort_partitions_by_post_order (struct graph *pg,
214538fd1498Szrj 			       vec<struct partition *> *partitions)
214638fd1498Szrj {
214738fd1498Szrj   int i;
214838fd1498Szrj   struct pg_vdata *data;
214938fd1498Szrj 
215038fd1498Szrj   /* Now order the remaining nodes in descending postorder.  */
215138fd1498Szrj   qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
215238fd1498Szrj   partitions->truncate (0);
215338fd1498Szrj   for (i = 0; i < pg->n_vertices; ++i)
215438fd1498Szrj     {
215538fd1498Szrj       data = (struct pg_vdata *)pg->vertices[i].data;
215638fd1498Szrj       if (data->partition)
215738fd1498Szrj 	partitions->safe_push (data->partition);
215838fd1498Szrj     }
215938fd1498Szrj }
216038fd1498Szrj 
/* Given reduced dependence graph RDG merge strong connected components
   of PARTITIONS.  If IGNORE_ALIAS_P is true, data dependence caused by
   possible alias between references is ignored, as if it doesn't exist
   at all; otherwise all dependences are considered.  */

static void
merge_dep_scc_partitions (struct graph *rdg,
			  vec<struct partition *> *partitions,
			  bool ignore_alias_p)
{
  struct partition *partition1, *partition2;
  struct pg_vdata *data;
  graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p);
  int i, j, num_sccs = graphds_scc (pg, NULL);

  /* Strong connected component means dependence cycle, we cannot distribute
     them.  So fuse them together.  */
  if ((unsigned) num_sccs < partitions->length ())
    {
      for (i = 0; i < num_sccs; ++i)
	{
	  /* Find the first partition belonging to SCC I ...  */
	  for (j = 0; partitions->iterate (j, &partition1); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  /* ... and fuse every later member of the SCC into it.  */
	  for (j = j + 1; partitions->iterate (j, &partition2); ++j)
	    if (pg->vertices[j].component == i)
	      {
		partition_merge_into (NULL, partition1,
				      partition2, FUSE_SAME_SCC);
		/* A fused dependence cycle can no longer run parallel.  */
		partition1->type = PTYPE_SEQUENTIAL;
		/* Clear both the vector slot and the vertex data so the
		   merged-away partition is skipped when sorting below.  */
		(*partitions)[j] = NULL;
		partition_free (partition2);
		data = (struct pg_vdata *)pg->vertices[j].data;
		data->partition = NULL;
	      }
	}
    }

  sort_partitions_by_post_order (pg, partitions);
  gcc_assert (partitions->length () == (unsigned)num_sccs);
  free_partition_graph_vdata (pg);
  free_graph (pg);
}
220438fd1498Szrj 
/* Callback function for traversing edge E in graph G.  DATA is private
   callback data (a pg_edge_callback_data).  Collects into
   DATA->alias_ddrs the alias ddrs of edges that must be removed to
   break their strong connected component.  */

static void
pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
{
  int i, j, component;
  struct pg_edge_callback_data *cbdata;
  struct pg_edata *edata = (struct pg_edata *) e->data;

  /* If the edge doesn't have attached data dependence, it represents
     compilation time known dependences.  This type dependence cannot
     be resolved by runtime alias check.  */
  if (edata == NULL || edata->alias_ddrs.length () == 0)
    return;

  cbdata = (struct pg_edge_callback_data *) data;
  i = e->src;
  j = e->dest;
  /* Use the component numbers saved before the second SCC pass; the
     pass itself overwrote g->vertices[].component.  */
  component = cbdata->vertices_component[i];
  /* Vertices are topologically sorted according to compilation time
     known dependences, so we can break strong connected components
     by removing edges of the opposite direction, i.e, edges pointing
     from vertice with smaller post number to vertice with bigger post
     number.  */
  if (g->vertices[i].post < g->vertices[j].post
      /* We only need to remove edges connecting vertices in the same
	 strong connected component to break it.  */
      && component == cbdata->vertices_component[j]
      /* Check if we want to break the strong connected component or not.  */
      && !bitmap_bit_p (cbdata->sccs_to_merge, component))
    cbdata->alias_ddrs->safe_splice (edata->alias_ddrs);
}
223838fd1498Szrj 
/* This is the main function breaking strong connected components in
   PARTITIONS given reduced dependence graph RDG.  Store data dependence
   relations for runtime alias check in ALIAS_DDRS.  */

static void
break_alias_scc_partitions (struct graph *rdg,
			    vec<struct partition *> *partitions,
			    vec<ddr_p> *alias_ddrs)
{
  int i, j, k, num_sccs, num_sccs_no_alias;
  /* Build partition dependence graph.  */
  graph *pg = build_partition_graph (rdg, partitions, false);

  alias_ddrs->truncate (0);
  /* Find strong connected components in the graph, with all dependence edges
     considered.  */
  num_sccs = graphds_scc (pg, NULL);
  /* All SCCs now can be broken by runtime alias checks because SCCs caused by
     compilation time known dependences are merged before this function.  */
  if ((unsigned) num_sccs < partitions->length ())
    {
      struct pg_edge_callback_data cbdata;
      auto_bitmap sccs_to_merge;
      auto_vec<enum partition_type> scc_types;
      struct partition *partition, *first;

      /* If all partitions in a SCC have the same type, we can simply merge the
	 SCC.  This loop finds out such SCCS and record them in bitmap.  */
      bitmap_set_range (sccs_to_merge, 0, (unsigned) num_sccs);
      for (i = 0; i < num_sccs; ++i)
	{
	  /* FIRST is the lowest-index partition in SCC I.  */
	  for (j = 0; partitions->iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (++j; partitions->iterate (j, &partition); ++j)
	    {
	      if (pg->vertices[j].component != i)
		continue;

	      /* Note we Merge partitions of parallel type on purpose, though
		 the result partition is sequential.  The reason is vectorizer
		 can do more accurate runtime alias check in this case.  Also
		 it results in more conservative distribution.  */
	      if (first->type != partition->type)
		{
		  bitmap_clear_bit (sccs_to_merge, i);
		  break;
		}
	    }
	}

      /* Initialize callback data for traversing.  */
      cbdata.sccs_to_merge = sccs_to_merge;
      cbdata.alias_ddrs = alias_ddrs;
      cbdata.vertices_component = XNEWVEC (int, pg->n_vertices);
      /* Record the component information which will be corrupted by next
	 graph scc finding call.  */
      for (i = 0; i < pg->n_vertices; ++i)
	cbdata.vertices_component[i] = pg->vertices[i].component;

      /* Collect data dependences for runtime alias checks to break SCCs.  */
      if (bitmap_count_bits (sccs_to_merge) != (unsigned) num_sccs)
	{
	  /* Run SCC finding algorithm again, with alias dependence edges
	     skipped.  This is to topologically sort partitions according to
	     compilation time known dependence.  Note the topological order
	     is stored in the form of pg's post order number.  */
	  num_sccs_no_alias = graphds_scc (pg, NULL, pg_skip_alias_edge);
	  gcc_assert (partitions->length () == (unsigned) num_sccs_no_alias);
	  /* With topological order, we can construct two subgraphs L and R.
	     L contains edge <x, y> where x < y in terms of post order, while
	     R contains edge <x, y> where x > y.  Edges for compilation time
	     known dependence all fall in R, so we break SCCs by removing all
	     (alias) edges of in subgraph L.  */
	  for_each_edge (pg, pg_collect_alias_ddrs, &cbdata);
	}

      /* For SCC that doesn't need to be broken, merge it.  */
      for (i = 0; i < num_sccs; ++i)
	{
	  if (!bitmap_bit_p (sccs_to_merge, i))
	    continue;

	  /* Use the saved component numbers; the second SCC pass above may
	     have overwritten pg->vertices[].component.  */
	  for (j = 0; partitions->iterate (j, &first); ++j)
	    if (cbdata.vertices_component[j] == i)
	      break;
	  for (k = j + 1; partitions->iterate (k, &partition); ++k)
	    {
	      struct pg_vdata *data;

	      if (cbdata.vertices_component[k] != i)
		continue;

	      /* Update postorder number so that merged reduction partition is
		 sorted after other partitions.  */
	      if (!partition_reduction_p (first)
		  && partition_reduction_p (partition))
		{
		  gcc_assert (pg->vertices[k].post < pg->vertices[j].post);
		  pg->vertices[j].post = pg->vertices[k].post;
		}
	      partition_merge_into (NULL, first, partition, FUSE_SAME_SCC);
	      (*partitions)[k] = NULL;
	      partition_free (partition);
	      data = (struct pg_vdata *)pg->vertices[k].data;
	      gcc_assert (data->id == k);
	      data->partition = NULL;
	      /* The result partition of merged SCC must be sequential.  */
	      first->type = PTYPE_SEQUENTIAL;
	    }
	}
    }

  sort_partitions_by_post_order (pg, partitions);
  free_partition_graph_vdata (pg);
  for_each_edge (pg, free_partition_graph_edata_cb, NULL);
  free_graph (pg);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Possible alias data dependence to break:\n");
      dump_data_dependence_relations (dump_file, *alias_ddrs);
    }
}
236338fd1498Szrj 
236438fd1498Szrj /* Compute and return an expression whose value is the segment length which
236538fd1498Szrj    will be accessed by DR in NITERS iterations.  */
236638fd1498Szrj 
236738fd1498Szrj static tree
data_ref_segment_size(struct data_reference * dr,tree niters)236838fd1498Szrj data_ref_segment_size (struct data_reference *dr, tree niters)
236938fd1498Szrj {
237038fd1498Szrj   niters = size_binop (MINUS_EXPR,
237138fd1498Szrj 		       fold_convert (sizetype, niters),
237238fd1498Szrj 		       size_one_node);
237338fd1498Szrj   return size_binop (MULT_EXPR,
237438fd1498Szrj 		     fold_convert (sizetype, DR_STEP (dr)),
237538fd1498Szrj 		     fold_convert (sizetype, niters));
237638fd1498Szrj }
237738fd1498Szrj 
237838fd1498Szrj /* Return true if LOOP's latch is dominated by statement for data reference
237938fd1498Szrj    DR.  */
238038fd1498Szrj 
238138fd1498Szrj static inline bool
latch_dominated_by_data_ref(struct loop * loop,data_reference * dr)238238fd1498Szrj latch_dominated_by_data_ref (struct loop *loop, data_reference *dr)
238338fd1498Szrj {
238438fd1498Szrj   return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
238538fd1498Szrj 			 gimple_bb (DR_STMT (dr)));
238638fd1498Szrj }
238738fd1498Szrj 
/* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
   data dependence relations ALIAS_DDRS.  */

static void
compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
			   vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
{
  unsigned int i;
  unsigned HOST_WIDE_INT factor = 1;
  tree niters_plus_one, niters = number_of_latch_executions (loop);

  /* Callers guarantee the iteration count is known.  */
  gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
  niters = fold_convert (sizetype, niters);
  niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Creating alias check pairs:\n");

  /* Iterate all data dependence relations and compute alias check pairs.  */
  for (i = 0; i < alias_ddrs->length (); i++)
    {
      ddr_p ddr = (*alias_ddrs)[i];
      struct data_reference *dr_a = DDR_A (ddr);
      struct data_reference *dr_b = DDR_B (ddr);
      tree seg_length_a, seg_length_b;
      /* Compare base addresses (then offsets) to get a deterministic
	 ordering of the two refs for canonicalization below.  */
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));

      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      gcc_assert (comp_res != 0);

      /* A ref dominating the latch executes in every iteration, i.e.
	 NITERS + 1 times; otherwise only NITERS times.  */
      if (latch_dominated_by_data_ref (loop, dr_a))
	seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
      else
	seg_length_a = data_ref_segment_size (dr_a, niters);

      if (latch_dominated_by_data_ref (loop, dr_b))
	seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
      else
	seg_length_b = data_ref_segment_size (dr_b, niters);

      unsigned HOST_WIDE_INT access_size_a
	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
      unsigned HOST_WIDE_INT access_size_b
	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
      unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
      unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));

      dr_with_seg_len_pair_t dr_with_seg_len_pair
	(dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
	 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));

      /* Canonicalize pairs by sorting the two DR members.  */
      if (comp_res > 0)
	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);

      comp_alias_pairs->safe_push (dr_with_seg_len_pair);
    }

  if (tree_fits_uhwi_p (niters))
    factor = tree_to_uhwi (niters);

  /* Prune alias check pairs.  */
  prune_runtime_alias_test_list (comp_alias_pairs, factor);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "Improved number of alias checks from %d to %d\n",
	     alias_ddrs->length (), comp_alias_pairs->length ());
}
245838fd1498Szrj 
/* Given data dependence relations in ALIAS_DDRS, generate runtime alias
   checks and version LOOP under condition of these runtime alias checks.  */

static void
version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
{
  profile_probability prob;
  basic_block cond_bb;
  struct loop *nloop;
  tree lhs, arg0, cond_expr = NULL_TREE;
  gimple_seq cond_stmts = NULL;
  gimple *call_stmt = NULL;
  auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs;

  /* Generate code for runtime alias checks if necessary.  */
  gcc_assert (alias_ddrs->length () > 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "Version loop <%d> with runtime alias check\n", loop->num);

  compute_alias_check_pairs (loop, alias_ddrs, &comp_alias_pairs);
  create_runtime_alias_checks (loop, &comp_alias_pairs, &cond_expr);
  cond_expr = force_gimple_operand_1 (cond_expr, &cond_stmts,
				      is_gimple_val, NULL_TREE);

  /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
  if (flag_tree_loop_vectorize)
    {
      /* Generate internal function call for loop distribution alias check.
	 The first argument is filled in below, once the new loop exists.  */
      call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
					      2, NULL_TREE, cond_expr);
      lhs = make_ssa_name (boolean_type_node);
      gimple_call_set_lhs (call_stmt, lhs);
    }
  else
    lhs = cond_expr;

  /* Assume the versioning condition (no alias) holds 90% of the time.  */
  prob = profile_probability::guessed_always ().apply_scale (9, 10);
  initialize_original_copy_tables ();
  nloop = loop_version (loop, lhs, &cond_bb, prob, prob.invert (),
			prob, prob.invert (), true);
  free_original_copy_tables ();
  /* Record the original loop number in newly generated loops.  In case of
     distribution, the original loop will be distributed and the new loop
     is kept.  */
  loop->orig_loop_num = nloop->num;
  nloop->orig_loop_num = nloop->num;
  nloop->dont_vectorize = true;
  nloop->force_vectorize = false;

  if (call_stmt)
    {
      /* Record new loop's num in IFN_LOOP_DIST_ALIAS because the original
	 loop could be destroyed.  */
      arg0 = build_int_cst (integer_type_node, loop->orig_loop_num);
      gimple_call_set_arg (call_stmt, 0, arg0);
      gimple_seq_add_stmt_without_update (&cond_stmts, call_stmt);
    }

  if (cond_stmts)
    {
      gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
      gsi_insert_seq_before (&cond_gsi, cond_stmts, GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}
252638fd1498Szrj 
252738fd1498Szrj /* Return true if loop versioning is needed to distrubute PARTITIONS.
252838fd1498Szrj    ALIAS_DDRS are data dependence relations for runtime alias check.  */
252938fd1498Szrj 
253038fd1498Szrj static inline bool
version_for_distribution_p(vec<struct partition * > * partitions,vec<ddr_p> * alias_ddrs)253138fd1498Szrj version_for_distribution_p (vec<struct partition *> *partitions,
253238fd1498Szrj 			    vec<ddr_p> *alias_ddrs)
253338fd1498Szrj {
253438fd1498Szrj   /* No need to version loop if we have only one partition.  */
253538fd1498Szrj   if (partitions->length () == 1)
253638fd1498Szrj     return false;
253738fd1498Szrj 
253838fd1498Szrj   /* Need to version loop if runtime alias check is necessary.  */
253938fd1498Szrj   return (alias_ddrs->length () > 0);
254038fd1498Szrj }
254138fd1498Szrj 
254238fd1498Szrj /* Compare base offset of builtin mem* partitions P1 and P2.  */
254338fd1498Szrj 
254438fd1498Szrj static bool
offset_cmp(struct partition * p1,struct partition * p2)254538fd1498Szrj offset_cmp (struct partition *p1, struct partition *p2)
254638fd1498Szrj {
254738fd1498Szrj   gcc_assert (p1 != NULL && p1->builtin != NULL);
254838fd1498Szrj   gcc_assert (p2 != NULL && p2->builtin != NULL);
254938fd1498Szrj   return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
255038fd1498Szrj }
255138fd1498Szrj 
255238fd1498Szrj /* Fuse adjacent memset builtin PARTITIONS if possible.  This is a special
255338fd1498Szrj    case optimization transforming below code:
255438fd1498Szrj 
255538fd1498Szrj      __builtin_memset (&obj, 0, 100);
255638fd1498Szrj      _1 = &obj + 100;
255738fd1498Szrj      __builtin_memset (_1, 0, 200);
255838fd1498Szrj      _2 = &obj + 300;
255938fd1498Szrj      __builtin_memset (_2, 0, 100);
256038fd1498Szrj 
256138fd1498Szrj    into:
256238fd1498Szrj 
256338fd1498Szrj      __builtin_memset (&obj, 0, 400);
256438fd1498Szrj 
256538fd1498Szrj    Note we don't have dependence information between different partitions
256638fd1498Szrj    at this point, as a result, we can't handle nonadjacent memset builtin
256738fd1498Szrj    partitions since dependence might be broken.  */
256838fd1498Szrj 
256938fd1498Szrj static void
fuse_memset_builtins(vec<struct partition * > * partitions)257038fd1498Szrj fuse_memset_builtins (vec<struct partition *> *partitions)
257138fd1498Szrj {
257238fd1498Szrj   unsigned i, j;
257338fd1498Szrj   struct partition *part1, *part2;
257438fd1498Szrj   tree rhs1, rhs2;
257538fd1498Szrj 
257638fd1498Szrj   for (i = 0; partitions->iterate (i, &part1);)
257738fd1498Szrj     {
257838fd1498Szrj       if (part1->kind != PKIND_MEMSET)
257938fd1498Szrj 	{
258038fd1498Szrj 	  i++;
258138fd1498Szrj 	  continue;
258238fd1498Szrj 	}
258338fd1498Szrj 
258438fd1498Szrj       /* Find sub-array of memset builtins of the same base.  Index range
258538fd1498Szrj 	 of the sub-array is [i, j) with "j > i".  */
258638fd1498Szrj       for (j = i + 1; partitions->iterate (j, &part2); ++j)
258738fd1498Szrj 	{
258838fd1498Szrj 	  if (part2->kind != PKIND_MEMSET
258938fd1498Szrj 	      || !operand_equal_p (part1->builtin->dst_base_base,
259038fd1498Szrj 				   part2->builtin->dst_base_base, 0))
259138fd1498Szrj 	    break;
259238fd1498Szrj 
259338fd1498Szrj 	  /* Memset calls setting different values can't be merged.  */
259438fd1498Szrj 	  rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
259538fd1498Szrj 	  rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
259638fd1498Szrj 	  if (!operand_equal_p (rhs1, rhs2, 0))
259738fd1498Szrj 	    break;
259838fd1498Szrj 	}
259938fd1498Szrj 
260038fd1498Szrj       /* Stable sort is required in order to avoid breaking dependence.  */
260138fd1498Szrj       std::stable_sort (&(*partitions)[i],
260238fd1498Szrj 			&(*partitions)[i] + j - i, offset_cmp);
260338fd1498Szrj       /* Continue with next partition.  */
260438fd1498Szrj       i = j;
260538fd1498Szrj     }
260638fd1498Szrj 
260738fd1498Szrj   /* Merge all consecutive memset builtin partitions.  */
260838fd1498Szrj   for (i = 0; i < partitions->length () - 1;)
260938fd1498Szrj     {
261038fd1498Szrj       part1 = (*partitions)[i];
261138fd1498Szrj       if (part1->kind != PKIND_MEMSET)
261238fd1498Szrj 	{
261338fd1498Szrj 	  i++;
261438fd1498Szrj 	  continue;
261538fd1498Szrj 	}
261638fd1498Szrj 
261738fd1498Szrj       part2 = (*partitions)[i + 1];
261838fd1498Szrj       /* Only merge memset partitions of the same base and with constant
261938fd1498Szrj 	 access sizes.  */
262038fd1498Szrj       if (part2->kind != PKIND_MEMSET
262138fd1498Szrj 	  || TREE_CODE (part1->builtin->size) != INTEGER_CST
262238fd1498Szrj 	  || TREE_CODE (part2->builtin->size) != INTEGER_CST
262338fd1498Szrj 	  || !operand_equal_p (part1->builtin->dst_base_base,
262438fd1498Szrj 			       part2->builtin->dst_base_base, 0))
262538fd1498Szrj 	{
262638fd1498Szrj 	  i++;
262738fd1498Szrj 	  continue;
262838fd1498Szrj 	}
262938fd1498Szrj       rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
263038fd1498Szrj       rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
263138fd1498Szrj       int bytev1 = const_with_all_bytes_same (rhs1);
263238fd1498Szrj       int bytev2 = const_with_all_bytes_same (rhs2);
263338fd1498Szrj       /* Only merge memset partitions of the same value.  */
263438fd1498Szrj       if (bytev1 != bytev2 || bytev1 == -1)
263538fd1498Szrj 	{
263638fd1498Szrj 	  i++;
263738fd1498Szrj 	  continue;
263838fd1498Szrj 	}
263938fd1498Szrj       wide_int end1 = wi::add (part1->builtin->dst_base_offset,
264038fd1498Szrj 			       wi::to_wide (part1->builtin->size));
264138fd1498Szrj       /* Only merge adjacent memset partitions.  */
264238fd1498Szrj       if (wi::ne_p (end1, part2->builtin->dst_base_offset))
264338fd1498Szrj 	{
264438fd1498Szrj 	  i++;
264538fd1498Szrj 	  continue;
264638fd1498Szrj 	}
264738fd1498Szrj       /* Merge partitions[i] and partitions[i+1].  */
264838fd1498Szrj       part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
264938fd1498Szrj 					  part1->builtin->size,
265038fd1498Szrj 					  part2->builtin->size);
265138fd1498Szrj       partition_free (part2);
265238fd1498Szrj       partitions->ordered_remove (i + 1);
265338fd1498Szrj     }
265438fd1498Szrj }
265538fd1498Szrj 
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
   ALIAS_DDRS contains ddrs which need runtime alias check.  */

static void
finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
		     vec<ddr_p> *alias_ddrs)
{
  unsigned i;
  struct partition *partition, *a;

  /* A single partition needs no fusing; and when a runtime alias check is
     pending, leave the partitions untouched.  */
  if (partitions->length () == 1
      || alias_ddrs->length () > 0)
    return;

  /* Count builtin vs. normal (code-generating) partitions, partial-memset
     partitions, and whether all partitions share the same type.  After the
     loop, I holds the total number of partitions.  */
  unsigned num_builtin = 0, num_normal = 0, num_partial_memset = 0;
  bool same_type_p = true;
  enum partition_type type = ((*partitions)[0])->type;
  for (i = 0; partitions->iterate (i, &partition); ++i)
    {
      same_type_p &= (type == partition->type);
      if (partition_builtin_p (partition))
	{
	  num_builtin++;
	  continue;
	}
      num_normal++;
      if (partition->kind == PKIND_PARTIAL_MEMSET)
	num_partial_memset++;
    }

  /* Don't distribute current loop into too many loops given we don't have
     memory stream cost model.  Be even more conservative in case of loop
     nest distribution.  */
  if ((same_type_p && num_builtin == 0
       && (loop->inner == NULL || num_normal != 2 || num_partial_memset != 1))
      || (loop->inner != NULL
	  && i >= NUM_PARTITION_THRESHOLD && num_normal > 1)
      || (loop->inner == NULL
	  && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
    {
      /* Fuse everything back into the first partition.  */
      a = (*partitions)[0];
      for (i = 1; partitions->iterate (i, &partition); ++i)
	{
	  partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
	  partition_free (partition);
	}
      partitions->truncate (1);
    }

  /* Fuse memset builtins if possible.  */
  if (partitions->length () > 1)
    fuse_memset_builtins (partitions);
}
270938fd1498Szrj 
/* Distributes the code from LOOP in such a way that producer statements
   are placed before consumer statements.  Tries to separate only the
   statements from STMTS into separate loops.  Returns the number of
   distributed loops.  Set NB_CALLS to number of generated builtin calls.
   Set *DESTROY_P to whether LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  ddrs_table = new hash_table<ddr_hasher> (389);
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;

  *destroy_p = false;
  *nb_calls = 0;
  loop_nest.create (0);
  if (!find_loop_nest (loop, &loop_nest))
    {
      loop_nest.release ();
      delete ddrs_table;
      return 0;
    }

  datarefs_vec.create (20);
  /* Build the reduced dependence graph for LOOP; bail out (releasing all
     resources acquired so far) if that fails.  */
  rdg = build_rdg (loop, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      loop_nest.release ();
      free_data_refs (datarefs_vec);
      delete ddrs_table;
      return 0;
    }

  /* Compile-time guard: give up on loops with too many data references
     rather than spend quadratic effort on dependence analysis.  */
  if (datarefs_vec.length () > MAX_DATAREFS_NUM)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: too many memory references.\n",
		 loop->num);

      free_rdg (rdg);
      loop_nest.release ();
      free_data_refs (datarefs_vec);
      delete ddrs_table;
      return 0;
    }

  /* Stash each data reference's index in its aux field so it can be
     recovered later without searching datarefs_vec.  */
  data_reference_p dref;
  for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
    dref->aux = (void *) (uintptr_t) i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  auto_vec<ddr_p> alias_ddrs;

  /* Compute the set of statements that belong to every partition; these
     are used when classifying partitions as builtins.  */
  auto_bitmap stmt_in_all_partitions;
  bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
  for (i = 1; partitions.iterate (i, &partition); ++i)
    bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition, stmt_in_all_partitions);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	partition_merge_into (rdg, into, partition, FUSE_REDUCTION);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into) || into->kind == PKIND_PARTIAL_MEMSET)
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (share_memory_accesses (rdg, into, partition))
	    {
	      partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
         accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph and fuse partitions in strong
     connected component.  */
  if (partitions.length () > 1)
    {
      /* Don't support loop nest distribution under runtime alias check
	 since it's not likely to enable many vectorization opportunities.  */
      if (loop->inner)
	merge_dep_scc_partitions (rdg, &partitions, false);
      else
	{
	  merge_dep_scc_partitions (rdg, &partitions, true);
	  if (partitions.length () > 1)
	    break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
	}
    }

  finalize_partitions (loop, &partitions, &alias_ddrs);

  /* Nothing to do when no partition remains, when a single non-builtin
     partition remains (the loop stays as-is), or when one partition
     still contains all reads and writes.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (version_for_distribution_p (&partitions, &alias_ddrs))
    version_loop_by_alias_check (loop, &alias_ddrs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "distribute loop <%d> into partitions:\n", loop->num);
      dump_rdg_partitions (dump_file, partitions);
    }

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

  /* Common cleanup path: release the loop nest, data references, all
     cached dependence relations, partitions and the RDG.  */
 ldist_done:
  loop_nest.release ();
  free_data_refs (datarefs_vec);
  for (hash_table<ddr_hasher>::iterator iter = ddrs_table->begin ();
       iter != ddrs_table->end (); ++iter)
    {
      free_dependence_relation (*iter);
      *iter = NULL;
    }
  delete ddrs_table;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  /* Return the number of distributed loops, excluding builtin calls.  */
  return nbp - *nb_calls;
}
292138fd1498Szrj 
292238fd1498Szrj /* Distribute all loops in the current function.  */
292338fd1498Szrj 
292438fd1498Szrj namespace {
292538fd1498Szrj 
/* Pass descriptor for the loop distribution pass (dump name "ldist").  */

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
293838fd1498Szrj 
/* GIMPLE optimization pass implementing loop distribution.  */

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when either full distribution or pattern-only distribution
     (memset/memcpy recognition) is enabled.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
295638fd1498Szrj 
295738fd1498Szrj 
295838fd1498Szrj /* Given LOOP, this function records seed statements for distribution in
295938fd1498Szrj    WORK_LIST.  Return false if there is nothing for distribution.  */
296038fd1498Szrj 
296138fd1498Szrj static bool
find_seed_stmts_for_distribution(struct loop * loop,vec<gimple * > * work_list)296238fd1498Szrj find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
296338fd1498Szrj {
296438fd1498Szrj   basic_block *bbs = get_loop_body_in_dom_order (loop);
296538fd1498Szrj 
296638fd1498Szrj   /* Initialize the worklist with stmts we seed the partitions with.  */
296738fd1498Szrj   for (unsigned i = 0; i < loop->num_nodes; ++i)
296838fd1498Szrj     {
296938fd1498Szrj       for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
297038fd1498Szrj 	   !gsi_end_p (gsi); gsi_next (&gsi))
297138fd1498Szrj 	{
297238fd1498Szrj 	  gphi *phi = gsi.phi ();
297338fd1498Szrj 	  if (virtual_operand_p (gimple_phi_result (phi)))
297438fd1498Szrj 	    continue;
297538fd1498Szrj 	  /* Distribute stmts which have defs that are used outside of
297638fd1498Szrj 	     the loop.  */
297738fd1498Szrj 	  if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
297838fd1498Szrj 	    continue;
297938fd1498Szrj 	  work_list->safe_push (phi);
298038fd1498Szrj 	}
298138fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
298238fd1498Szrj 	   !gsi_end_p (gsi); gsi_next (&gsi))
298338fd1498Szrj 	{
298438fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
298538fd1498Szrj 
298638fd1498Szrj 	  /* If there is a stmt with side-effects bail out - we
298738fd1498Szrj 	     cannot and should not distribute this loop.  */
298838fd1498Szrj 	  if (gimple_has_side_effects (stmt))
298938fd1498Szrj 	    {
299038fd1498Szrj 	      free (bbs);
299138fd1498Szrj 	      return false;
299238fd1498Szrj 	    }
299338fd1498Szrj 
299438fd1498Szrj 	  /* Distribute stmts which have defs that are used outside of
299538fd1498Szrj 	     the loop.  */
299638fd1498Szrj 	  if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
299738fd1498Szrj 	    ;
299838fd1498Szrj 	  /* Otherwise only distribute stores for now.  */
299938fd1498Szrj 	  else if (!gimple_vdef (stmt))
300038fd1498Szrj 	    continue;
300138fd1498Szrj 
300238fd1498Szrj 	  work_list->safe_push (stmt);
300338fd1498Szrj 	}
300438fd1498Szrj     }
300538fd1498Szrj   free (bbs);
300638fd1498Szrj   return work_list->length () > 0;
300738fd1498Szrj }
300838fd1498Szrj 
/* Given innermost LOOP, return the outermost enclosing loop that forms a
   perfect loop nest.  */

static struct loop *
prepare_perfect_loop_nest (struct loop *loop)
{
  struct loop *outer = loop_outer (loop);
  tree niters = number_of_latch_executions (loop);

  /* TODO: We only support the innermost 3-level loop nest distribution
     because of compilation time issue for now.  This should be relaxed
     in the future.  Note we only allow 3-level loop nest distribution
     when parallelizing loops.  */
  /* Climb outward while OUTER contains LOOP as its only child (i.e. the
     nest is perfect), has a single exit, is worth optimizing for speed,
     and has a computable iteration count.  Note the condition updates
     NITERS as a side effect for the next round's symbol check.  */
  while ((loop->inner == NULL
	  || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1))
	 && loop_outer (outer)
	 && outer->inner == loop && loop->next == NULL
	 && single_exit (outer)
	 && optimize_loop_for_speed_p (outer)
	 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
	 && (niters = number_of_latch_executions (outer)) != NULL_TREE
	 && niters != chrec_dont_know)
    {
      loop = outer;
      outer = loop_outer (loop);
    }

  return loop;
}
303838fd1498Szrj 
/* Main entry of the loop distribution pass: walk all innermost loops of
   FUN and try to distribute each (or its enclosing perfect nest).  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  if (number_of_loops (fun) <= 1)
    return 0;

  /* Compute topological order for basic blocks.  Topological order is
     needed because data dependence is computed for data references in
     lexicographical order.  */
  if (bb_top_order_index == NULL)
    {
      int rpo_num;
      int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));

      bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
      bb_top_order_index_size = last_basic_block_for_fn (cfun);
      rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
      for (int i = 0; i < rpo_num; i++)
	bb_top_order_index[rpo[i]] = i;

      free (rpo);
    }

  /* Reset all statement UIDs; they are (re)assigned during RDG
     construction.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      /* Don't distribute multiple exit edges loop, or cold loop.  */
      if (!single_exit (loop)
	  || !optimize_loop_for_speed_p (loop))
	continue;

      /* Don't distribute loop if niters is unknown.  */
      tree niters = number_of_latch_executions (loop);
      if (niters == NULL_TREE || niters == chrec_dont_know)
	continue;

      /* Get the perfect loop nest for distribution.  */
      loop = prepare_perfect_loop_nest (loop);
      /* Try the outermost nest first, then retry with inner loops if
	 seed statements cannot be found at the current level.  */
      for (; loop; loop = loop->inner)
	{
	  auto_vec<gimple *> work_list;
	  if (!find_seed_stmts_for_distribution (loop, &work_list))
	    break;

	  const char *str = loop->inner ? " nest" : "";
	  location_t loc = find_loop_location (loop);
	  /* Lazily compute control dependences, shared by all loops
	     processed in this pass invocation.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }

	  bool destroy_p;
	  int nb_generated_loops, nb_generated_calls;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);

	  if (nb_generated_loops + nb_generated_calls > 0)
	    {
	      changed = true;
	      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			       loc, "Loop%s %d distributed: split to %d loops "
			       "and %d library calls.\n", str, loop->num,
			       nb_generated_loops, nb_generated_calls);

	      break;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
	}
    }

  if (cd)
    delete cd;

  if (bb_top_order_index != NULL)
    {
      free (bb_top_order_index);
      bb_top_order_index = NULL;
      bb_top_order_index_size = 0;
    }

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up refering to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return changed ? TODO_cleanup_cfg : 0;
}
316238fd1498Szrj 
316338fd1498Szrj } // anon namespace
316438fd1498Szrj 
316538fd1498Szrj gimple_opt_pass *
make_pass_loop_distribution(gcc::context * ctxt)316638fd1498Szrj make_pass_loop_distribution (gcc::context *ctxt)
316738fd1498Szrj {
316838fd1498Szrj   return new pass_loop_distribution (ctxt);
316938fd1498Szrj }
3170