138fd1498Szrj /* Loop distribution.
238fd1498Szrj Copyright (C) 2006-2018 Free Software Foundation, Inc.
338fd1498Szrj Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
438fd1498Szrj and Sebastian Pop <sebastian.pop@amd.com>.
538fd1498Szrj
638fd1498Szrj This file is part of GCC.
738fd1498Szrj
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it
938fd1498Szrj under the terms of the GNU General Public License as published by the
1038fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
1138fd1498Szrj later version.
1238fd1498Szrj
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1438fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1538fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1638fd1498Szrj for more details.
1738fd1498Szrj
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3. If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>. */
2138fd1498Szrj
2238fd1498Szrj /* This pass performs loop distribution: for example, the loop
2338fd1498Szrj
2438fd1498Szrj |DO I = 2, N
2538fd1498Szrj | A(I) = B(I) + C
2638fd1498Szrj | D(I) = A(I-1)*E
2738fd1498Szrj |ENDDO
2838fd1498Szrj
2938fd1498Szrj is transformed to
3038fd1498Szrj
3138fd1498Szrj |DOALL I = 2, N
3238fd1498Szrj | A(I) = B(I) + C
3338fd1498Szrj |ENDDO
3438fd1498Szrj |
3538fd1498Szrj |DOALL I = 2, N
3638fd1498Szrj | D(I) = A(I-1)*E
3738fd1498Szrj |ENDDO
3838fd1498Szrj
3938fd1498Szrj Loop distribution is the dual of loop fusion. It separates statements
4038fd1498Szrj of a loop (or loop nest) into multiple loops (or loop nests) with the
4138fd1498Szrj same loop header. The major goal is to separate statements which may
4238fd1498Szrj be vectorized from those that can't. This pass implements distribution
4338fd1498Szrj in the following steps:
4438fd1498Szrj
4538fd1498Szrj 1) Seed partitions with specific type statements. For now we support
4638fd1498Szrj two types seed statements: statement defining variable used outside
4738fd1498Szrj of loop; statement storing to memory.
4838fd1498Szrj 2) Build reduced dependence graph (RDG) for loop to be distributed.
4938fd1498Szrj The vertices (RDG:V) model all statements in the loop and the edges
5038fd1498Szrj (RDG:E) model flow and control dependencies between statements.
5138fd1498Szrj 3) Apart from RDG, compute data dependencies between memory references.
5238fd1498Szrj 4) Starting from seed statement, build up partition by adding depended
5338fd1498Szrj statements according to RDG's dependence information. Partition is
5438fd1498Szrj classified as parallel type if it can be executed paralleled; or as
5538fd1498Szrj sequential type if it can't. Parallel type partition is further
5638fd1498Szrj classified as different builtin kinds if it can be implemented as
5738fd1498Szrj builtin function calls.
5838fd1498Szrj 5) Build partition dependence graph (PG) based on data dependencies.
5938fd1498Szrj The vertices (PG:V) model all partitions and the edges (PG:E) model
6038fd1498Szrj all data dependencies between every partitions pair. In general,
6138fd1498Szrj data dependence is either compilation time known or unknown. In C
6238fd1498Szrj family languages, there exists quite amount compilation time unknown
6338fd1498Szrj dependencies because of possible alias relation of data references.
6438fd1498Szrj We categorize PG's edge to two types: "true" edge that represents
6538fd1498Szrj compilation time known data dependencies; "alias" edge for all other
6638fd1498Szrj data dependencies.
6738fd1498Szrj 6) Traverse subgraph of PG as if all "alias" edges don't exist. Merge
6838fd1498Szrj partitions in each strong connected component (SCC) correspondingly.
6938fd1498Szrj Build new PG for merged partitions.
7038fd1498Szrj 7) Traverse PG again and this time with both "true" and "alias" edges
7138fd1498Szrj included. We try to break SCCs by removing some edges. Because
7238fd1498Szrj SCCs by "true" edges are all fused in step 6), we can break SCCs
7338fd1498Szrj by removing some "alias" edges. It's NP-hard to choose optimal
7438fd1498Szrj edge set, fortunately simple approximation is good enough for us
7538fd1498Szrj given the small problem scale.
7638fd1498Szrj 8) Collect all data dependencies of the removed "alias" edges. Create
7738fd1498Szrj runtime alias checks for collected data dependencies.
7838fd1498Szrj 9) Version loop under the condition of runtime alias checks. Given
7938fd1498Szrj loop distribution generally introduces additional overhead, it is
8038fd1498Szrj only useful if vectorization is achieved in distributed loop. We
8138fd1498Szrj version loop with internal function call IFN_LOOP_DIST_ALIAS. If
8238fd1498Szrj no distributed loop can be vectorized, we simply remove distributed
8338fd1498Szrj loops and recover to the original one.
8438fd1498Szrj
8538fd1498Szrj TODO:
8638fd1498Szrj 1) We only distribute innermost two-level loop nest now. We should
8738fd1498Szrj extend it for arbitrary loop nests in the future.
8838fd1498Szrj 2) We only fuse partitions in SCC now. A better fusion algorithm is
8938fd1498Szrj desired to minimize loop overhead, maximize parallelism and maximize
9038fd1498Szrj data reuse. */
9138fd1498Szrj
9238fd1498Szrj #include "config.h"
9338fd1498Szrj #define INCLUDE_ALGORITHM /* stable_sort */
9438fd1498Szrj #include "system.h"
9538fd1498Szrj #include "coretypes.h"
9638fd1498Szrj #include "backend.h"
9738fd1498Szrj #include "tree.h"
9838fd1498Szrj #include "gimple.h"
9938fd1498Szrj #include "cfghooks.h"
10038fd1498Szrj #include "tree-pass.h"
10138fd1498Szrj #include "ssa.h"
10238fd1498Szrj #include "gimple-pretty-print.h"
10338fd1498Szrj #include "fold-const.h"
10438fd1498Szrj #include "cfganal.h"
10538fd1498Szrj #include "gimple-iterator.h"
10638fd1498Szrj #include "gimplify-me.h"
10738fd1498Szrj #include "stor-layout.h"
10838fd1498Szrj #include "tree-cfg.h"
10938fd1498Szrj #include "tree-ssa-loop-manip.h"
11038fd1498Szrj #include "tree-ssa-loop-ivopts.h"
11138fd1498Szrj #include "tree-ssa-loop.h"
11238fd1498Szrj #include "tree-into-ssa.h"
11338fd1498Szrj #include "tree-ssa.h"
11438fd1498Szrj #include "cfgloop.h"
11538fd1498Szrj #include "tree-scalar-evolution.h"
11638fd1498Szrj #include "params.h"
11738fd1498Szrj #include "tree-vectorizer.h"
118*58e805e6Szrj #include "tree-eh.h"
11938fd1498Szrj
12038fd1498Szrj
12138fd1498Szrj #define MAX_DATAREFS_NUM \
12238fd1498Szrj ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
12338fd1498Szrj
12438fd1498Szrj /* Threshold controlling number of distributed partitions. Given it may
12538fd1498Szrj be unnecessary if a memory stream cost model is invented in the future,
12638fd1498Szrj we define it as a temporary macro, rather than a parameter. */
12738fd1498Szrj #define NUM_PARTITION_THRESHOLD (4)
12838fd1498Szrj
12938fd1498Szrj /* Hashtable helpers. */
13038fd1498Szrj
13138fd1498Szrj struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation>
13238fd1498Szrj {
13338fd1498Szrj static inline hashval_t hash (const data_dependence_relation *);
13438fd1498Szrj static inline bool equal (const data_dependence_relation *,
13538fd1498Szrj const data_dependence_relation *);
13638fd1498Szrj };
13738fd1498Szrj
13838fd1498Szrj /* Hash function for data dependence. */
13938fd1498Szrj
14038fd1498Szrj inline hashval_t
hash(const data_dependence_relation * ddr)14138fd1498Szrj ddr_hasher::hash (const data_dependence_relation *ddr)
14238fd1498Szrj {
14338fd1498Szrj inchash::hash h;
14438fd1498Szrj h.add_ptr (DDR_A (ddr));
14538fd1498Szrj h.add_ptr (DDR_B (ddr));
14638fd1498Szrj return h.end ();
14738fd1498Szrj }
14838fd1498Szrj
14938fd1498Szrj /* Hash table equality function for data dependence. */
15038fd1498Szrj
15138fd1498Szrj inline bool
equal(const data_dependence_relation * ddr1,const data_dependence_relation * ddr2)15238fd1498Szrj ddr_hasher::equal (const data_dependence_relation *ddr1,
15338fd1498Szrj const data_dependence_relation *ddr2)
15438fd1498Szrj {
15538fd1498Szrj return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
15638fd1498Szrj }
15738fd1498Szrj
15838fd1498Szrj /* The loop (nest) to be distributed. */
15938fd1498Szrj static vec<loop_p> loop_nest;
16038fd1498Szrj
16138fd1498Szrj /* Vector of data references in the loop to be distributed. */
16238fd1498Szrj static vec<data_reference_p> datarefs_vec;
16338fd1498Szrj
16438fd1498Szrj /* Store index of data reference in aux field. */
16538fd1498Szrj #define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
16638fd1498Szrj
16738fd1498Szrj /* Hash table for data dependence relation in the loop to be distributed. */
16838fd1498Szrj static hash_table<ddr_hasher> *ddrs_table;
16938fd1498Szrj
17038fd1498Szrj /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
17138fd1498Szrj struct rdg_vertex
17238fd1498Szrj {
17338fd1498Szrj /* The statement represented by this vertex. */
17438fd1498Szrj gimple *stmt;
17538fd1498Szrj
17638fd1498Szrj /* Vector of data-references in this statement. */
17738fd1498Szrj vec<data_reference_p> datarefs;
17838fd1498Szrj
17938fd1498Szrj /* True when the statement contains a write to memory. */
18038fd1498Szrj bool has_mem_write;
18138fd1498Szrj
18238fd1498Szrj /* True when the statement contains a read from memory. */
18338fd1498Szrj bool has_mem_reads;
18438fd1498Szrj };
18538fd1498Szrj
18638fd1498Szrj #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
18738fd1498Szrj #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
18838fd1498Szrj #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
18938fd1498Szrj #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
19038fd1498Szrj #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
19138fd1498Szrj #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
19238fd1498Szrj #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
19338fd1498Szrj #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
19438fd1498Szrj
19538fd1498Szrj /* Data dependence type. */
19638fd1498Szrj
19738fd1498Szrj enum rdg_dep_type
19838fd1498Szrj {
19938fd1498Szrj /* Read After Write (RAW). */
20038fd1498Szrj flow_dd = 'f',
20138fd1498Szrj
20238fd1498Szrj /* Control dependence (execute conditional on). */
20338fd1498Szrj control_dd = 'c'
20438fd1498Szrj };
20538fd1498Szrj
20638fd1498Szrj /* Dependence information attached to an edge of the RDG. */
20738fd1498Szrj
20838fd1498Szrj struct rdg_edge
20938fd1498Szrj {
21038fd1498Szrj /* Type of the dependence. */
21138fd1498Szrj enum rdg_dep_type type;
21238fd1498Szrj };
21338fd1498Szrj
21438fd1498Szrj #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
21538fd1498Szrj
21638fd1498Szrj /* Dump vertex I in RDG to FILE. */
21738fd1498Szrj
21838fd1498Szrj static void
dump_rdg_vertex(FILE * file,struct graph * rdg,int i)21938fd1498Szrj dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
22038fd1498Szrj {
22138fd1498Szrj struct vertex *v = &(rdg->vertices[i]);
22238fd1498Szrj struct graph_edge *e;
22338fd1498Szrj
22438fd1498Szrj fprintf (file, "(vertex %d: (%s%s) (in:", i,
22538fd1498Szrj RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
22638fd1498Szrj RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
22738fd1498Szrj
22838fd1498Szrj if (v->pred)
22938fd1498Szrj for (e = v->pred; e; e = e->pred_next)
23038fd1498Szrj fprintf (file, " %d", e->src);
23138fd1498Szrj
23238fd1498Szrj fprintf (file, ") (out:");
23338fd1498Szrj
23438fd1498Szrj if (v->succ)
23538fd1498Szrj for (e = v->succ; e; e = e->succ_next)
23638fd1498Szrj fprintf (file, " %d", e->dest);
23738fd1498Szrj
23838fd1498Szrj fprintf (file, ")\n");
23938fd1498Szrj print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
24038fd1498Szrj fprintf (file, ")\n");
24138fd1498Szrj }
24238fd1498Szrj
24338fd1498Szrj /* Call dump_rdg_vertex on stderr. */
24438fd1498Szrj
24538fd1498Szrj DEBUG_FUNCTION void
debug_rdg_vertex(struct graph * rdg,int i)24638fd1498Szrj debug_rdg_vertex (struct graph *rdg, int i)
24738fd1498Szrj {
24838fd1498Szrj dump_rdg_vertex (stderr, rdg, i);
24938fd1498Szrj }
25038fd1498Szrj
25138fd1498Szrj /* Dump the reduced dependence graph RDG to FILE. */
25238fd1498Szrj
25338fd1498Szrj static void
dump_rdg(FILE * file,struct graph * rdg)25438fd1498Szrj dump_rdg (FILE *file, struct graph *rdg)
25538fd1498Szrj {
25638fd1498Szrj fprintf (file, "(rdg\n");
25738fd1498Szrj for (int i = 0; i < rdg->n_vertices; i++)
25838fd1498Szrj dump_rdg_vertex (file, rdg, i);
25938fd1498Szrj fprintf (file, ")\n");
26038fd1498Szrj }
26138fd1498Szrj
26238fd1498Szrj /* Call dump_rdg on stderr. */
26338fd1498Szrj
26438fd1498Szrj DEBUG_FUNCTION void
debug_rdg(struct graph * rdg)26538fd1498Szrj debug_rdg (struct graph *rdg)
26638fd1498Szrj {
26738fd1498Szrj dump_rdg (stderr, rdg);
26838fd1498Szrj }
26938fd1498Szrj
27038fd1498Szrj static void
dot_rdg_1(FILE * file,struct graph * rdg)27138fd1498Szrj dot_rdg_1 (FILE *file, struct graph *rdg)
27238fd1498Szrj {
27338fd1498Szrj int i;
27438fd1498Szrj pretty_printer buffer;
27538fd1498Szrj pp_needs_newline (&buffer) = false;
27638fd1498Szrj buffer.buffer->stream = file;
27738fd1498Szrj
27838fd1498Szrj fprintf (file, "digraph RDG {\n");
27938fd1498Szrj
28038fd1498Szrj for (i = 0; i < rdg->n_vertices; i++)
28138fd1498Szrj {
28238fd1498Szrj struct vertex *v = &(rdg->vertices[i]);
28338fd1498Szrj struct graph_edge *e;
28438fd1498Szrj
28538fd1498Szrj fprintf (file, "%d [label=\"[%d] ", i, i);
28638fd1498Szrj pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
28738fd1498Szrj pp_flush (&buffer);
28838fd1498Szrj fprintf (file, "\"]\n");
28938fd1498Szrj
29038fd1498Szrj /* Highlight reads from memory. */
29138fd1498Szrj if (RDG_MEM_READS_STMT (rdg, i))
29238fd1498Szrj fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
29338fd1498Szrj
29438fd1498Szrj /* Highlight stores to memory. */
29538fd1498Szrj if (RDG_MEM_WRITE_STMT (rdg, i))
29638fd1498Szrj fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
29738fd1498Szrj
29838fd1498Szrj if (v->succ)
29938fd1498Szrj for (e = v->succ; e; e = e->succ_next)
30038fd1498Szrj switch (RDGE_TYPE (e))
30138fd1498Szrj {
30238fd1498Szrj case flow_dd:
30338fd1498Szrj /* These are the most common dependences: don't print these. */
30438fd1498Szrj fprintf (file, "%d -> %d \n", i, e->dest);
30538fd1498Szrj break;
30638fd1498Szrj
30738fd1498Szrj case control_dd:
30838fd1498Szrj fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
30938fd1498Szrj break;
31038fd1498Szrj
31138fd1498Szrj default:
31238fd1498Szrj gcc_unreachable ();
31338fd1498Szrj }
31438fd1498Szrj }
31538fd1498Szrj
31638fd1498Szrj fprintf (file, "}\n\n");
31738fd1498Szrj }
31838fd1498Szrj
31938fd1498Szrj /* Display the Reduced Dependence Graph using dotty. */
32038fd1498Szrj
32138fd1498Szrj DEBUG_FUNCTION void
dot_rdg(struct graph * rdg)32238fd1498Szrj dot_rdg (struct graph *rdg)
32338fd1498Szrj {
32438fd1498Szrj /* When debugging, you may want to enable the following code. */
32538fd1498Szrj #ifdef HAVE_POPEN
32638fd1498Szrj FILE *file = popen ("dot -Tx11", "w");
32738fd1498Szrj if (!file)
32838fd1498Szrj return;
32938fd1498Szrj dot_rdg_1 (file, rdg);
33038fd1498Szrj fflush (file);
33138fd1498Szrj close (fileno (file));
33238fd1498Szrj pclose (file);
33338fd1498Szrj #else
33438fd1498Szrj dot_rdg_1 (stderr, rdg);
33538fd1498Szrj #endif
33638fd1498Szrj }
33738fd1498Szrj
33838fd1498Szrj /* Returns the index of STMT in RDG. */
33938fd1498Szrj
34038fd1498Szrj static int
rdg_vertex_for_stmt(struct graph * rdg ATTRIBUTE_UNUSED,gimple * stmt)34138fd1498Szrj rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
34238fd1498Szrj {
34338fd1498Szrj int index = gimple_uid (stmt);
34438fd1498Szrj gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
34538fd1498Szrj return index;
34638fd1498Szrj }
34738fd1498Szrj
34838fd1498Szrj /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
34938fd1498Szrj the index of DEF in RDG. */
35038fd1498Szrj
35138fd1498Szrj static void
create_rdg_edges_for_scalar(struct graph * rdg,tree def,int idef)35238fd1498Szrj create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
35338fd1498Szrj {
35438fd1498Szrj use_operand_p imm_use_p;
35538fd1498Szrj imm_use_iterator iterator;
35638fd1498Szrj
35738fd1498Szrj FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
35838fd1498Szrj {
35938fd1498Szrj struct graph_edge *e;
36038fd1498Szrj int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
36138fd1498Szrj
36238fd1498Szrj if (use < 0)
36338fd1498Szrj continue;
36438fd1498Szrj
36538fd1498Szrj e = add_edge (rdg, idef, use);
36638fd1498Szrj e->data = XNEW (struct rdg_edge);
36738fd1498Szrj RDGE_TYPE (e) = flow_dd;
36838fd1498Szrj }
36938fd1498Szrj }
37038fd1498Szrj
37138fd1498Szrj /* Creates an edge for the control dependences of BB to the vertex V. */
37238fd1498Szrj
37338fd1498Szrj static void
create_edge_for_control_dependence(struct graph * rdg,basic_block bb,int v,control_dependences * cd)37438fd1498Szrj create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
37538fd1498Szrj int v, control_dependences *cd)
37638fd1498Szrj {
37738fd1498Szrj bitmap_iterator bi;
37838fd1498Szrj unsigned edge_n;
37938fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
38038fd1498Szrj 0, edge_n, bi)
38138fd1498Szrj {
38238fd1498Szrj basic_block cond_bb = cd->get_edge_src (edge_n);
38338fd1498Szrj gimple *stmt = last_stmt (cond_bb);
38438fd1498Szrj if (stmt && is_ctrl_stmt (stmt))
38538fd1498Szrj {
38638fd1498Szrj struct graph_edge *e;
38738fd1498Szrj int c = rdg_vertex_for_stmt (rdg, stmt);
38838fd1498Szrj if (c < 0)
38938fd1498Szrj continue;
39038fd1498Szrj
39138fd1498Szrj e = add_edge (rdg, c, v);
39238fd1498Szrj e->data = XNEW (struct rdg_edge);
39338fd1498Szrj RDGE_TYPE (e) = control_dd;
39438fd1498Szrj }
39538fd1498Szrj }
39638fd1498Szrj }
39738fd1498Szrj
39838fd1498Szrj /* Creates the edges of the reduced dependence graph RDG. */
39938fd1498Szrj
40038fd1498Szrj static void
create_rdg_flow_edges(struct graph * rdg)40138fd1498Szrj create_rdg_flow_edges (struct graph *rdg)
40238fd1498Szrj {
40338fd1498Szrj int i;
40438fd1498Szrj def_operand_p def_p;
40538fd1498Szrj ssa_op_iter iter;
40638fd1498Szrj
40738fd1498Szrj for (i = 0; i < rdg->n_vertices; i++)
40838fd1498Szrj FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
40938fd1498Szrj iter, SSA_OP_DEF)
41038fd1498Szrj create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
41138fd1498Szrj }
41238fd1498Szrj
41338fd1498Szrj /* Creates the edges of the reduced dependence graph RDG. */
41438fd1498Szrj
41538fd1498Szrj static void
create_rdg_cd_edges(struct graph * rdg,control_dependences * cd,loop_p loop)41638fd1498Szrj create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
41738fd1498Szrj {
41838fd1498Szrj int i;
41938fd1498Szrj
42038fd1498Szrj for (i = 0; i < rdg->n_vertices; i++)
42138fd1498Szrj {
42238fd1498Szrj gimple *stmt = RDG_STMT (rdg, i);
42338fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
42438fd1498Szrj {
42538fd1498Szrj edge_iterator ei;
42638fd1498Szrj edge e;
42738fd1498Szrj FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
42838fd1498Szrj if (flow_bb_inside_loop_p (loop, e->src))
42938fd1498Szrj create_edge_for_control_dependence (rdg, e->src, i, cd);
43038fd1498Szrj }
43138fd1498Szrj else
43238fd1498Szrj create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
43338fd1498Szrj }
43438fd1498Szrj }
43538fd1498Szrj
43638fd1498Szrj /* Build the vertices of the reduced dependence graph RDG. Return false
43738fd1498Szrj if that failed. */
43838fd1498Szrj
43938fd1498Szrj static bool
create_rdg_vertices(struct graph * rdg,vec<gimple * > stmts,loop_p loop)44038fd1498Szrj create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
44138fd1498Szrj {
44238fd1498Szrj int i;
44338fd1498Szrj gimple *stmt;
44438fd1498Szrj
44538fd1498Szrj FOR_EACH_VEC_ELT (stmts, i, stmt)
44638fd1498Szrj {
44738fd1498Szrj struct vertex *v = &(rdg->vertices[i]);
44838fd1498Szrj
44938fd1498Szrj /* Record statement to vertex mapping. */
45038fd1498Szrj gimple_set_uid (stmt, i);
45138fd1498Szrj
45238fd1498Szrj v->data = XNEW (struct rdg_vertex);
45338fd1498Szrj RDGV_STMT (v) = stmt;
45438fd1498Szrj RDGV_DATAREFS (v).create (0);
45538fd1498Szrj RDGV_HAS_MEM_WRITE (v) = false;
45638fd1498Szrj RDGV_HAS_MEM_READS (v) = false;
45738fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
45838fd1498Szrj continue;
45938fd1498Szrj
46038fd1498Szrj unsigned drp = datarefs_vec.length ();
46138fd1498Szrj if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
46238fd1498Szrj return false;
46338fd1498Szrj for (unsigned j = drp; j < datarefs_vec.length (); ++j)
46438fd1498Szrj {
46538fd1498Szrj data_reference_p dr = datarefs_vec[j];
46638fd1498Szrj if (DR_IS_READ (dr))
46738fd1498Szrj RDGV_HAS_MEM_READS (v) = true;
46838fd1498Szrj else
46938fd1498Szrj RDGV_HAS_MEM_WRITE (v) = true;
47038fd1498Szrj RDGV_DATAREFS (v).safe_push (dr);
47138fd1498Szrj }
47238fd1498Szrj }
47338fd1498Szrj return true;
47438fd1498Szrj }
47538fd1498Szrj
47638fd1498Szrj /* Array mapping basic block's index to its topological order. */
47738fd1498Szrj static int *bb_top_order_index;
47838fd1498Szrj /* And size of the array. */
47938fd1498Szrj static int bb_top_order_index_size;
48038fd1498Szrj
48138fd1498Szrj /* If X has a smaller topological sort number than Y, returns -1;
48238fd1498Szrj if greater, returns 1. */
48338fd1498Szrj
48438fd1498Szrj static int
bb_top_order_cmp(const void * x,const void * y)48538fd1498Szrj bb_top_order_cmp (const void *x, const void *y)
48638fd1498Szrj {
48738fd1498Szrj basic_block bb1 = *(const basic_block *) x;
48838fd1498Szrj basic_block bb2 = *(const basic_block *) y;
48938fd1498Szrj
49038fd1498Szrj gcc_assert (bb1->index < bb_top_order_index_size
49138fd1498Szrj && bb2->index < bb_top_order_index_size);
49238fd1498Szrj gcc_assert (bb1 == bb2
49338fd1498Szrj || bb_top_order_index[bb1->index]
49438fd1498Szrj != bb_top_order_index[bb2->index]);
49538fd1498Szrj
49638fd1498Szrj return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
49738fd1498Szrj }
49838fd1498Szrj
49938fd1498Szrj /* Initialize STMTS with all the statements of LOOP. We use topological
50038fd1498Szrj order to discover all statements. The order is important because
50138fd1498Szrj generate_loops_for_partition is using the same traversal for identifying
50238fd1498Szrj statements in loop copies. */
50338fd1498Szrj
50438fd1498Szrj static void
stmts_from_loop(struct loop * loop,vec<gimple * > * stmts)50538fd1498Szrj stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
50638fd1498Szrj {
50738fd1498Szrj unsigned int i;
50838fd1498Szrj basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
50938fd1498Szrj
51038fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
51138fd1498Szrj {
51238fd1498Szrj basic_block bb = bbs[i];
51338fd1498Szrj
51438fd1498Szrj for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
51538fd1498Szrj gsi_next (&bsi))
51638fd1498Szrj if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
51738fd1498Szrj stmts->safe_push (bsi.phi ());
51838fd1498Szrj
51938fd1498Szrj for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
52038fd1498Szrj gsi_next (&bsi))
52138fd1498Szrj {
52238fd1498Szrj gimple *stmt = gsi_stmt (bsi);
52338fd1498Szrj if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
52438fd1498Szrj stmts->safe_push (stmt);
52538fd1498Szrj }
52638fd1498Szrj }
52738fd1498Szrj
52838fd1498Szrj free (bbs);
52938fd1498Szrj }
53038fd1498Szrj
53138fd1498Szrj /* Free the reduced dependence graph RDG. */
53238fd1498Szrj
53338fd1498Szrj static void
free_rdg(struct graph * rdg)53438fd1498Szrj free_rdg (struct graph *rdg)
53538fd1498Szrj {
53638fd1498Szrj int i;
53738fd1498Szrj
53838fd1498Szrj for (i = 0; i < rdg->n_vertices; i++)
53938fd1498Szrj {
54038fd1498Szrj struct vertex *v = &(rdg->vertices[i]);
54138fd1498Szrj struct graph_edge *e;
54238fd1498Szrj
54338fd1498Szrj for (e = v->succ; e; e = e->succ_next)
54438fd1498Szrj free (e->data);
54538fd1498Szrj
54638fd1498Szrj if (v->data)
54738fd1498Szrj {
54838fd1498Szrj gimple_set_uid (RDGV_STMT (v), -1);
54938fd1498Szrj (RDGV_DATAREFS (v)).release ();
55038fd1498Szrj free (v->data);
55138fd1498Szrj }
55238fd1498Szrj }
55338fd1498Szrj
55438fd1498Szrj free_graph (rdg);
55538fd1498Szrj }
55638fd1498Szrj
55738fd1498Szrj /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
55838fd1498Szrj LOOP, and one edge per flow dependence or control dependence from control
55938fd1498Szrj dependence CD. During visiting each statement, data references are also
56038fd1498Szrj collected and recorded in global data DATAREFS_VEC. */
56138fd1498Szrj
56238fd1498Szrj static struct graph *
build_rdg(struct loop * loop,control_dependences * cd)56338fd1498Szrj build_rdg (struct loop *loop, control_dependences *cd)
56438fd1498Szrj {
56538fd1498Szrj struct graph *rdg;
56638fd1498Szrj
56738fd1498Szrj /* Create the RDG vertices from the stmts of the loop nest. */
56838fd1498Szrj auto_vec<gimple *, 10> stmts;
56938fd1498Szrj stmts_from_loop (loop, &stmts);
57038fd1498Szrj rdg = new_graph (stmts.length ());
57138fd1498Szrj if (!create_rdg_vertices (rdg, stmts, loop))
57238fd1498Szrj {
57338fd1498Szrj free_rdg (rdg);
57438fd1498Szrj return NULL;
57538fd1498Szrj }
57638fd1498Szrj stmts.release ();
57738fd1498Szrj
57838fd1498Szrj create_rdg_flow_edges (rdg);
57938fd1498Szrj if (cd)
58038fd1498Szrj create_rdg_cd_edges (rdg, cd, loop);
58138fd1498Szrj
58238fd1498Szrj return rdg;
58338fd1498Szrj }
58438fd1498Szrj
58538fd1498Szrj
58638fd1498Szrj /* Kind of distributed loop. */
58738fd1498Szrj enum partition_kind {
58838fd1498Szrj PKIND_NORMAL,
58938fd1498Szrj /* Partial memset stands for a paritition can be distributed into a loop
59038fd1498Szrj of memset calls, rather than a single memset call. It's handled just
59138fd1498Szrj like a normal parition, i.e, distributed as separate loop, no memset
59238fd1498Szrj call is generated.
59338fd1498Szrj
59438fd1498Szrj Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
59538fd1498Szrj loop nest as deep as possible. As a result, parloop achieves better
59638fd1498Szrj parallelization by parallelizing deeper loop nest. This hack should
59738fd1498Szrj be unnecessary and removed once distributed memset can be understood
59838fd1498Szrj and analyzed in data reference analysis. See PR82604 for more. */
59938fd1498Szrj PKIND_PARTIAL_MEMSET,
60038fd1498Szrj PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
60138fd1498Szrj };
60238fd1498Szrj
60338fd1498Szrj /* Type of distributed loop. */
60438fd1498Szrj enum partition_type {
60538fd1498Szrj /* The distributed loop can be executed parallelly. */
60638fd1498Szrj PTYPE_PARALLEL = 0,
60738fd1498Szrj /* The distributed loop has to be executed sequentially. */
60838fd1498Szrj PTYPE_SEQUENTIAL
60938fd1498Szrj };
61038fd1498Szrj
61138fd1498Szrj /* Builtin info for loop distribution. */
61238fd1498Szrj struct builtin_info
61338fd1498Szrj {
61438fd1498Szrj /* data-references a kind != PKIND_NORMAL partition is about. */
61538fd1498Szrj data_reference_p dst_dr;
61638fd1498Szrj data_reference_p src_dr;
61738fd1498Szrj /* Base address and size of memory objects operated by the builtin. Note
61838fd1498Szrj both dest and source memory objects must have the same size. */
61938fd1498Szrj tree dst_base;
62038fd1498Szrj tree src_base;
62138fd1498Szrj tree size;
62238fd1498Szrj /* Base and offset part of dst_base after stripping constant offset. This
62338fd1498Szrj is only used in memset builtin distribution for now. */
62438fd1498Szrj tree dst_base_base;
62538fd1498Szrj unsigned HOST_WIDE_INT dst_base_offset;
62638fd1498Szrj };
62738fd1498Szrj
62838fd1498Szrj /* Partition for loop distribution. */
62938fd1498Szrj struct partition
63038fd1498Szrj {
63138fd1498Szrj /* Statements of the partition. */
63238fd1498Szrj bitmap stmts;
63338fd1498Szrj /* True if the partition defines variable which is used outside of loop. */
63438fd1498Szrj bool reduction_p;
63538fd1498Szrj enum partition_kind kind;
63638fd1498Szrj enum partition_type type;
63738fd1498Szrj /* Data references in the partition. */
63838fd1498Szrj bitmap datarefs;
63938fd1498Szrj /* Information of builtin parition. */
64038fd1498Szrj struct builtin_info *builtin;
64138fd1498Szrj };
64238fd1498Szrj
64338fd1498Szrj
64438fd1498Szrj /* Allocate and initialize a partition from BITMAP. */
64538fd1498Szrj
64638fd1498Szrj static partition *
partition_alloc(void)64738fd1498Szrj partition_alloc (void)
64838fd1498Szrj {
64938fd1498Szrj partition *partition = XCNEW (struct partition);
65038fd1498Szrj partition->stmts = BITMAP_ALLOC (NULL);
65138fd1498Szrj partition->reduction_p = false;
65238fd1498Szrj partition->kind = PKIND_NORMAL;
65338fd1498Szrj partition->datarefs = BITMAP_ALLOC (NULL);
65438fd1498Szrj return partition;
65538fd1498Szrj }
65638fd1498Szrj
65738fd1498Szrj /* Free PARTITION. */
65838fd1498Szrj
65938fd1498Szrj static void
partition_free(partition * partition)66038fd1498Szrj partition_free (partition *partition)
66138fd1498Szrj {
66238fd1498Szrj BITMAP_FREE (partition->stmts);
66338fd1498Szrj BITMAP_FREE (partition->datarefs);
66438fd1498Szrj if (partition->builtin)
66538fd1498Szrj free (partition->builtin);
66638fd1498Szrj
66738fd1498Szrj free (partition);
66838fd1498Szrj }
66938fd1498Szrj
67038fd1498Szrj /* Returns true if the partition can be generated as a builtin. */
67138fd1498Szrj
67238fd1498Szrj static bool
partition_builtin_p(partition * partition)67338fd1498Szrj partition_builtin_p (partition *partition)
67438fd1498Szrj {
67538fd1498Szrj return partition->kind > PKIND_PARTIAL_MEMSET;
67638fd1498Szrj }
67738fd1498Szrj
67838fd1498Szrj /* Returns true if the partition contains a reduction. */
67938fd1498Szrj
68038fd1498Szrj static bool
partition_reduction_p(partition * partition)68138fd1498Szrj partition_reduction_p (partition *partition)
68238fd1498Szrj {
68338fd1498Szrj return partition->reduction_p;
68438fd1498Szrj }
68538fd1498Szrj
68638fd1498Szrj /* Partitions are fused because of different reasons. */
68738fd1498Szrj enum fuse_type
68838fd1498Szrj {
68938fd1498Szrj FUSE_NON_BUILTIN = 0,
69038fd1498Szrj FUSE_REDUCTION = 1,
69138fd1498Szrj FUSE_SHARE_REF = 2,
69238fd1498Szrj FUSE_SAME_SCC = 3,
69338fd1498Szrj FUSE_FINALIZE = 4
69438fd1498Szrj };
69538fd1498Szrj
69638fd1498Szrj /* Description on different fusing reason. */
69738fd1498Szrj static const char *fuse_message[] = {
69838fd1498Szrj "they are non-builtins",
69938fd1498Szrj "they have reductions",
70038fd1498Szrj "they have shared memory refs",
70138fd1498Szrj "they are in the same dependence scc",
70238fd1498Szrj "there is no point to distribute loop"};
70338fd1498Szrj
70438fd1498Szrj static void
70538fd1498Szrj update_type_for_merge (struct graph *, partition *, partition *);
70638fd1498Szrj
70738fd1498Szrj /* Merge PARTITION into the partition DEST. RDG is the reduced dependence
70838fd1498Szrj graph and we update type for result partition if it is non-NULL. */
70938fd1498Szrj
71038fd1498Szrj static void
partition_merge_into(struct graph * rdg,partition * dest,partition * partition,enum fuse_type ft)71138fd1498Szrj partition_merge_into (struct graph *rdg, partition *dest,
71238fd1498Szrj partition *partition, enum fuse_type ft)
71338fd1498Szrj {
71438fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
71538fd1498Szrj {
71638fd1498Szrj fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]);
71738fd1498Szrj fprintf (dump_file, " Part 1: ");
71838fd1498Szrj dump_bitmap (dump_file, dest->stmts);
71938fd1498Szrj fprintf (dump_file, " Part 2: ");
72038fd1498Szrj dump_bitmap (dump_file, partition->stmts);
72138fd1498Szrj }
72238fd1498Szrj
72338fd1498Szrj dest->kind = PKIND_NORMAL;
72438fd1498Szrj if (dest->type == PTYPE_PARALLEL)
72538fd1498Szrj dest->type = partition->type;
72638fd1498Szrj
72738fd1498Szrj bitmap_ior_into (dest->stmts, partition->stmts);
72838fd1498Szrj if (partition_reduction_p (partition))
72938fd1498Szrj dest->reduction_p = true;
73038fd1498Szrj
73138fd1498Szrj /* Further check if any data dependence prevents us from executing the
73238fd1498Szrj new partition parallelly. */
73338fd1498Szrj if (dest->type == PTYPE_PARALLEL && rdg != NULL)
73438fd1498Szrj update_type_for_merge (rdg, dest, partition);
73538fd1498Szrj
73638fd1498Szrj bitmap_ior_into (dest->datarefs, partition->datarefs);
73738fd1498Szrj }
73838fd1498Szrj
73938fd1498Szrj
74038fd1498Szrj /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
74138fd1498Szrj the LOOP. */
74238fd1498Szrj
74338fd1498Szrj static bool
ssa_name_has_uses_outside_loop_p(tree def,loop_p loop)74438fd1498Szrj ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
74538fd1498Szrj {
74638fd1498Szrj imm_use_iterator imm_iter;
74738fd1498Szrj use_operand_p use_p;
74838fd1498Szrj
74938fd1498Szrj FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
75038fd1498Szrj {
75138fd1498Szrj if (is_gimple_debug (USE_STMT (use_p)))
75238fd1498Szrj continue;
75338fd1498Szrj
75438fd1498Szrj basic_block use_bb = gimple_bb (USE_STMT (use_p));
75538fd1498Szrj if (!flow_bb_inside_loop_p (loop, use_bb))
75638fd1498Szrj return true;
75738fd1498Szrj }
75838fd1498Szrj
75938fd1498Szrj return false;
76038fd1498Szrj }
76138fd1498Szrj
76238fd1498Szrj /* Returns true when STMT defines a scalar variable used after the
76338fd1498Szrj loop LOOP. */
76438fd1498Szrj
76538fd1498Szrj static bool
stmt_has_scalar_dependences_outside_loop(loop_p loop,gimple * stmt)76638fd1498Szrj stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
76738fd1498Szrj {
76838fd1498Szrj def_operand_p def_p;
76938fd1498Szrj ssa_op_iter op_iter;
77038fd1498Szrj
77138fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
77238fd1498Szrj return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
77338fd1498Szrj
77438fd1498Szrj FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
77538fd1498Szrj if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
77638fd1498Szrj return true;
77738fd1498Szrj
77838fd1498Szrj return false;
77938fd1498Szrj }
78038fd1498Szrj
78138fd1498Szrj /* Return a copy of LOOP placed before LOOP. */
78238fd1498Szrj
78338fd1498Szrj static struct loop *
copy_loop_before(struct loop * loop)78438fd1498Szrj copy_loop_before (struct loop *loop)
78538fd1498Szrj {
78638fd1498Szrj struct loop *res;
78738fd1498Szrj edge preheader = loop_preheader_edge (loop);
78838fd1498Szrj
78938fd1498Szrj initialize_original_copy_tables ();
79038fd1498Szrj res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
79138fd1498Szrj gcc_assert (res != NULL);
79238fd1498Szrj free_original_copy_tables ();
79338fd1498Szrj delete_update_ssa ();
79438fd1498Szrj
79538fd1498Szrj return res;
79638fd1498Szrj }
79738fd1498Szrj
79838fd1498Szrj /* Creates an empty basic block after LOOP. */
79938fd1498Szrj
80038fd1498Szrj static void
create_bb_after_loop(struct loop * loop)80138fd1498Szrj create_bb_after_loop (struct loop *loop)
80238fd1498Szrj {
80338fd1498Szrj edge exit = single_exit (loop);
80438fd1498Szrj
80538fd1498Szrj if (!exit)
80638fd1498Szrj return;
80738fd1498Szrj
80838fd1498Szrj split_edge (exit);
80938fd1498Szrj }
81038fd1498Szrj
81138fd1498Szrj /* Generate code for PARTITION from the code in LOOP. The loop is
81238fd1498Szrj copied when COPY_P is true. All the statements not flagged in the
81338fd1498Szrj PARTITION bitmap are removed from the loop or from its copy. The
81438fd1498Szrj statements are indexed in sequence inside a basic block, and the
81538fd1498Szrj basic blocks of a loop are taken in dom order. */
81638fd1498Szrj
81738fd1498Szrj static void
generate_loops_for_partition(struct loop * loop,partition * partition,bool copy_p)81838fd1498Szrj generate_loops_for_partition (struct loop *loop, partition *partition,
81938fd1498Szrj bool copy_p)
82038fd1498Szrj {
82138fd1498Szrj unsigned i;
82238fd1498Szrj basic_block *bbs;
82338fd1498Szrj
82438fd1498Szrj if (copy_p)
82538fd1498Szrj {
82638fd1498Szrj int orig_loop_num = loop->orig_loop_num;
82738fd1498Szrj loop = copy_loop_before (loop);
82838fd1498Szrj gcc_assert (loop != NULL);
82938fd1498Szrj loop->orig_loop_num = orig_loop_num;
83038fd1498Szrj create_preheader (loop, CP_SIMPLE_PREHEADERS);
83138fd1498Szrj create_bb_after_loop (loop);
83238fd1498Szrj }
83338fd1498Szrj else
83438fd1498Szrj {
83538fd1498Szrj /* Origin number is set to the new versioned loop's num. */
83638fd1498Szrj gcc_assert (loop->orig_loop_num != loop->num);
83738fd1498Szrj }
83838fd1498Szrj
83938fd1498Szrj /* Remove stmts not in the PARTITION bitmap. */
84038fd1498Szrj bbs = get_loop_body_in_dom_order (loop);
84138fd1498Szrj
84238fd1498Szrj if (MAY_HAVE_DEBUG_BIND_STMTS)
84338fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
84438fd1498Szrj {
84538fd1498Szrj basic_block bb = bbs[i];
84638fd1498Szrj
84738fd1498Szrj for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
84838fd1498Szrj gsi_next (&bsi))
84938fd1498Szrj {
85038fd1498Szrj gphi *phi = bsi.phi ();
85138fd1498Szrj if (!virtual_operand_p (gimple_phi_result (phi))
85238fd1498Szrj && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
85338fd1498Szrj reset_debug_uses (phi);
85438fd1498Szrj }
85538fd1498Szrj
85638fd1498Szrj for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
85738fd1498Szrj {
85838fd1498Szrj gimple *stmt = gsi_stmt (bsi);
85938fd1498Szrj if (gimple_code (stmt) != GIMPLE_LABEL
86038fd1498Szrj && !is_gimple_debug (stmt)
86138fd1498Szrj && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
86238fd1498Szrj reset_debug_uses (stmt);
86338fd1498Szrj }
86438fd1498Szrj }
86538fd1498Szrj
86638fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
86738fd1498Szrj {
86838fd1498Szrj basic_block bb = bbs[i];
86938fd1498Szrj edge inner_exit = NULL;
87038fd1498Szrj
87138fd1498Szrj if (loop != bb->loop_father)
87238fd1498Szrj inner_exit = single_exit (bb->loop_father);
87338fd1498Szrj
87438fd1498Szrj for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
87538fd1498Szrj {
87638fd1498Szrj gphi *phi = bsi.phi ();
87738fd1498Szrj if (!virtual_operand_p (gimple_phi_result (phi))
87838fd1498Szrj && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
87938fd1498Szrj remove_phi_node (&bsi, true);
88038fd1498Szrj else
88138fd1498Szrj gsi_next (&bsi);
88238fd1498Szrj }
88338fd1498Szrj
88438fd1498Szrj for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
88538fd1498Szrj {
88638fd1498Szrj gimple *stmt = gsi_stmt (bsi);
88738fd1498Szrj if (gimple_code (stmt) != GIMPLE_LABEL
88838fd1498Szrj && !is_gimple_debug (stmt)
88938fd1498Szrj && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
89038fd1498Szrj {
89138fd1498Szrj /* In distribution of loop nest, if bb is inner loop's exit_bb,
89238fd1498Szrj we choose its exit edge/path in order to avoid generating
89338fd1498Szrj infinite loop. For all other cases, we choose an arbitrary
89438fd1498Szrj path through the empty CFG part that this unnecessary
89538fd1498Szrj control stmt controls. */
89638fd1498Szrj if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
89738fd1498Szrj {
89838fd1498Szrj if (inner_exit && inner_exit->flags & EDGE_TRUE_VALUE)
89938fd1498Szrj gimple_cond_make_true (cond_stmt);
90038fd1498Szrj else
90138fd1498Szrj gimple_cond_make_false (cond_stmt);
90238fd1498Szrj update_stmt (stmt);
90338fd1498Szrj }
90438fd1498Szrj else if (gimple_code (stmt) == GIMPLE_SWITCH)
90538fd1498Szrj {
90638fd1498Szrj gswitch *switch_stmt = as_a <gswitch *> (stmt);
90738fd1498Szrj gimple_switch_set_index
90838fd1498Szrj (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
90938fd1498Szrj update_stmt (stmt);
91038fd1498Szrj }
91138fd1498Szrj else
91238fd1498Szrj {
91338fd1498Szrj unlink_stmt_vdef (stmt);
91438fd1498Szrj gsi_remove (&bsi, true);
91538fd1498Szrj release_defs (stmt);
91638fd1498Szrj continue;
91738fd1498Szrj }
91838fd1498Szrj }
91938fd1498Szrj gsi_next (&bsi);
92038fd1498Szrj }
92138fd1498Szrj }
92238fd1498Szrj
92338fd1498Szrj free (bbs);
92438fd1498Szrj }
92538fd1498Szrj
92638fd1498Szrj /* If VAL memory representation contains the same value in all bytes,
92738fd1498Szrj return that value, otherwise return -1.
92838fd1498Szrj E.g. for 0x24242424 return 0x24, for IEEE double
92938fd1498Szrj 747708026454360457216.0 return 0x44, etc. */
93038fd1498Szrj
93138fd1498Szrj static int
const_with_all_bytes_same(tree val)93238fd1498Szrj const_with_all_bytes_same (tree val)
93338fd1498Szrj {
93438fd1498Szrj unsigned char buf[64];
93538fd1498Szrj int i, len;
93638fd1498Szrj
93738fd1498Szrj if (integer_zerop (val)
93838fd1498Szrj || (TREE_CODE (val) == CONSTRUCTOR
93938fd1498Szrj && !TREE_CLOBBER_P (val)
94038fd1498Szrj && CONSTRUCTOR_NELTS (val) == 0))
94138fd1498Szrj return 0;
94238fd1498Szrj
94338fd1498Szrj if (real_zerop (val))
94438fd1498Szrj {
94538fd1498Szrj /* Only return 0 for +0.0, not for -0.0, which doesn't have
94638fd1498Szrj an all bytes same memory representation. Don't transform
94738fd1498Szrj -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS. */
94838fd1498Szrj switch (TREE_CODE (val))
94938fd1498Szrj {
95038fd1498Szrj case REAL_CST:
95138fd1498Szrj if (!real_isneg (TREE_REAL_CST_PTR (val)))
95238fd1498Szrj return 0;
95338fd1498Szrj break;
95438fd1498Szrj case COMPLEX_CST:
95538fd1498Szrj if (!const_with_all_bytes_same (TREE_REALPART (val))
95638fd1498Szrj && !const_with_all_bytes_same (TREE_IMAGPART (val)))
95738fd1498Szrj return 0;
95838fd1498Szrj break;
95938fd1498Szrj case VECTOR_CST:
96038fd1498Szrj {
96138fd1498Szrj unsigned int count = vector_cst_encoded_nelts (val);
96238fd1498Szrj unsigned int j;
96338fd1498Szrj for (j = 0; j < count; ++j)
96438fd1498Szrj if (const_with_all_bytes_same (VECTOR_CST_ENCODED_ELT (val, j)))
96538fd1498Szrj break;
96638fd1498Szrj if (j == count)
96738fd1498Szrj return 0;
96838fd1498Szrj break;
96938fd1498Szrj }
97038fd1498Szrj default:
97138fd1498Szrj break;
97238fd1498Szrj }
97338fd1498Szrj }
97438fd1498Szrj
97538fd1498Szrj if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
97638fd1498Szrj return -1;
97738fd1498Szrj
97838fd1498Szrj len = native_encode_expr (val, buf, sizeof (buf));
97938fd1498Szrj if (len == 0)
98038fd1498Szrj return -1;
98138fd1498Szrj for (i = 1; i < len; i++)
98238fd1498Szrj if (buf[i] != buf[0])
98338fd1498Szrj return -1;
98438fd1498Szrj return buf[0];
98538fd1498Szrj }
98638fd1498Szrj
98738fd1498Szrj /* Generate a call to memset for PARTITION in LOOP. */
98838fd1498Szrj
98938fd1498Szrj static void
generate_memset_builtin(struct loop * loop,partition * partition)99038fd1498Szrj generate_memset_builtin (struct loop *loop, partition *partition)
99138fd1498Szrj {
99238fd1498Szrj gimple_stmt_iterator gsi;
99338fd1498Szrj tree mem, fn, nb_bytes;
99438fd1498Szrj tree val;
99538fd1498Szrj struct builtin_info *builtin = partition->builtin;
99638fd1498Szrj gimple *fn_call;
99738fd1498Szrj
99838fd1498Szrj /* The new statements will be placed before LOOP. */
99938fd1498Szrj gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
100038fd1498Szrj
1001*58e805e6Szrj nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
100238fd1498Szrj nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
100338fd1498Szrj false, GSI_CONTINUE_LINKING);
100438fd1498Szrj mem = builtin->dst_base;
100538fd1498Szrj mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
100638fd1498Szrj false, GSI_CONTINUE_LINKING);
100738fd1498Szrj
100838fd1498Szrj /* This exactly matches the pattern recognition in classify_partition. */
100938fd1498Szrj val = gimple_assign_rhs1 (DR_STMT (builtin->dst_dr));
101038fd1498Szrj /* Handle constants like 0x15151515 and similarly
101138fd1498Szrj floating point constants etc. where all bytes are the same. */
101238fd1498Szrj int bytev = const_with_all_bytes_same (val);
101338fd1498Szrj if (bytev != -1)
101438fd1498Szrj val = build_int_cst (integer_type_node, bytev);
101538fd1498Szrj else if (TREE_CODE (val) == INTEGER_CST)
101638fd1498Szrj val = fold_convert (integer_type_node, val);
101738fd1498Szrj else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
101838fd1498Szrj {
101938fd1498Szrj tree tem = make_ssa_name (integer_type_node);
102038fd1498Szrj gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
102138fd1498Szrj gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
102238fd1498Szrj val = tem;
102338fd1498Szrj }
102438fd1498Szrj
102538fd1498Szrj fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
102638fd1498Szrj fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
102738fd1498Szrj gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
102838fd1498Szrj
102938fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
103038fd1498Szrj {
103138fd1498Szrj fprintf (dump_file, "generated memset");
103238fd1498Szrj if (bytev == 0)
103338fd1498Szrj fprintf (dump_file, " zero\n");
103438fd1498Szrj else
103538fd1498Szrj fprintf (dump_file, "\n");
103638fd1498Szrj }
103738fd1498Szrj }
103838fd1498Szrj
103938fd1498Szrj /* Generate a call to memcpy for PARTITION in LOOP. */
104038fd1498Szrj
104138fd1498Szrj static void
generate_memcpy_builtin(struct loop * loop,partition * partition)104238fd1498Szrj generate_memcpy_builtin (struct loop *loop, partition *partition)
104338fd1498Szrj {
104438fd1498Szrj gimple_stmt_iterator gsi;
104538fd1498Szrj gimple *fn_call;
104638fd1498Szrj tree dest, src, fn, nb_bytes;
104738fd1498Szrj enum built_in_function kind;
104838fd1498Szrj struct builtin_info *builtin = partition->builtin;
104938fd1498Szrj
105038fd1498Szrj /* The new statements will be placed before LOOP. */
105138fd1498Szrj gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
105238fd1498Szrj
1053*58e805e6Szrj nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
105438fd1498Szrj nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
105538fd1498Szrj false, GSI_CONTINUE_LINKING);
105638fd1498Szrj dest = builtin->dst_base;
105738fd1498Szrj src = builtin->src_base;
105838fd1498Szrj if (partition->kind == PKIND_MEMCPY
105938fd1498Szrj || ! ptr_derefs_may_alias_p (dest, src))
106038fd1498Szrj kind = BUILT_IN_MEMCPY;
106138fd1498Szrj else
106238fd1498Szrj kind = BUILT_IN_MEMMOVE;
106338fd1498Szrj
106438fd1498Szrj dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
106538fd1498Szrj false, GSI_CONTINUE_LINKING);
106638fd1498Szrj src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
106738fd1498Szrj false, GSI_CONTINUE_LINKING);
106838fd1498Szrj fn = build_fold_addr_expr (builtin_decl_implicit (kind));
106938fd1498Szrj fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
107038fd1498Szrj gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
107138fd1498Szrj
107238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
107338fd1498Szrj {
107438fd1498Szrj if (kind == BUILT_IN_MEMCPY)
107538fd1498Szrj fprintf (dump_file, "generated memcpy\n");
107638fd1498Szrj else
107738fd1498Szrj fprintf (dump_file, "generated memmove\n");
107838fd1498Szrj }
107938fd1498Szrj }
108038fd1498Szrj
108138fd1498Szrj /* Remove and destroy the loop LOOP. */
108238fd1498Szrj
108338fd1498Szrj static void
destroy_loop(struct loop * loop)108438fd1498Szrj destroy_loop (struct loop *loop)
108538fd1498Szrj {
108638fd1498Szrj unsigned nbbs = loop->num_nodes;
108738fd1498Szrj edge exit = single_exit (loop);
108838fd1498Szrj basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
108938fd1498Szrj basic_block *bbs;
109038fd1498Szrj unsigned i;
109138fd1498Szrj
109238fd1498Szrj bbs = get_loop_body_in_dom_order (loop);
109338fd1498Szrj
109438fd1498Szrj redirect_edge_pred (exit, src);
109538fd1498Szrj exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
109638fd1498Szrj exit->flags |= EDGE_FALLTHRU;
109738fd1498Szrj cancel_loop_tree (loop);
109838fd1498Szrj rescan_loop_exit (exit, false, true);
109938fd1498Szrj
110038fd1498Szrj i = nbbs;
110138fd1498Szrj do
110238fd1498Szrj {
110338fd1498Szrj /* We have made sure to not leave any dangling uses of SSA
110438fd1498Szrj names defined in the loop. With the exception of virtuals.
110538fd1498Szrj Make sure we replace all uses of virtual defs that will remain
110638fd1498Szrj outside of the loop with the bare symbol as delete_basic_block
110738fd1498Szrj will release them. */
110838fd1498Szrj --i;
110938fd1498Szrj for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
111038fd1498Szrj gsi_next (&gsi))
111138fd1498Szrj {
111238fd1498Szrj gphi *phi = gsi.phi ();
111338fd1498Szrj if (virtual_operand_p (gimple_phi_result (phi)))
111438fd1498Szrj mark_virtual_phi_result_for_renaming (phi);
111538fd1498Szrj }
111638fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
111738fd1498Szrj gsi_next (&gsi))
111838fd1498Szrj {
111938fd1498Szrj gimple *stmt = gsi_stmt (gsi);
112038fd1498Szrj tree vdef = gimple_vdef (stmt);
112138fd1498Szrj if (vdef && TREE_CODE (vdef) == SSA_NAME)
112238fd1498Szrj mark_virtual_operand_for_renaming (vdef);
112338fd1498Szrj }
112438fd1498Szrj delete_basic_block (bbs[i]);
112538fd1498Szrj }
112638fd1498Szrj while (i != 0);
112738fd1498Szrj
112838fd1498Szrj free (bbs);
112938fd1498Szrj
113038fd1498Szrj set_immediate_dominator (CDI_DOMINATORS, dest,
113138fd1498Szrj recompute_dominator (CDI_DOMINATORS, dest));
113238fd1498Szrj }
113338fd1498Szrj
113438fd1498Szrj /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
113538fd1498Szrj
113638fd1498Szrj static bool
generate_code_for_partition(struct loop * loop,partition * partition,bool copy_p)113738fd1498Szrj generate_code_for_partition (struct loop *loop,
113838fd1498Szrj partition *partition, bool copy_p)
113938fd1498Szrj {
114038fd1498Szrj switch (partition->kind)
114138fd1498Szrj {
114238fd1498Szrj case PKIND_NORMAL:
114338fd1498Szrj case PKIND_PARTIAL_MEMSET:
114438fd1498Szrj /* Reductions all have to be in the last partition. */
114538fd1498Szrj gcc_assert (!partition_reduction_p (partition)
114638fd1498Szrj || !copy_p);
114738fd1498Szrj generate_loops_for_partition (loop, partition, copy_p);
114838fd1498Szrj return false;
114938fd1498Szrj
115038fd1498Szrj case PKIND_MEMSET:
115138fd1498Szrj generate_memset_builtin (loop, partition);
115238fd1498Szrj break;
115338fd1498Szrj
115438fd1498Szrj case PKIND_MEMCPY:
115538fd1498Szrj case PKIND_MEMMOVE:
115638fd1498Szrj generate_memcpy_builtin (loop, partition);
115738fd1498Szrj break;
115838fd1498Szrj
115938fd1498Szrj default:
116038fd1498Szrj gcc_unreachable ();
116138fd1498Szrj }
116238fd1498Szrj
116338fd1498Szrj /* Common tail for partitions we turn into a call. If this was the last
116438fd1498Szrj partition for which we generate code, we have to destroy the loop. */
116538fd1498Szrj if (!copy_p)
116638fd1498Szrj return true;
116738fd1498Szrj return false;
116838fd1498Szrj }
116938fd1498Szrj
117038fd1498Szrj /* Return data dependence relation for data references A and B. The two
117138fd1498Szrj data references must be in lexicographic order wrto reduced dependence
117238fd1498Szrj graph RDG. We firstly try to find ddr from global ddr hash table. If
117338fd1498Szrj it doesn't exist, compute the ddr and cache it. */
117438fd1498Szrj
117538fd1498Szrj static data_dependence_relation *
get_data_dependence(struct graph * rdg,data_reference_p a,data_reference_p b)117638fd1498Szrj get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
117738fd1498Szrj {
117838fd1498Szrj struct data_dependence_relation ent, **slot;
117938fd1498Szrj struct data_dependence_relation *ddr;
118038fd1498Szrj
118138fd1498Szrj gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b));
118238fd1498Szrj gcc_assert (rdg_vertex_for_stmt (rdg, DR_STMT (a))
118338fd1498Szrj <= rdg_vertex_for_stmt (rdg, DR_STMT (b)));
118438fd1498Szrj ent.a = a;
118538fd1498Szrj ent.b = b;
118638fd1498Szrj slot = ddrs_table->find_slot (&ent, INSERT);
118738fd1498Szrj if (*slot == NULL)
118838fd1498Szrj {
118938fd1498Szrj ddr = initialize_data_dependence_relation (a, b, loop_nest);
119038fd1498Szrj compute_affine_dependence (ddr, loop_nest[0]);
119138fd1498Szrj *slot = ddr;
119238fd1498Szrj }
119338fd1498Szrj
119438fd1498Szrj return *slot;
119538fd1498Szrj }
119638fd1498Szrj
119738fd1498Szrj /* In reduced dependence graph RDG for loop distribution, return true if
119838fd1498Szrj dependence between references DR1 and DR2 leads to a dependence cycle
119938fd1498Szrj and such dependence cycle can't be resolved by runtime alias check. */
120038fd1498Szrj
120138fd1498Szrj static bool
data_dep_in_cycle_p(struct graph * rdg,data_reference_p dr1,data_reference_p dr2)120238fd1498Szrj data_dep_in_cycle_p (struct graph *rdg,
120338fd1498Szrj data_reference_p dr1, data_reference_p dr2)
120438fd1498Szrj {
120538fd1498Szrj struct data_dependence_relation *ddr;
120638fd1498Szrj
120738fd1498Szrj /* Re-shuffle data-refs to be in topological order. */
120838fd1498Szrj if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
120938fd1498Szrj > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
121038fd1498Szrj std::swap (dr1, dr2);
121138fd1498Szrj
121238fd1498Szrj ddr = get_data_dependence (rdg, dr1, dr2);
121338fd1498Szrj
121438fd1498Szrj /* In case of no data dependence. */
121538fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
121638fd1498Szrj return false;
121738fd1498Szrj /* For unknown data dependence or known data dependence which can't be
121838fd1498Szrj expressed in classic distance vector, we check if it can be resolved
121938fd1498Szrj by runtime alias check. If yes, we still consider data dependence
122038fd1498Szrj as won't introduce data dependence cycle. */
122138fd1498Szrj else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
122238fd1498Szrj || DDR_NUM_DIST_VECTS (ddr) == 0)
122338fd1498Szrj return !runtime_alias_check_p (ddr, NULL, true);
122438fd1498Szrj else if (DDR_NUM_DIST_VECTS (ddr) > 1)
122538fd1498Szrj return true;
122638fd1498Szrj else if (DDR_REVERSED_P (ddr)
122738fd1498Szrj || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
122838fd1498Szrj return false;
122938fd1498Szrj
123038fd1498Szrj return true;
123138fd1498Szrj }
123238fd1498Szrj
123338fd1498Szrj /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
123438fd1498Szrj PARTITION1's type after merging PARTITION2 into PARTITION1. */
123538fd1498Szrj
123638fd1498Szrj static void
update_type_for_merge(struct graph * rdg,partition * partition1,partition * partition2)123738fd1498Szrj update_type_for_merge (struct graph *rdg,
123838fd1498Szrj partition *partition1, partition *partition2)
123938fd1498Szrj {
124038fd1498Szrj unsigned i, j;
124138fd1498Szrj bitmap_iterator bi, bj;
124238fd1498Szrj data_reference_p dr1, dr2;
124338fd1498Szrj
124438fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
124538fd1498Szrj {
124638fd1498Szrj unsigned start = (partition1 == partition2) ? i + 1 : 0;
124738fd1498Szrj
124838fd1498Szrj dr1 = datarefs_vec[i];
124938fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, start, j, bj)
125038fd1498Szrj {
125138fd1498Szrj dr2 = datarefs_vec[j];
125238fd1498Szrj if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
125338fd1498Szrj continue;
125438fd1498Szrj
125538fd1498Szrj /* Partition can only be executed sequentially if there is any
125638fd1498Szrj data dependence cycle. */
125738fd1498Szrj if (data_dep_in_cycle_p (rdg, dr1, dr2))
125838fd1498Szrj {
125938fd1498Szrj partition1->type = PTYPE_SEQUENTIAL;
126038fd1498Szrj return;
126138fd1498Szrj }
126238fd1498Szrj }
126338fd1498Szrj }
126438fd1498Szrj }
126538fd1498Szrj
126638fd1498Szrj /* Returns a partition with all the statements needed for computing
126738fd1498Szrj the vertex V of the RDG, also including the loop exit conditions. */
126838fd1498Szrj
126938fd1498Szrj static partition *
build_rdg_partition_for_vertex(struct graph * rdg,int v)127038fd1498Szrj build_rdg_partition_for_vertex (struct graph *rdg, int v)
127138fd1498Szrj {
127238fd1498Szrj partition *partition = partition_alloc ();
127338fd1498Szrj auto_vec<int, 3> nodes;
127438fd1498Szrj unsigned i, j;
127538fd1498Szrj int x;
127638fd1498Szrj data_reference_p dr;
127738fd1498Szrj
127838fd1498Szrj graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
127938fd1498Szrj
128038fd1498Szrj FOR_EACH_VEC_ELT (nodes, i, x)
128138fd1498Szrj {
128238fd1498Szrj bitmap_set_bit (partition->stmts, x);
128338fd1498Szrj
128438fd1498Szrj for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j)
128538fd1498Szrj {
128638fd1498Szrj unsigned idx = (unsigned) DR_INDEX (dr);
128738fd1498Szrj gcc_assert (idx < datarefs_vec.length ());
128838fd1498Szrj
128938fd1498Szrj /* Partition can only be executed sequentially if there is any
129038fd1498Szrj unknown data reference. */
129138fd1498Szrj if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr)
129238fd1498Szrj || !DR_INIT (dr) || !DR_STEP (dr))
129338fd1498Szrj partition->type = PTYPE_SEQUENTIAL;
129438fd1498Szrj
129538fd1498Szrj bitmap_set_bit (partition->datarefs, idx);
129638fd1498Szrj }
129738fd1498Szrj }
129838fd1498Szrj
129938fd1498Szrj if (partition->type == PTYPE_SEQUENTIAL)
130038fd1498Szrj return partition;
130138fd1498Szrj
130238fd1498Szrj /* Further check if any data dependence prevents us from executing the
130338fd1498Szrj partition parallelly. */
130438fd1498Szrj update_type_for_merge (rdg, partition, partition);
130538fd1498Szrj
130638fd1498Szrj return partition;
130738fd1498Szrj }
130838fd1498Szrj
130938fd1498Szrj /* Given PARTITION of LOOP and RDG, record single load/store data references
131038fd1498Szrj for builtin partition in SRC_DR/DST_DR, return false if there is no such
131138fd1498Szrj data references. */
131238fd1498Szrj
131338fd1498Szrj static bool
find_single_drs(struct loop * loop,struct graph * rdg,partition * partition,data_reference_p * dst_dr,data_reference_p * src_dr)131438fd1498Szrj find_single_drs (struct loop *loop, struct graph *rdg, partition *partition,
131538fd1498Szrj data_reference_p *dst_dr, data_reference_p *src_dr)
131638fd1498Szrj {
131738fd1498Szrj unsigned i;
131838fd1498Szrj data_reference_p single_ld = NULL, single_st = NULL;
131938fd1498Szrj bitmap_iterator bi;
132038fd1498Szrj
132138fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
132238fd1498Szrj {
132338fd1498Szrj gimple *stmt = RDG_STMT (rdg, i);
132438fd1498Szrj data_reference_p dr;
132538fd1498Szrj
132638fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
132738fd1498Szrj continue;
132838fd1498Szrj
132938fd1498Szrj /* Any scalar stmts are ok. */
133038fd1498Szrj if (!gimple_vuse (stmt))
133138fd1498Szrj continue;
133238fd1498Szrj
133338fd1498Szrj /* Otherwise just regular loads/stores. */
133438fd1498Szrj if (!gimple_assign_single_p (stmt))
133538fd1498Szrj return false;
133638fd1498Szrj
133738fd1498Szrj /* But exactly one store and/or load. */
133838fd1498Szrj for (unsigned j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
133938fd1498Szrj {
134038fd1498Szrj tree type = TREE_TYPE (DR_REF (dr));
134138fd1498Szrj
134238fd1498Szrj /* The memset, memcpy and memmove library calls are only
134338fd1498Szrj able to deal with generic address space. */
134438fd1498Szrj if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
134538fd1498Szrj return false;
134638fd1498Szrj
134738fd1498Szrj if (DR_IS_READ (dr))
134838fd1498Szrj {
134938fd1498Szrj if (single_ld != NULL)
135038fd1498Szrj return false;
135138fd1498Szrj single_ld = dr;
135238fd1498Szrj }
135338fd1498Szrj else
135438fd1498Szrj {
135538fd1498Szrj if (single_st != NULL)
135638fd1498Szrj return false;
135738fd1498Szrj single_st = dr;
135838fd1498Szrj }
135938fd1498Szrj }
136038fd1498Szrj }
136138fd1498Szrj
136238fd1498Szrj if (!single_st)
136338fd1498Szrj return false;
136438fd1498Szrj
136538fd1498Szrj /* Bail out if this is a bitfield memory reference. */
136638fd1498Szrj if (TREE_CODE (DR_REF (single_st)) == COMPONENT_REF
136738fd1498Szrj && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st), 1)))
136838fd1498Szrj return false;
136938fd1498Szrj
137038fd1498Szrj /* Data reference must be executed exactly once per iteration of each
137138fd1498Szrj loop in the loop nest. We only need to check dominance information
137238fd1498Szrj against the outermost one in a perfect loop nest because a bb can't
137338fd1498Szrj dominate outermost loop's latch without dominating inner loop's. */
137438fd1498Szrj basic_block bb_st = gimple_bb (DR_STMT (single_st));
137538fd1498Szrj if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_st))
137638fd1498Szrj return false;
137738fd1498Szrj
137838fd1498Szrj if (single_ld)
137938fd1498Szrj {
138038fd1498Szrj gimple *store = DR_STMT (single_st), *load = DR_STMT (single_ld);
138138fd1498Szrj /* Direct aggregate copy or via an SSA name temporary. */
138238fd1498Szrj if (load != store
138338fd1498Szrj && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
138438fd1498Szrj return false;
138538fd1498Szrj
138638fd1498Szrj /* Bail out if this is a bitfield memory reference. */
138738fd1498Szrj if (TREE_CODE (DR_REF (single_ld)) == COMPONENT_REF
138838fd1498Szrj && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_ld), 1)))
138938fd1498Szrj return false;
139038fd1498Szrj
139138fd1498Szrj /* Load and store must be in the same loop nest. */
139238fd1498Szrj basic_block bb_ld = gimple_bb (DR_STMT (single_ld));
139338fd1498Szrj if (bb_st->loop_father != bb_ld->loop_father)
139438fd1498Szrj return false;
139538fd1498Szrj
139638fd1498Szrj /* Data reference must be executed exactly once per iteration.
139738fd1498Szrj Same as single_st, we only need to check against the outermost
139838fd1498Szrj loop. */
139938fd1498Szrj if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_ld))
140038fd1498Szrj return false;
140138fd1498Szrj
140238fd1498Szrj edge e = single_exit (bb_st->loop_father);
140338fd1498Szrj bool dom_ld = dominated_by_p (CDI_DOMINATORS, e->src, bb_ld);
140438fd1498Szrj bool dom_st = dominated_by_p (CDI_DOMINATORS, e->src, bb_st);
140538fd1498Szrj if (dom_ld != dom_st)
140638fd1498Szrj return false;
140738fd1498Szrj }
140838fd1498Szrj
140938fd1498Szrj *src_dr = single_ld;
141038fd1498Szrj *dst_dr = single_st;
141138fd1498Szrj return true;
141238fd1498Szrj }
141338fd1498Szrj
141438fd1498Szrj /* Given data reference DR in LOOP_NEST, this function checks the enclosing
141538fd1498Szrj loops from inner to outer to see if loop's step equals to access size at
141638fd1498Szrj each level of loop. Return 2 if we can prove this at all level loops;
141738fd1498Szrj record access base and size in BASE and SIZE; save loop's step at each
141838fd1498Szrj level of loop in STEPS if it is not null. For example:
141938fd1498Szrj
142038fd1498Szrj int arr[100][100][100];
142138fd1498Szrj for (i = 0; i < 100; i++) ;steps[2] = 40000
142238fd1498Szrj for (j = 100; j > 0; j--) ;steps[1] = -400
142338fd1498Szrj for (k = 0; k < 100; k++) ;steps[0] = 4
142438fd1498Szrj arr[i][j - 1][k] = 0; ;base = &arr, size = 4000000
142538fd1498Szrj
142638fd1498Szrj Return 1 if we can prove the equality at the innermost loop, but not all
142738fd1498Szrj level loops. In this case, no information is recorded.
142838fd1498Szrj
142938fd1498Szrj Return 0 if no equality can be proven at any level loops. */
143038fd1498Szrj
143138fd1498Szrj static int
143238fd1498Szrj compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base,
143338fd1498Szrj tree *size, vec<tree> *steps = NULL)
143438fd1498Szrj {
143538fd1498Szrj location_t loc = gimple_location (DR_STMT (dr));
143638fd1498Szrj basic_block bb = gimple_bb (DR_STMT (dr));
143738fd1498Szrj struct loop *loop = bb->loop_father;
143838fd1498Szrj tree ref = DR_REF (dr);
143938fd1498Szrj tree access_base = build_fold_addr_expr (ref);
144038fd1498Szrj tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
144138fd1498Szrj int res = 0;
144238fd1498Szrj
144338fd1498Szrj do {
144438fd1498Szrj tree scev_fn = analyze_scalar_evolution (loop, access_base);
144538fd1498Szrj if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
144638fd1498Szrj return res;
144738fd1498Szrj
144838fd1498Szrj access_base = CHREC_LEFT (scev_fn);
144938fd1498Szrj if (tree_contains_chrecs (access_base, NULL))
145038fd1498Szrj return res;
145138fd1498Szrj
145238fd1498Szrj tree scev_step = CHREC_RIGHT (scev_fn);
145338fd1498Szrj /* Only support constant steps. */
145438fd1498Szrj if (TREE_CODE (scev_step) != INTEGER_CST)
145538fd1498Szrj return res;
145638fd1498Szrj
145738fd1498Szrj enum ev_direction access_dir = scev_direction (scev_fn);
145838fd1498Szrj if (access_dir == EV_DIR_UNKNOWN)
145938fd1498Szrj return res;
146038fd1498Szrj
146138fd1498Szrj if (steps != NULL)
146238fd1498Szrj steps->safe_push (scev_step);
146338fd1498Szrj
146438fd1498Szrj scev_step = fold_convert_loc (loc, sizetype, scev_step);
146538fd1498Szrj /* Compute absolute value of scev step. */
146638fd1498Szrj if (access_dir == EV_DIR_DECREASES)
146738fd1498Szrj scev_step = fold_build1_loc (loc, NEGATE_EXPR, sizetype, scev_step);
146838fd1498Szrj
146938fd1498Szrj /* At each level of loop, scev step must equal to access size. In other
147038fd1498Szrj words, DR must access consecutive memory between loop iterations. */
147138fd1498Szrj if (!operand_equal_p (scev_step, access_size, 0))
147238fd1498Szrj return res;
147338fd1498Szrj
147438fd1498Szrj /* Access stride can be computed for data reference at least for the
147538fd1498Szrj innermost loop. */
147638fd1498Szrj res = 1;
147738fd1498Szrj
147838fd1498Szrj /* Compute DR's execution times in loop. */
147938fd1498Szrj tree niters = number_of_latch_executions (loop);
148038fd1498Szrj niters = fold_convert_loc (loc, sizetype, niters);
148138fd1498Szrj if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, bb))
148238fd1498Szrj niters = size_binop_loc (loc, PLUS_EXPR, niters, size_one_node);
148338fd1498Szrj
148438fd1498Szrj /* Compute DR's overall access size in loop. */
148538fd1498Szrj access_size = fold_build2_loc (loc, MULT_EXPR, sizetype,
148638fd1498Szrj niters, scev_step);
148738fd1498Szrj /* Adjust base address in case of negative step. */
148838fd1498Szrj if (access_dir == EV_DIR_DECREASES)
148938fd1498Szrj {
149038fd1498Szrj tree adj = fold_build2_loc (loc, MINUS_EXPR, sizetype,
149138fd1498Szrj scev_step, access_size);
149238fd1498Szrj access_base = fold_build_pointer_plus_loc (loc, access_base, adj);
149338fd1498Szrj }
149438fd1498Szrj } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);
149538fd1498Szrj
149638fd1498Szrj *base = access_base;
149738fd1498Szrj *size = access_size;
149838fd1498Szrj /* Access stride can be computed for data reference at each level loop. */
149938fd1498Szrj return 2;
150038fd1498Szrj }
150138fd1498Szrj
150238fd1498Szrj /* Allocate and return builtin struct. Record information like DST_DR,
150338fd1498Szrj SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct. */
150438fd1498Szrj
150538fd1498Szrj static struct builtin_info *
alloc_builtin(data_reference_p dst_dr,data_reference_p src_dr,tree dst_base,tree src_base,tree size)150638fd1498Szrj alloc_builtin (data_reference_p dst_dr, data_reference_p src_dr,
150738fd1498Szrj tree dst_base, tree src_base, tree size)
150838fd1498Szrj {
150938fd1498Szrj struct builtin_info *builtin = XNEW (struct builtin_info);
151038fd1498Szrj builtin->dst_dr = dst_dr;
151138fd1498Szrj builtin->src_dr = src_dr;
151238fd1498Szrj builtin->dst_base = dst_base;
151338fd1498Szrj builtin->src_base = src_base;
151438fd1498Szrj builtin->size = size;
151538fd1498Szrj return builtin;
151638fd1498Szrj }
151738fd1498Szrj
151838fd1498Szrj /* Given data reference DR in loop nest LOOP, classify if it forms builtin
151938fd1498Szrj memset call. */
152038fd1498Szrj
152138fd1498Szrj static void
classify_builtin_st(loop_p loop,partition * partition,data_reference_p dr)152238fd1498Szrj classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
152338fd1498Szrj {
152438fd1498Szrj gimple *stmt = DR_STMT (dr);
152538fd1498Szrj tree base, size, rhs = gimple_assign_rhs1 (stmt);
152638fd1498Szrj
152738fd1498Szrj if (const_with_all_bytes_same (rhs) == -1
152838fd1498Szrj && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
152938fd1498Szrj || (TYPE_MODE (TREE_TYPE (rhs))
153038fd1498Szrj != TYPE_MODE (unsigned_char_type_node))))
153138fd1498Szrj return;
153238fd1498Szrj
153338fd1498Szrj if (TREE_CODE (rhs) == SSA_NAME
153438fd1498Szrj && !SSA_NAME_IS_DEFAULT_DEF (rhs)
153538fd1498Szrj && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
153638fd1498Szrj return;
153738fd1498Szrj
153838fd1498Szrj int res = compute_access_range (loop, dr, &base, &size);
153938fd1498Szrj if (res == 0)
154038fd1498Szrj return;
154138fd1498Szrj if (res == 1)
154238fd1498Szrj {
154338fd1498Szrj partition->kind = PKIND_PARTIAL_MEMSET;
154438fd1498Szrj return;
154538fd1498Szrj }
154638fd1498Szrj
154738fd1498Szrj poly_uint64 base_offset;
154838fd1498Szrj unsigned HOST_WIDE_INT const_base_offset;
154938fd1498Szrj tree base_base = strip_offset (base, &base_offset);
155038fd1498Szrj if (!base_offset.is_constant (&const_base_offset))
155138fd1498Szrj return;
155238fd1498Szrj
155338fd1498Szrj struct builtin_info *builtin;
155438fd1498Szrj builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
155538fd1498Szrj builtin->dst_base_base = base_base;
155638fd1498Szrj builtin->dst_base_offset = const_base_offset;
155738fd1498Szrj partition->builtin = builtin;
155838fd1498Szrj partition->kind = PKIND_MEMSET;
155938fd1498Szrj }
156038fd1498Szrj
156138fd1498Szrj /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
156238fd1498Szrj if it forms builtin memcpy or memmove call. */
156338fd1498Szrj
156438fd1498Szrj static void
classify_builtin_ldst(loop_p loop,struct graph * rdg,partition * partition,data_reference_p dst_dr,data_reference_p src_dr)156538fd1498Szrj classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
156638fd1498Szrj data_reference_p dst_dr, data_reference_p src_dr)
156738fd1498Szrj {
156838fd1498Szrj tree base, size, src_base, src_size;
156938fd1498Szrj auto_vec<tree> dst_steps, src_steps;
157038fd1498Szrj
157138fd1498Szrj /* Compute access range of both load and store. */
157238fd1498Szrj int res = compute_access_range (loop, dst_dr, &base, &size, &dst_steps);
157338fd1498Szrj if (res != 2)
157438fd1498Szrj return;
157538fd1498Szrj res = compute_access_range (loop, src_dr, &src_base, &src_size, &src_steps);
157638fd1498Szrj if (res != 2)
157738fd1498Szrj return;
157838fd1498Szrj
157938fd1498Szrj /* They much have the same access size. */
158038fd1498Szrj if (!operand_equal_p (size, src_size, 0))
158138fd1498Szrj return;
158238fd1498Szrj
158338fd1498Szrj /* Load and store in loop nest must access memory in the same way, i.e,
158438fd1498Szrj their must have the same steps in each loop of the nest. */
158538fd1498Szrj if (dst_steps.length () != src_steps.length ())
158638fd1498Szrj return;
158738fd1498Szrj for (unsigned i = 0; i < dst_steps.length (); ++i)
158838fd1498Szrj if (!operand_equal_p (dst_steps[i], src_steps[i], 0))
158938fd1498Szrj return;
159038fd1498Szrj
159138fd1498Szrj /* Now check that if there is a dependence. */
159238fd1498Szrj ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);
159338fd1498Szrj
159438fd1498Szrj /* Classify as memcpy if no dependence between load and store. */
159538fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
159638fd1498Szrj {
159738fd1498Szrj partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
159838fd1498Szrj partition->kind = PKIND_MEMCPY;
159938fd1498Szrj return;
160038fd1498Szrj }
160138fd1498Szrj
160238fd1498Szrj /* Can't do memmove in case of unknown dependence or dependence without
160338fd1498Szrj classical distance vector. */
160438fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
160538fd1498Szrj || DDR_NUM_DIST_VECTS (ddr) == 0)
160638fd1498Szrj return;
160738fd1498Szrj
160838fd1498Szrj unsigned i;
160938fd1498Szrj lambda_vector dist_v;
161038fd1498Szrj int num_lev = (DDR_LOOP_NEST (ddr)).length ();
161138fd1498Szrj FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
161238fd1498Szrj {
161338fd1498Szrj unsigned dep_lev = dependence_level (dist_v, num_lev);
161438fd1498Szrj /* Can't do memmove if load depends on store. */
161538fd1498Szrj if (dep_lev > 0 && dist_v[dep_lev - 1] > 0 && !DDR_REVERSED_P (ddr))
161638fd1498Szrj return;
161738fd1498Szrj }
161838fd1498Szrj
161938fd1498Szrj partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
162038fd1498Szrj partition->kind = PKIND_MEMMOVE;
162138fd1498Szrj return;
162238fd1498Szrj }
162338fd1498Szrj
162438fd1498Szrj /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
162538fd1498Szrj For the moment we detect memset, memcpy and memmove patterns. Bitmap
162638fd1498Szrj STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
162738fd1498Szrj
162838fd1498Szrj static void
classify_partition(loop_p loop,struct graph * rdg,partition * partition,bitmap stmt_in_all_partitions)162938fd1498Szrj classify_partition (loop_p loop, struct graph *rdg, partition *partition,
163038fd1498Szrj bitmap stmt_in_all_partitions)
163138fd1498Szrj {
163238fd1498Szrj bitmap_iterator bi;
163338fd1498Szrj unsigned i;
163438fd1498Szrj data_reference_p single_ld = NULL, single_st = NULL;
163538fd1498Szrj bool volatiles_p = false, has_reduction = false;
163638fd1498Szrj
163738fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
163838fd1498Szrj {
163938fd1498Szrj gimple *stmt = RDG_STMT (rdg, i);
164038fd1498Szrj
164138fd1498Szrj if (gimple_has_volatile_ops (stmt))
164238fd1498Szrj volatiles_p = true;
164338fd1498Szrj
164438fd1498Szrj /* If the stmt is not included by all partitions and there is uses
164538fd1498Szrj outside of the loop, then mark the partition as reduction. */
164638fd1498Szrj if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
164738fd1498Szrj {
164838fd1498Szrj /* Due to limitation in the transform phase we have to fuse all
164938fd1498Szrj reduction partitions. As a result, this could cancel valid
165038fd1498Szrj loop distribution especially for loop that induction variable
165138fd1498Szrj is used outside of loop. To workaround this issue, we skip
165238fd1498Szrj marking partition as reudction if the reduction stmt belongs
165338fd1498Szrj to all partitions. In such case, reduction will be computed
165438fd1498Szrj correctly no matter how partitions are fused/distributed. */
165538fd1498Szrj if (!bitmap_bit_p (stmt_in_all_partitions, i))
165638fd1498Szrj {
165738fd1498Szrj partition->reduction_p = true;
165838fd1498Szrj return;
165938fd1498Szrj }
166038fd1498Szrj has_reduction = true;
166138fd1498Szrj }
166238fd1498Szrj }
166338fd1498Szrj
166438fd1498Szrj /* Perform general partition disqualification for builtins. */
166538fd1498Szrj if (volatiles_p
166638fd1498Szrj /* Simple workaround to prevent classifying the partition as builtin
166738fd1498Szrj if it contains any use outside of loop. */
166838fd1498Szrj || has_reduction
166938fd1498Szrj || !flag_tree_loop_distribute_patterns)
167038fd1498Szrj return;
167138fd1498Szrj
167238fd1498Szrj /* Find single load/store data references for builtin partition. */
167338fd1498Szrj if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
167438fd1498Szrj return;
167538fd1498Szrj
167638fd1498Szrj /* Classify the builtin kind. */
167738fd1498Szrj if (single_ld == NULL)
167838fd1498Szrj classify_builtin_st (loop, partition, single_st);
167938fd1498Szrj else
168038fd1498Szrj classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
168138fd1498Szrj }
168238fd1498Szrj
168338fd1498Szrj /* Returns true when PARTITION1 and PARTITION2 access the same memory
168438fd1498Szrj object in RDG. */
168538fd1498Szrj
168638fd1498Szrj static bool
share_memory_accesses(struct graph * rdg,partition * partition1,partition * partition2)168738fd1498Szrj share_memory_accesses (struct graph *rdg,
168838fd1498Szrj partition *partition1, partition *partition2)
168938fd1498Szrj {
169038fd1498Szrj unsigned i, j;
169138fd1498Szrj bitmap_iterator bi, bj;
169238fd1498Szrj data_reference_p dr1, dr2;
169338fd1498Szrj
169438fd1498Szrj /* First check whether in the intersection of the two partitions are
169538fd1498Szrj any loads or stores. Common loads are the situation that happens
169638fd1498Szrj most often. */
169738fd1498Szrj EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
169838fd1498Szrj if (RDG_MEM_WRITE_STMT (rdg, i)
169938fd1498Szrj || RDG_MEM_READS_STMT (rdg, i))
170038fd1498Szrj return true;
170138fd1498Szrj
170238fd1498Szrj /* Then check whether the two partitions access the same memory object. */
170338fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
170438fd1498Szrj {
170538fd1498Szrj dr1 = datarefs_vec[i];
170638fd1498Szrj
170738fd1498Szrj if (!DR_BASE_ADDRESS (dr1)
170838fd1498Szrj || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
170938fd1498Szrj continue;
171038fd1498Szrj
171138fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
171238fd1498Szrj {
171338fd1498Szrj dr2 = datarefs_vec[j];
171438fd1498Szrj
171538fd1498Szrj if (!DR_BASE_ADDRESS (dr2)
171638fd1498Szrj || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
171738fd1498Szrj continue;
171838fd1498Szrj
171938fd1498Szrj if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
172038fd1498Szrj && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
172138fd1498Szrj && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
172238fd1498Szrj && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
172338fd1498Szrj return true;
172438fd1498Szrj }
172538fd1498Szrj }
172638fd1498Szrj
172738fd1498Szrj return false;
172838fd1498Szrj }
172938fd1498Szrj
173038fd1498Szrj /* For each seed statement in STARTING_STMTS, this function builds
173138fd1498Szrj partition for it by adding depended statements according to RDG.
173238fd1498Szrj All partitions are recorded in PARTITIONS. */
173338fd1498Szrj
173438fd1498Szrj static void
rdg_build_partitions(struct graph * rdg,vec<gimple * > starting_stmts,vec<partition * > * partitions)173538fd1498Szrj rdg_build_partitions (struct graph *rdg,
173638fd1498Szrj vec<gimple *> starting_stmts,
173738fd1498Szrj vec<partition *> *partitions)
173838fd1498Szrj {
173938fd1498Szrj auto_bitmap processed;
174038fd1498Szrj int i;
174138fd1498Szrj gimple *stmt;
174238fd1498Szrj
174338fd1498Szrj FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
174438fd1498Szrj {
174538fd1498Szrj int v = rdg_vertex_for_stmt (rdg, stmt);
174638fd1498Szrj
174738fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
174838fd1498Szrj fprintf (dump_file,
174938fd1498Szrj "ldist asked to generate code for vertex %d\n", v);
175038fd1498Szrj
175138fd1498Szrj /* If the vertex is already contained in another partition so
175238fd1498Szrj is the partition rooted at it. */
175338fd1498Szrj if (bitmap_bit_p (processed, v))
175438fd1498Szrj continue;
175538fd1498Szrj
175638fd1498Szrj partition *partition = build_rdg_partition_for_vertex (rdg, v);
175738fd1498Szrj bitmap_ior_into (processed, partition->stmts);
175838fd1498Szrj
175938fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
176038fd1498Szrj {
176138fd1498Szrj fprintf (dump_file, "ldist creates useful %s partition:\n",
176238fd1498Szrj partition->type == PTYPE_PARALLEL ? "parallel" : "sequent");
176338fd1498Szrj bitmap_print (dump_file, partition->stmts, " ", "\n");
176438fd1498Szrj }
176538fd1498Szrj
176638fd1498Szrj partitions->safe_push (partition);
176738fd1498Szrj }
176838fd1498Szrj
176938fd1498Szrj /* All vertices should have been assigned to at least one partition now,
177038fd1498Szrj other than vertices belonging to dead code. */
177138fd1498Szrj }
177238fd1498Szrj
177338fd1498Szrj /* Dump to FILE the PARTITIONS. */
177438fd1498Szrj
177538fd1498Szrj static void
dump_rdg_partitions(FILE * file,vec<partition * > partitions)177638fd1498Szrj dump_rdg_partitions (FILE *file, vec<partition *> partitions)
177738fd1498Szrj {
177838fd1498Szrj int i;
177938fd1498Szrj partition *partition;
178038fd1498Szrj
178138fd1498Szrj FOR_EACH_VEC_ELT (partitions, i, partition)
178238fd1498Szrj debug_bitmap_file (file, partition->stmts);
178338fd1498Szrj }
178438fd1498Szrj
178538fd1498Szrj /* Debug PARTITIONS. */
178638fd1498Szrj extern void debug_rdg_partitions (vec<partition *> );
178738fd1498Szrj
178838fd1498Szrj DEBUG_FUNCTION void
debug_rdg_partitions(vec<partition * > partitions)178938fd1498Szrj debug_rdg_partitions (vec<partition *> partitions)
179038fd1498Szrj {
179138fd1498Szrj dump_rdg_partitions (stderr, partitions);
179238fd1498Szrj }
179338fd1498Szrj
179438fd1498Szrj /* Returns the number of read and write operations in the RDG. */
179538fd1498Szrj
179638fd1498Szrj static int
number_of_rw_in_rdg(struct graph * rdg)179738fd1498Szrj number_of_rw_in_rdg (struct graph *rdg)
179838fd1498Szrj {
179938fd1498Szrj int i, res = 0;
180038fd1498Szrj
180138fd1498Szrj for (i = 0; i < rdg->n_vertices; i++)
180238fd1498Szrj {
180338fd1498Szrj if (RDG_MEM_WRITE_STMT (rdg, i))
180438fd1498Szrj ++res;
180538fd1498Szrj
180638fd1498Szrj if (RDG_MEM_READS_STMT (rdg, i))
180738fd1498Szrj ++res;
180838fd1498Szrj }
180938fd1498Szrj
181038fd1498Szrj return res;
181138fd1498Szrj }
181238fd1498Szrj
181338fd1498Szrj /* Returns the number of read and write operations in a PARTITION of
181438fd1498Szrj the RDG. */
181538fd1498Szrj
181638fd1498Szrj static int
number_of_rw_in_partition(struct graph * rdg,partition * partition)181738fd1498Szrj number_of_rw_in_partition (struct graph *rdg, partition *partition)
181838fd1498Szrj {
181938fd1498Szrj int res = 0;
182038fd1498Szrj unsigned i;
182138fd1498Szrj bitmap_iterator ii;
182238fd1498Szrj
182338fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
182438fd1498Szrj {
182538fd1498Szrj if (RDG_MEM_WRITE_STMT (rdg, i))
182638fd1498Szrj ++res;
182738fd1498Szrj
182838fd1498Szrj if (RDG_MEM_READS_STMT (rdg, i))
182938fd1498Szrj ++res;
183038fd1498Szrj }
183138fd1498Szrj
183238fd1498Szrj return res;
183338fd1498Szrj }
183438fd1498Szrj
183538fd1498Szrj /* Returns true when one of the PARTITIONS contains all the read or
183638fd1498Szrj write operations of RDG. */
183738fd1498Szrj
183838fd1498Szrj static bool
partition_contains_all_rw(struct graph * rdg,vec<partition * > partitions)183938fd1498Szrj partition_contains_all_rw (struct graph *rdg,
184038fd1498Szrj vec<partition *> partitions)
184138fd1498Szrj {
184238fd1498Szrj int i;
184338fd1498Szrj partition *partition;
184438fd1498Szrj int nrw = number_of_rw_in_rdg (rdg);
184538fd1498Szrj
184638fd1498Szrj FOR_EACH_VEC_ELT (partitions, i, partition)
184738fd1498Szrj if (nrw == number_of_rw_in_partition (rdg, partition))
184838fd1498Szrj return true;
184938fd1498Szrj
185038fd1498Szrj return false;
185138fd1498Szrj }
185238fd1498Szrj
185338fd1498Szrj /* Compute partition dependence created by the data references in DRS1
185438fd1498Szrj and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
185538fd1498Szrj not NULL, we record dependence introduced by possible alias between
185638fd1498Szrj two data references in ALIAS_DDRS; otherwise, we simply ignore such
185738fd1498Szrj dependence as if it doesn't exist at all. */
185838fd1498Szrj
185938fd1498Szrj static int
pg_add_dependence_edges(struct graph * rdg,int dir,bitmap drs1,bitmap drs2,vec<ddr_p> * alias_ddrs)186038fd1498Szrj pg_add_dependence_edges (struct graph *rdg, int dir,
186138fd1498Szrj bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
186238fd1498Szrj {
186338fd1498Szrj unsigned i, j;
186438fd1498Szrj bitmap_iterator bi, bj;
186538fd1498Szrj data_reference_p dr1, dr2, saved_dr1;
186638fd1498Szrj
186738fd1498Szrj /* dependence direction - 0 is no dependence, -1 is back,
186838fd1498Szrj 1 is forth, 2 is both (we can stop then, merging will occur). */
186938fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (drs1, 0, i, bi)
187038fd1498Szrj {
187138fd1498Szrj dr1 = datarefs_vec[i];
187238fd1498Szrj
187338fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (drs2, 0, j, bj)
187438fd1498Szrj {
187538fd1498Szrj int res, this_dir = 1;
187638fd1498Szrj ddr_p ddr;
187738fd1498Szrj
187838fd1498Szrj dr2 = datarefs_vec[j];
187938fd1498Szrj
188038fd1498Szrj /* Skip all <read, read> data dependence. */
188138fd1498Szrj if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
188238fd1498Szrj continue;
188338fd1498Szrj
188438fd1498Szrj saved_dr1 = dr1;
188538fd1498Szrj /* Re-shuffle data-refs to be in topological order. */
188638fd1498Szrj if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
188738fd1498Szrj > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
188838fd1498Szrj {
188938fd1498Szrj std::swap (dr1, dr2);
189038fd1498Szrj this_dir = -this_dir;
189138fd1498Szrj }
189238fd1498Szrj ddr = get_data_dependence (rdg, dr1, dr2);
189338fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
189438fd1498Szrj {
189538fd1498Szrj this_dir = 0;
189638fd1498Szrj res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1),
189738fd1498Szrj DR_BASE_ADDRESS (dr2));
189838fd1498Szrj /* Be conservative. If data references are not well analyzed,
189938fd1498Szrj or the two data references have the same base address and
190038fd1498Szrj offset, add dependence and consider it alias to each other.
190138fd1498Szrj In other words, the dependence can not be resolved by
190238fd1498Szrj runtime alias check. */
190338fd1498Szrj if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
190438fd1498Szrj || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
190538fd1498Szrj || !DR_INIT (dr1) || !DR_INIT (dr2)
190638fd1498Szrj || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
190738fd1498Szrj || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
190838fd1498Szrj || res == 0)
190938fd1498Szrj this_dir = 2;
191038fd1498Szrj /* Data dependence could be resolved by runtime alias check,
191138fd1498Szrj record it in ALIAS_DDRS. */
191238fd1498Szrj else if (alias_ddrs != NULL)
191338fd1498Szrj alias_ddrs->safe_push (ddr);
191438fd1498Szrj /* Or simply ignore it. */
191538fd1498Szrj }
191638fd1498Szrj else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
191738fd1498Szrj {
191838fd1498Szrj if (DDR_REVERSED_P (ddr))
191938fd1498Szrj this_dir = -this_dir;
192038fd1498Szrj
192138fd1498Szrj /* Known dependences can still be unordered througout the
192238fd1498Szrj iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
192338fd1498Szrj if (DDR_NUM_DIST_VECTS (ddr) != 1)
192438fd1498Szrj this_dir = 2;
192538fd1498Szrj /* If the overlap is exact preserve stmt order. */
1926*58e805e6Szrj else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0),
1927*58e805e6Szrj DDR_NB_LOOPS (ddr)))
192838fd1498Szrj ;
192938fd1498Szrj /* Else as the distance vector is lexicographic positive swap
193038fd1498Szrj the dependence direction. */
193138fd1498Szrj else
193238fd1498Szrj this_dir = -this_dir;
193338fd1498Szrj }
193438fd1498Szrj else
193538fd1498Szrj this_dir = 0;
193638fd1498Szrj if (this_dir == 2)
193738fd1498Szrj return 2;
193838fd1498Szrj else if (dir == 0)
193938fd1498Szrj dir = this_dir;
194038fd1498Szrj else if (this_dir != 0 && dir != this_dir)
194138fd1498Szrj return 2;
194238fd1498Szrj /* Shuffle "back" dr1. */
194338fd1498Szrj dr1 = saved_dr1;
194438fd1498Szrj }
194538fd1498Szrj }
194638fd1498Szrj return dir;
194738fd1498Szrj }
194838fd1498Szrj
194938fd1498Szrj /* Compare postorder number of the partition graph vertices V1 and V2. */
195038fd1498Szrj
195138fd1498Szrj static int
pgcmp(const void * v1_,const void * v2_)195238fd1498Szrj pgcmp (const void *v1_, const void *v2_)
195338fd1498Szrj {
195438fd1498Szrj const vertex *v1 = (const vertex *)v1_;
195538fd1498Szrj const vertex *v2 = (const vertex *)v2_;
195638fd1498Szrj return v2->post - v1->post;
195738fd1498Szrj }
195838fd1498Szrj
195938fd1498Szrj /* Data attached to vertices of partition dependence graph. */
196038fd1498Szrj struct pg_vdata
196138fd1498Szrj {
196238fd1498Szrj /* ID of the corresponding partition. */
196338fd1498Szrj int id;
196438fd1498Szrj /* The partition. */
196538fd1498Szrj struct partition *partition;
196638fd1498Szrj };
196738fd1498Szrj
196838fd1498Szrj /* Data attached to edges of partition dependence graph. */
196938fd1498Szrj struct pg_edata
197038fd1498Szrj {
197138fd1498Szrj /* If the dependence edge can be resolved by runtime alias check,
197238fd1498Szrj this vector contains data dependence relations for runtime alias
197338fd1498Szrj check. On the other hand, if the dependence edge is introduced
197438fd1498Szrj because of compilation time known data dependence, this vector
197538fd1498Szrj contains nothing. */
197638fd1498Szrj vec<ddr_p> alias_ddrs;
197738fd1498Szrj };
197838fd1498Szrj
197938fd1498Szrj /* Callback data for traversing edges in graph. */
198038fd1498Szrj struct pg_edge_callback_data
198138fd1498Szrj {
198238fd1498Szrj /* Bitmap contains strong connected components should be merged. */
198338fd1498Szrj bitmap sccs_to_merge;
198438fd1498Szrj /* Array constains component information for all vertices. */
198538fd1498Szrj int *vertices_component;
198638fd1498Szrj /* Vector to record all data dependence relations which are needed
198738fd1498Szrj to break strong connected components by runtime alias checks. */
198838fd1498Szrj vec<ddr_p> *alias_ddrs;
198938fd1498Szrj };
199038fd1498Szrj
199138fd1498Szrj /* Initialize vertice's data for partition dependence graph PG with
199238fd1498Szrj PARTITIONS. */
199338fd1498Szrj
199438fd1498Szrj static void
init_partition_graph_vertices(struct graph * pg,vec<struct partition * > * partitions)199538fd1498Szrj init_partition_graph_vertices (struct graph *pg,
199638fd1498Szrj vec<struct partition *> *partitions)
199738fd1498Szrj {
199838fd1498Szrj int i;
199938fd1498Szrj partition *partition;
200038fd1498Szrj struct pg_vdata *data;
200138fd1498Szrj
200238fd1498Szrj for (i = 0; partitions->iterate (i, &partition); ++i)
200338fd1498Szrj {
200438fd1498Szrj data = new pg_vdata;
200538fd1498Szrj pg->vertices[i].data = data;
200638fd1498Szrj data->id = i;
200738fd1498Szrj data->partition = partition;
200838fd1498Szrj }
200938fd1498Szrj }
201038fd1498Szrj
201138fd1498Szrj /* Add edge <I, J> to partition dependence graph PG. Attach vector of data
201238fd1498Szrj dependence relations to the EDGE if DDRS isn't NULL. */
201338fd1498Szrj
201438fd1498Szrj static void
add_partition_graph_edge(struct graph * pg,int i,int j,vec<ddr_p> * ddrs)201538fd1498Szrj add_partition_graph_edge (struct graph *pg, int i, int j, vec<ddr_p> *ddrs)
201638fd1498Szrj {
201738fd1498Szrj struct graph_edge *e = add_edge (pg, i, j);
201838fd1498Szrj
201938fd1498Szrj /* If the edge is attached with data dependence relations, it means this
202038fd1498Szrj dependence edge can be resolved by runtime alias checks. */
202138fd1498Szrj if (ddrs != NULL)
202238fd1498Szrj {
202338fd1498Szrj struct pg_edata *data = new pg_edata;
202438fd1498Szrj
202538fd1498Szrj gcc_assert (ddrs->length () > 0);
202638fd1498Szrj e->data = data;
202738fd1498Szrj data->alias_ddrs = vNULL;
202838fd1498Szrj data->alias_ddrs.safe_splice (*ddrs);
202938fd1498Szrj }
203038fd1498Szrj }
203138fd1498Szrj
203238fd1498Szrj /* Callback function for graph travesal algorithm. It returns true
203338fd1498Szrj if edge E should skipped when traversing the graph. */
203438fd1498Szrj
203538fd1498Szrj static bool
pg_skip_alias_edge(struct graph_edge * e)203638fd1498Szrj pg_skip_alias_edge (struct graph_edge *e)
203738fd1498Szrj {
203838fd1498Szrj struct pg_edata *data = (struct pg_edata *)e->data;
203938fd1498Szrj return (data != NULL && data->alias_ddrs.length () > 0);
204038fd1498Szrj }
204138fd1498Szrj
204238fd1498Szrj /* Callback function freeing data attached to edge E of graph. */
204338fd1498Szrj
204438fd1498Szrj static void
free_partition_graph_edata_cb(struct graph *,struct graph_edge * e,void *)204538fd1498Szrj free_partition_graph_edata_cb (struct graph *, struct graph_edge *e, void *)
204638fd1498Szrj {
204738fd1498Szrj if (e->data != NULL)
204838fd1498Szrj {
204938fd1498Szrj struct pg_edata *data = (struct pg_edata *)e->data;
205038fd1498Szrj data->alias_ddrs.release ();
205138fd1498Szrj delete data;
205238fd1498Szrj }
205338fd1498Szrj }
205438fd1498Szrj
205538fd1498Szrj /* Free data attached to vertice of partition dependence graph PG. */
205638fd1498Szrj
205738fd1498Szrj static void
free_partition_graph_vdata(struct graph * pg)205838fd1498Szrj free_partition_graph_vdata (struct graph *pg)
205938fd1498Szrj {
206038fd1498Szrj int i;
206138fd1498Szrj struct pg_vdata *data;
206238fd1498Szrj
206338fd1498Szrj for (i = 0; i < pg->n_vertices; ++i)
206438fd1498Szrj {
206538fd1498Szrj data = (struct pg_vdata *)pg->vertices[i].data;
206638fd1498Szrj delete data;
206738fd1498Szrj }
206838fd1498Szrj }
206938fd1498Szrj
207038fd1498Szrj /* Build and return partition dependence graph for PARTITIONS. RDG is
207138fd1498Szrj reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
207238fd1498Szrj is true, data dependence caused by possible alias between references
207338fd1498Szrj is ignored, as if it doesn't exist at all; otherwise all depdendences
207438fd1498Szrj are considered. */
207538fd1498Szrj
207638fd1498Szrj static struct graph *
build_partition_graph(struct graph * rdg,vec<struct partition * > * partitions,bool ignore_alias_p)207738fd1498Szrj build_partition_graph (struct graph *rdg,
207838fd1498Szrj vec<struct partition *> *partitions,
207938fd1498Szrj bool ignore_alias_p)
208038fd1498Szrj {
208138fd1498Szrj int i, j;
208238fd1498Szrj struct partition *partition1, *partition2;
208338fd1498Szrj graph *pg = new_graph (partitions->length ());
208438fd1498Szrj auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p;
208538fd1498Szrj
208638fd1498Szrj alias_ddrs_p = ignore_alias_p ? NULL : &alias_ddrs;
208738fd1498Szrj
208838fd1498Szrj init_partition_graph_vertices (pg, partitions);
208938fd1498Szrj
209038fd1498Szrj for (i = 0; partitions->iterate (i, &partition1); ++i)
209138fd1498Szrj {
209238fd1498Szrj for (j = i + 1; partitions->iterate (j, &partition2); ++j)
209338fd1498Szrj {
209438fd1498Szrj /* dependence direction - 0 is no dependence, -1 is back,
209538fd1498Szrj 1 is forth, 2 is both (we can stop then, merging will occur). */
209638fd1498Szrj int dir = 0;
209738fd1498Szrj
209838fd1498Szrj /* If the first partition has reduction, add back edge; if the
209938fd1498Szrj second partition has reduction, add forth edge. This makes
210038fd1498Szrj sure that reduction partition will be sorted as the last one. */
210138fd1498Szrj if (partition_reduction_p (partition1))
210238fd1498Szrj dir = -1;
210338fd1498Szrj else if (partition_reduction_p (partition2))
210438fd1498Szrj dir = 1;
210538fd1498Szrj
210638fd1498Szrj /* Cleanup the temporary vector. */
210738fd1498Szrj alias_ddrs.truncate (0);
210838fd1498Szrj
210938fd1498Szrj dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs,
211038fd1498Szrj partition2->datarefs, alias_ddrs_p);
211138fd1498Szrj
211238fd1498Szrj /* Add edge to partition graph if there exists dependence. There
211338fd1498Szrj are two types of edges. One type edge is caused by compilation
211438fd1498Szrj time known dependence, this type can not be resolved by runtime
211538fd1498Szrj alias check. The other type can be resolved by runtime alias
211638fd1498Szrj check. */
211738fd1498Szrj if (dir == 1 || dir == 2
211838fd1498Szrj || alias_ddrs.length () > 0)
211938fd1498Szrj {
212038fd1498Szrj /* Attach data dependence relations to edge that can be resolved
212138fd1498Szrj by runtime alias check. */
212238fd1498Szrj bool alias_edge_p = (dir != 1 && dir != 2);
212338fd1498Szrj add_partition_graph_edge (pg, i, j,
212438fd1498Szrj (alias_edge_p) ? &alias_ddrs : NULL);
212538fd1498Szrj }
212638fd1498Szrj if (dir == -1 || dir == 2
212738fd1498Szrj || alias_ddrs.length () > 0)
212838fd1498Szrj {
212938fd1498Szrj /* Attach data dependence relations to edge that can be resolved
213038fd1498Szrj by runtime alias check. */
213138fd1498Szrj bool alias_edge_p = (dir != -1 && dir != 2);
213238fd1498Szrj add_partition_graph_edge (pg, j, i,
213338fd1498Szrj (alias_edge_p) ? &alias_ddrs : NULL);
213438fd1498Szrj }
213538fd1498Szrj }
213638fd1498Szrj }
213738fd1498Szrj return pg;
213838fd1498Szrj }
213938fd1498Szrj
214038fd1498Szrj /* Sort partitions in PG in descending post order and store them in
214138fd1498Szrj PARTITIONS. */
214238fd1498Szrj
214338fd1498Szrj static void
sort_partitions_by_post_order(struct graph * pg,vec<struct partition * > * partitions)214438fd1498Szrj sort_partitions_by_post_order (struct graph *pg,
214538fd1498Szrj vec<struct partition *> *partitions)
214638fd1498Szrj {
214738fd1498Szrj int i;
214838fd1498Szrj struct pg_vdata *data;
214938fd1498Szrj
215038fd1498Szrj /* Now order the remaining nodes in descending postorder. */
215138fd1498Szrj qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
215238fd1498Szrj partitions->truncate (0);
215338fd1498Szrj for (i = 0; i < pg->n_vertices; ++i)
215438fd1498Szrj {
215538fd1498Szrj data = (struct pg_vdata *)pg->vertices[i].data;
215638fd1498Szrj if (data->partition)
215738fd1498Szrj partitions->safe_push (data->partition);
215838fd1498Szrj }
215938fd1498Szrj }
216038fd1498Szrj
216138fd1498Szrj /* Given reduced dependence graph RDG merge strong connected components
216238fd1498Szrj of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
216338fd1498Szrj possible alias between references is ignored, as if it doesn't exist
216438fd1498Szrj at all; otherwise all depdendences are considered. */
216538fd1498Szrj
216638fd1498Szrj static void
merge_dep_scc_partitions(struct graph * rdg,vec<struct partition * > * partitions,bool ignore_alias_p)216738fd1498Szrj merge_dep_scc_partitions (struct graph *rdg,
216838fd1498Szrj vec<struct partition *> *partitions,
216938fd1498Szrj bool ignore_alias_p)
217038fd1498Szrj {
217138fd1498Szrj struct partition *partition1, *partition2;
217238fd1498Szrj struct pg_vdata *data;
217338fd1498Szrj graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p);
217438fd1498Szrj int i, j, num_sccs = graphds_scc (pg, NULL);
217538fd1498Szrj
217638fd1498Szrj /* Strong connected compoenent means dependence cycle, we cannot distribute
217738fd1498Szrj them. So fuse them together. */
217838fd1498Szrj if ((unsigned) num_sccs < partitions->length ())
217938fd1498Szrj {
218038fd1498Szrj for (i = 0; i < num_sccs; ++i)
218138fd1498Szrj {
218238fd1498Szrj for (j = 0; partitions->iterate (j, &partition1); ++j)
218338fd1498Szrj if (pg->vertices[j].component == i)
218438fd1498Szrj break;
218538fd1498Szrj for (j = j + 1; partitions->iterate (j, &partition2); ++j)
218638fd1498Szrj if (pg->vertices[j].component == i)
218738fd1498Szrj {
218838fd1498Szrj partition_merge_into (NULL, partition1,
218938fd1498Szrj partition2, FUSE_SAME_SCC);
219038fd1498Szrj partition1->type = PTYPE_SEQUENTIAL;
219138fd1498Szrj (*partitions)[j] = NULL;
219238fd1498Szrj partition_free (partition2);
219338fd1498Szrj data = (struct pg_vdata *)pg->vertices[j].data;
219438fd1498Szrj data->partition = NULL;
219538fd1498Szrj }
219638fd1498Szrj }
219738fd1498Szrj }
219838fd1498Szrj
219938fd1498Szrj sort_partitions_by_post_order (pg, partitions);
220038fd1498Szrj gcc_assert (partitions->length () == (unsigned)num_sccs);
220138fd1498Szrj free_partition_graph_vdata (pg);
220238fd1498Szrj free_graph (pg);
220338fd1498Szrj }
220438fd1498Szrj
220538fd1498Szrj /* Callback function for traversing edge E in graph G. DATA is private
220638fd1498Szrj callback data. */
220738fd1498Szrj
220838fd1498Szrj static void
pg_collect_alias_ddrs(struct graph * g,struct graph_edge * e,void * data)220938fd1498Szrj pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
221038fd1498Szrj {
221138fd1498Szrj int i, j, component;
221238fd1498Szrj struct pg_edge_callback_data *cbdata;
221338fd1498Szrj struct pg_edata *edata = (struct pg_edata *) e->data;
221438fd1498Szrj
221538fd1498Szrj /* If the edge doesn't have attached data dependence, it represents
221638fd1498Szrj compilation time known dependences. This type dependence cannot
221738fd1498Szrj be resolved by runtime alias check. */
221838fd1498Szrj if (edata == NULL || edata->alias_ddrs.length () == 0)
221938fd1498Szrj return;
222038fd1498Szrj
222138fd1498Szrj cbdata = (struct pg_edge_callback_data *) data;
222238fd1498Szrj i = e->src;
222338fd1498Szrj j = e->dest;
222438fd1498Szrj component = cbdata->vertices_component[i];
222538fd1498Szrj /* Vertices are topologically sorted according to compilation time
222638fd1498Szrj known dependences, so we can break strong connected components
222738fd1498Szrj by removing edges of the opposite direction, i.e, edges pointing
222838fd1498Szrj from vertice with smaller post number to vertice with bigger post
222938fd1498Szrj number. */
223038fd1498Szrj if (g->vertices[i].post < g->vertices[j].post
223138fd1498Szrj /* We only need to remove edges connecting vertices in the same
223238fd1498Szrj strong connected component to break it. */
223338fd1498Szrj && component == cbdata->vertices_component[j]
223438fd1498Szrj /* Check if we want to break the strong connected component or not. */
223538fd1498Szrj && !bitmap_bit_p (cbdata->sccs_to_merge, component))
223638fd1498Szrj cbdata->alias_ddrs->safe_splice (edata->alias_ddrs);
223738fd1498Szrj }
223838fd1498Szrj
223938fd1498Szrj /* This is the main function breaking strong conected components in
224038fd1498Szrj PARTITIONS giving reduced depdendence graph RDG. Store data dependence
224138fd1498Szrj relations for runtime alias check in ALIAS_DDRS. */
224238fd1498Szrj
224338fd1498Szrj static void
break_alias_scc_partitions(struct graph * rdg,vec<struct partition * > * partitions,vec<ddr_p> * alias_ddrs)224438fd1498Szrj break_alias_scc_partitions (struct graph *rdg,
224538fd1498Szrj vec<struct partition *> *partitions,
224638fd1498Szrj vec<ddr_p> *alias_ddrs)
224738fd1498Szrj {
224838fd1498Szrj int i, j, k, num_sccs, num_sccs_no_alias;
224938fd1498Szrj /* Build partition dependence graph. */
225038fd1498Szrj graph *pg = build_partition_graph (rdg, partitions, false);
225138fd1498Szrj
225238fd1498Szrj alias_ddrs->truncate (0);
225338fd1498Szrj /* Find strong connected components in the graph, with all dependence edges
225438fd1498Szrj considered. */
225538fd1498Szrj num_sccs = graphds_scc (pg, NULL);
225638fd1498Szrj /* All SCCs now can be broken by runtime alias checks because SCCs caused by
225738fd1498Szrj compilation time known dependences are merged before this function. */
225838fd1498Szrj if ((unsigned) num_sccs < partitions->length ())
225938fd1498Szrj {
226038fd1498Szrj struct pg_edge_callback_data cbdata;
226138fd1498Szrj auto_bitmap sccs_to_merge;
226238fd1498Szrj auto_vec<enum partition_type> scc_types;
226338fd1498Szrj struct partition *partition, *first;
226438fd1498Szrj
226538fd1498Szrj /* If all partitions in a SCC have the same type, we can simply merge the
226638fd1498Szrj SCC. This loop finds out such SCCS and record them in bitmap. */
226738fd1498Szrj bitmap_set_range (sccs_to_merge, 0, (unsigned) num_sccs);
226838fd1498Szrj for (i = 0; i < num_sccs; ++i)
226938fd1498Szrj {
227038fd1498Szrj for (j = 0; partitions->iterate (j, &first); ++j)
227138fd1498Szrj if (pg->vertices[j].component == i)
227238fd1498Szrj break;
227338fd1498Szrj for (++j; partitions->iterate (j, &partition); ++j)
227438fd1498Szrj {
227538fd1498Szrj if (pg->vertices[j].component != i)
227638fd1498Szrj continue;
227738fd1498Szrj
227838fd1498Szrj /* Note we Merge partitions of parallel type on purpose, though
227938fd1498Szrj the result partition is sequential. The reason is vectorizer
228038fd1498Szrj can do more accurate runtime alias check in this case. Also
228138fd1498Szrj it results in more conservative distribution. */
228238fd1498Szrj if (first->type != partition->type)
228338fd1498Szrj {
228438fd1498Szrj bitmap_clear_bit (sccs_to_merge, i);
228538fd1498Szrj break;
228638fd1498Szrj }
228738fd1498Szrj }
228838fd1498Szrj }
228938fd1498Szrj
229038fd1498Szrj /* Initialize callback data for traversing. */
229138fd1498Szrj cbdata.sccs_to_merge = sccs_to_merge;
229238fd1498Szrj cbdata.alias_ddrs = alias_ddrs;
229338fd1498Szrj cbdata.vertices_component = XNEWVEC (int, pg->n_vertices);
229438fd1498Szrj /* Record the component information which will be corrupted by next
229538fd1498Szrj graph scc finding call. */
229638fd1498Szrj for (i = 0; i < pg->n_vertices; ++i)
229738fd1498Szrj cbdata.vertices_component[i] = pg->vertices[i].component;
229838fd1498Szrj
229938fd1498Szrj /* Collect data dependences for runtime alias checks to break SCCs. */
230038fd1498Szrj if (bitmap_count_bits (sccs_to_merge) != (unsigned) num_sccs)
230138fd1498Szrj {
230238fd1498Szrj /* Run SCC finding algorithm again, with alias dependence edges
230338fd1498Szrj skipped. This is to topologically sort partitions according to
230438fd1498Szrj compilation time known dependence. Note the topological order
230538fd1498Szrj is stored in the form of pg's post order number. */
230638fd1498Szrj num_sccs_no_alias = graphds_scc (pg, NULL, pg_skip_alias_edge);
230738fd1498Szrj gcc_assert (partitions->length () == (unsigned) num_sccs_no_alias);
230838fd1498Szrj /* With topological order, we can construct two subgraphs L and R.
230938fd1498Szrj L contains edge <x, y> where x < y in terms of post order, while
231038fd1498Szrj R contains edge <x, y> where x > y. Edges for compilation time
231138fd1498Szrj known dependence all fall in R, so we break SCCs by removing all
231238fd1498Szrj (alias) edges of in subgraph L. */
231338fd1498Szrj for_each_edge (pg, pg_collect_alias_ddrs, &cbdata);
231438fd1498Szrj }
231538fd1498Szrj
231638fd1498Szrj /* For SCC that doesn't need to be broken, merge it. */
231738fd1498Szrj for (i = 0; i < num_sccs; ++i)
231838fd1498Szrj {
231938fd1498Szrj if (!bitmap_bit_p (sccs_to_merge, i))
232038fd1498Szrj continue;
232138fd1498Szrj
232238fd1498Szrj for (j = 0; partitions->iterate (j, &first); ++j)
232338fd1498Szrj if (cbdata.vertices_component[j] == i)
232438fd1498Szrj break;
232538fd1498Szrj for (k = j + 1; partitions->iterate (k, &partition); ++k)
232638fd1498Szrj {
232738fd1498Szrj struct pg_vdata *data;
232838fd1498Szrj
232938fd1498Szrj if (cbdata.vertices_component[k] != i)
233038fd1498Szrj continue;
233138fd1498Szrj
233238fd1498Szrj /* Update postorder number so that merged reduction partition is
233338fd1498Szrj sorted after other partitions. */
233438fd1498Szrj if (!partition_reduction_p (first)
233538fd1498Szrj && partition_reduction_p (partition))
233638fd1498Szrj {
233738fd1498Szrj gcc_assert (pg->vertices[k].post < pg->vertices[j].post);
233838fd1498Szrj pg->vertices[j].post = pg->vertices[k].post;
233938fd1498Szrj }
234038fd1498Szrj partition_merge_into (NULL, first, partition, FUSE_SAME_SCC);
234138fd1498Szrj (*partitions)[k] = NULL;
234238fd1498Szrj partition_free (partition);
234338fd1498Szrj data = (struct pg_vdata *)pg->vertices[k].data;
234438fd1498Szrj gcc_assert (data->id == k);
234538fd1498Szrj data->partition = NULL;
234638fd1498Szrj /* The result partition of merged SCC must be sequential. */
234738fd1498Szrj first->type = PTYPE_SEQUENTIAL;
234838fd1498Szrj }
234938fd1498Szrj }
235038fd1498Szrj }
235138fd1498Szrj
235238fd1498Szrj sort_partitions_by_post_order (pg, partitions);
235338fd1498Szrj free_partition_graph_vdata (pg);
235438fd1498Szrj for_each_edge (pg, free_partition_graph_edata_cb, NULL);
235538fd1498Szrj free_graph (pg);
235638fd1498Szrj
235738fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
235838fd1498Szrj {
235938fd1498Szrj fprintf (dump_file, "Possible alias data dependence to break:\n");
236038fd1498Szrj dump_data_dependence_relations (dump_file, *alias_ddrs);
236138fd1498Szrj }
236238fd1498Szrj }
236338fd1498Szrj
236438fd1498Szrj /* Compute and return an expression whose value is the segment length which
236538fd1498Szrj will be accessed by DR in NITERS iterations. */
236638fd1498Szrj
236738fd1498Szrj static tree
data_ref_segment_size(struct data_reference * dr,tree niters)236838fd1498Szrj data_ref_segment_size (struct data_reference *dr, tree niters)
236938fd1498Szrj {
237038fd1498Szrj niters = size_binop (MINUS_EXPR,
237138fd1498Szrj fold_convert (sizetype, niters),
237238fd1498Szrj size_one_node);
237338fd1498Szrj return size_binop (MULT_EXPR,
237438fd1498Szrj fold_convert (sizetype, DR_STEP (dr)),
237538fd1498Szrj fold_convert (sizetype, niters));
237638fd1498Szrj }
237738fd1498Szrj
237838fd1498Szrj /* Return true if LOOP's latch is dominated by statement for data reference
237938fd1498Szrj DR. */
238038fd1498Szrj
238138fd1498Szrj static inline bool
latch_dominated_by_data_ref(struct loop * loop,data_reference * dr)238238fd1498Szrj latch_dominated_by_data_ref (struct loop *loop, data_reference *dr)
238338fd1498Szrj {
238438fd1498Szrj return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
238538fd1498Szrj gimple_bb (DR_STMT (dr)));
238638fd1498Szrj }
238738fd1498Szrj
238838fd1498Szrj /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
238938fd1498Szrj data dependence relations ALIAS_DDRS. */
239038fd1498Szrj
239138fd1498Szrj static void
compute_alias_check_pairs(struct loop * loop,vec<ddr_p> * alias_ddrs,vec<dr_with_seg_len_pair_t> * comp_alias_pairs)239238fd1498Szrj compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
239338fd1498Szrj vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
239438fd1498Szrj {
239538fd1498Szrj unsigned int i;
239638fd1498Szrj unsigned HOST_WIDE_INT factor = 1;
239738fd1498Szrj tree niters_plus_one, niters = number_of_latch_executions (loop);
239838fd1498Szrj
239938fd1498Szrj gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
240038fd1498Szrj niters = fold_convert (sizetype, niters);
240138fd1498Szrj niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);
240238fd1498Szrj
240338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
240438fd1498Szrj fprintf (dump_file, "Creating alias check pairs:\n");
240538fd1498Szrj
240638fd1498Szrj /* Iterate all data dependence relations and compute alias check pairs. */
240738fd1498Szrj for (i = 0; i < alias_ddrs->length (); i++)
240838fd1498Szrj {
240938fd1498Szrj ddr_p ddr = (*alias_ddrs)[i];
241038fd1498Szrj struct data_reference *dr_a = DDR_A (ddr);
241138fd1498Szrj struct data_reference *dr_b = DDR_B (ddr);
241238fd1498Szrj tree seg_length_a, seg_length_b;
241338fd1498Szrj int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
241438fd1498Szrj DR_BASE_ADDRESS (dr_b));
241538fd1498Szrj
241638fd1498Szrj if (comp_res == 0)
241738fd1498Szrj comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
241838fd1498Szrj gcc_assert (comp_res != 0);
241938fd1498Szrj
242038fd1498Szrj if (latch_dominated_by_data_ref (loop, dr_a))
242138fd1498Szrj seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
242238fd1498Szrj else
242338fd1498Szrj seg_length_a = data_ref_segment_size (dr_a, niters);
242438fd1498Szrj
242538fd1498Szrj if (latch_dominated_by_data_ref (loop, dr_b))
242638fd1498Szrj seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
242738fd1498Szrj else
242838fd1498Szrj seg_length_b = data_ref_segment_size (dr_b, niters);
242938fd1498Szrj
243038fd1498Szrj unsigned HOST_WIDE_INT access_size_a
243138fd1498Szrj = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
243238fd1498Szrj unsigned HOST_WIDE_INT access_size_b
243338fd1498Szrj = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
243438fd1498Szrj unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
243538fd1498Szrj unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
243638fd1498Szrj
243738fd1498Szrj dr_with_seg_len_pair_t dr_with_seg_len_pair
243838fd1498Szrj (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
243938fd1498Szrj dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
244038fd1498Szrj
244138fd1498Szrj /* Canonicalize pairs by sorting the two DR members. */
244238fd1498Szrj if (comp_res > 0)
244338fd1498Szrj std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
244438fd1498Szrj
244538fd1498Szrj comp_alias_pairs->safe_push (dr_with_seg_len_pair);
244638fd1498Szrj }
244738fd1498Szrj
244838fd1498Szrj if (tree_fits_uhwi_p (niters))
244938fd1498Szrj factor = tree_to_uhwi (niters);
245038fd1498Szrj
245138fd1498Szrj /* Prune alias check pairs. */
245238fd1498Szrj prune_runtime_alias_test_list (comp_alias_pairs, factor);
245338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
245438fd1498Szrj fprintf (dump_file,
245538fd1498Szrj "Improved number of alias checks from %d to %d\n",
245638fd1498Szrj alias_ddrs->length (), comp_alias_pairs->length ());
245738fd1498Szrj }
245838fd1498Szrj
245938fd1498Szrj /* Given data dependence relations in ALIAS_DDRS, generate runtime alias
246038fd1498Szrj checks and version LOOP under condition of these runtime alias checks. */
246138fd1498Szrj
246238fd1498Szrj static void
version_loop_by_alias_check(struct loop * loop,vec<ddr_p> * alias_ddrs)246338fd1498Szrj version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
246438fd1498Szrj {
246538fd1498Szrj profile_probability prob;
246638fd1498Szrj basic_block cond_bb;
246738fd1498Szrj struct loop *nloop;
246838fd1498Szrj tree lhs, arg0, cond_expr = NULL_TREE;
246938fd1498Szrj gimple_seq cond_stmts = NULL;
247038fd1498Szrj gimple *call_stmt = NULL;
247138fd1498Szrj auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs;
247238fd1498Szrj
247338fd1498Szrj /* Generate code for runtime alias checks if necessary. */
247438fd1498Szrj gcc_assert (alias_ddrs->length () > 0);
247538fd1498Szrj
247638fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
247738fd1498Szrj fprintf (dump_file,
247838fd1498Szrj "Version loop <%d> with runtime alias check\n", loop->num);
247938fd1498Szrj
248038fd1498Szrj compute_alias_check_pairs (loop, alias_ddrs, &comp_alias_pairs);
248138fd1498Szrj create_runtime_alias_checks (loop, &comp_alias_pairs, &cond_expr);
248238fd1498Szrj cond_expr = force_gimple_operand_1 (cond_expr, &cond_stmts,
248338fd1498Szrj is_gimple_val, NULL_TREE);
248438fd1498Szrj
248538fd1498Szrj /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS. */
248638fd1498Szrj if (flag_tree_loop_vectorize)
248738fd1498Szrj {
248838fd1498Szrj /* Generate internal function call for loop distribution alias check. */
248938fd1498Szrj call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
249038fd1498Szrj 2, NULL_TREE, cond_expr);
249138fd1498Szrj lhs = make_ssa_name (boolean_type_node);
249238fd1498Szrj gimple_call_set_lhs (call_stmt, lhs);
249338fd1498Szrj }
249438fd1498Szrj else
249538fd1498Szrj lhs = cond_expr;
249638fd1498Szrj
249738fd1498Szrj prob = profile_probability::guessed_always ().apply_scale (9, 10);
249838fd1498Szrj initialize_original_copy_tables ();
249938fd1498Szrj nloop = loop_version (loop, lhs, &cond_bb, prob, prob.invert (),
250038fd1498Szrj prob, prob.invert (), true);
250138fd1498Szrj free_original_copy_tables ();
250238fd1498Szrj /* Record the original loop number in newly generated loops. In case of
250338fd1498Szrj distribution, the original loop will be distributed and the new loop
250438fd1498Szrj is kept. */
250538fd1498Szrj loop->orig_loop_num = nloop->num;
250638fd1498Szrj nloop->orig_loop_num = nloop->num;
250738fd1498Szrj nloop->dont_vectorize = true;
250838fd1498Szrj nloop->force_vectorize = false;
250938fd1498Szrj
251038fd1498Szrj if (call_stmt)
251138fd1498Szrj {
251238fd1498Szrj /* Record new loop's num in IFN_LOOP_DIST_ALIAS because the original
251338fd1498Szrj loop could be destroyed. */
251438fd1498Szrj arg0 = build_int_cst (integer_type_node, loop->orig_loop_num);
251538fd1498Szrj gimple_call_set_arg (call_stmt, 0, arg0);
251638fd1498Szrj gimple_seq_add_stmt_without_update (&cond_stmts, call_stmt);
251738fd1498Szrj }
251838fd1498Szrj
251938fd1498Szrj if (cond_stmts)
252038fd1498Szrj {
252138fd1498Szrj gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
252238fd1498Szrj gsi_insert_seq_before (&cond_gsi, cond_stmts, GSI_SAME_STMT);
252338fd1498Szrj }
252438fd1498Szrj update_ssa (TODO_update_ssa);
252538fd1498Szrj }
252638fd1498Szrj
252738fd1498Szrj /* Return true if loop versioning is needed to distrubute PARTITIONS.
252838fd1498Szrj ALIAS_DDRS are data dependence relations for runtime alias check. */
252938fd1498Szrj
253038fd1498Szrj static inline bool
version_for_distribution_p(vec<struct partition * > * partitions,vec<ddr_p> * alias_ddrs)253138fd1498Szrj version_for_distribution_p (vec<struct partition *> *partitions,
253238fd1498Szrj vec<ddr_p> *alias_ddrs)
253338fd1498Szrj {
253438fd1498Szrj /* No need to version loop if we have only one partition. */
253538fd1498Szrj if (partitions->length () == 1)
253638fd1498Szrj return false;
253738fd1498Szrj
253838fd1498Szrj /* Need to version loop if runtime alias check is necessary. */
253938fd1498Szrj return (alias_ddrs->length () > 0);
254038fd1498Szrj }
254138fd1498Szrj
254238fd1498Szrj /* Compare base offset of builtin mem* partitions P1 and P2. */
254338fd1498Szrj
254438fd1498Szrj static bool
offset_cmp(struct partition * p1,struct partition * p2)254538fd1498Szrj offset_cmp (struct partition *p1, struct partition *p2)
254638fd1498Szrj {
254738fd1498Szrj gcc_assert (p1 != NULL && p1->builtin != NULL);
254838fd1498Szrj gcc_assert (p2 != NULL && p2->builtin != NULL);
254938fd1498Szrj return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
255038fd1498Szrj }
255138fd1498Szrj
255238fd1498Szrj /* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
255338fd1498Szrj case optimization transforming below code:
255438fd1498Szrj
255538fd1498Szrj __builtin_memset (&obj, 0, 100);
255638fd1498Szrj _1 = &obj + 100;
255738fd1498Szrj __builtin_memset (_1, 0, 200);
255838fd1498Szrj _2 = &obj + 300;
255938fd1498Szrj __builtin_memset (_2, 0, 100);
256038fd1498Szrj
256138fd1498Szrj into:
256238fd1498Szrj
256338fd1498Szrj __builtin_memset (&obj, 0, 400);
256438fd1498Szrj
256538fd1498Szrj Note we don't have dependence information between different partitions
256638fd1498Szrj at this point, as a result, we can't handle nonadjacent memset builtin
256738fd1498Szrj partitions since dependence might be broken. */
256838fd1498Szrj
256938fd1498Szrj static void
fuse_memset_builtins(vec<struct partition * > * partitions)257038fd1498Szrj fuse_memset_builtins (vec<struct partition *> *partitions)
257138fd1498Szrj {
257238fd1498Szrj unsigned i, j;
257338fd1498Szrj struct partition *part1, *part2;
257438fd1498Szrj tree rhs1, rhs2;
257538fd1498Szrj
257638fd1498Szrj for (i = 0; partitions->iterate (i, &part1);)
257738fd1498Szrj {
257838fd1498Szrj if (part1->kind != PKIND_MEMSET)
257938fd1498Szrj {
258038fd1498Szrj i++;
258138fd1498Szrj continue;
258238fd1498Szrj }
258338fd1498Szrj
258438fd1498Szrj /* Find sub-array of memset builtins of the same base. Index range
258538fd1498Szrj of the sub-array is [i, j) with "j > i". */
258638fd1498Szrj for (j = i + 1; partitions->iterate (j, &part2); ++j)
258738fd1498Szrj {
258838fd1498Szrj if (part2->kind != PKIND_MEMSET
258938fd1498Szrj || !operand_equal_p (part1->builtin->dst_base_base,
259038fd1498Szrj part2->builtin->dst_base_base, 0))
259138fd1498Szrj break;
259238fd1498Szrj
259338fd1498Szrj /* Memset calls setting different values can't be merged. */
259438fd1498Szrj rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
259538fd1498Szrj rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
259638fd1498Szrj if (!operand_equal_p (rhs1, rhs2, 0))
259738fd1498Szrj break;
259838fd1498Szrj }
259938fd1498Szrj
260038fd1498Szrj /* Stable sort is required in order to avoid breaking dependence. */
260138fd1498Szrj std::stable_sort (&(*partitions)[i],
260238fd1498Szrj &(*partitions)[i] + j - i, offset_cmp);
260338fd1498Szrj /* Continue with next partition. */
260438fd1498Szrj i = j;
260538fd1498Szrj }
260638fd1498Szrj
260738fd1498Szrj /* Merge all consecutive memset builtin partitions. */
260838fd1498Szrj for (i = 0; i < partitions->length () - 1;)
260938fd1498Szrj {
261038fd1498Szrj part1 = (*partitions)[i];
261138fd1498Szrj if (part1->kind != PKIND_MEMSET)
261238fd1498Szrj {
261338fd1498Szrj i++;
261438fd1498Szrj continue;
261538fd1498Szrj }
261638fd1498Szrj
261738fd1498Szrj part2 = (*partitions)[i + 1];
261838fd1498Szrj /* Only merge memset partitions of the same base and with constant
261938fd1498Szrj access sizes. */
262038fd1498Szrj if (part2->kind != PKIND_MEMSET
262138fd1498Szrj || TREE_CODE (part1->builtin->size) != INTEGER_CST
262238fd1498Szrj || TREE_CODE (part2->builtin->size) != INTEGER_CST
262338fd1498Szrj || !operand_equal_p (part1->builtin->dst_base_base,
262438fd1498Szrj part2->builtin->dst_base_base, 0))
262538fd1498Szrj {
262638fd1498Szrj i++;
262738fd1498Szrj continue;
262838fd1498Szrj }
262938fd1498Szrj rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
263038fd1498Szrj rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
263138fd1498Szrj int bytev1 = const_with_all_bytes_same (rhs1);
263238fd1498Szrj int bytev2 = const_with_all_bytes_same (rhs2);
263338fd1498Szrj /* Only merge memset partitions of the same value. */
263438fd1498Szrj if (bytev1 != bytev2 || bytev1 == -1)
263538fd1498Szrj {
263638fd1498Szrj i++;
263738fd1498Szrj continue;
263838fd1498Szrj }
263938fd1498Szrj wide_int end1 = wi::add (part1->builtin->dst_base_offset,
264038fd1498Szrj wi::to_wide (part1->builtin->size));
264138fd1498Szrj /* Only merge adjacent memset partitions. */
264238fd1498Szrj if (wi::ne_p (end1, part2->builtin->dst_base_offset))
264338fd1498Szrj {
264438fd1498Szrj i++;
264538fd1498Szrj continue;
264638fd1498Szrj }
264738fd1498Szrj /* Merge partitions[i] and partitions[i+1]. */
264838fd1498Szrj part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
264938fd1498Szrj part1->builtin->size,
265038fd1498Szrj part2->builtin->size);
265138fd1498Szrj partition_free (part2);
265238fd1498Szrj partitions->ordered_remove (i + 1);
265338fd1498Szrj }
265438fd1498Szrj }
265538fd1498Szrj
265638fd1498Szrj /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
265738fd1498Szrj ALIAS_DDRS contains ddrs which need runtime alias check. */
265838fd1498Szrj
265938fd1498Szrj static void
finalize_partitions(struct loop * loop,vec<struct partition * > * partitions,vec<ddr_p> * alias_ddrs)266038fd1498Szrj finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
266138fd1498Szrj vec<ddr_p> *alias_ddrs)
266238fd1498Szrj {
266338fd1498Szrj unsigned i;
266438fd1498Szrj struct partition *partition, *a;
266538fd1498Szrj
266638fd1498Szrj if (partitions->length () == 1
266738fd1498Szrj || alias_ddrs->length () > 0)
266838fd1498Szrj return;
266938fd1498Szrj
267038fd1498Szrj unsigned num_builtin = 0, num_normal = 0, num_partial_memset = 0;
267138fd1498Szrj bool same_type_p = true;
267238fd1498Szrj enum partition_type type = ((*partitions)[0])->type;
267338fd1498Szrj for (i = 0; partitions->iterate (i, &partition); ++i)
267438fd1498Szrj {
267538fd1498Szrj same_type_p &= (type == partition->type);
267638fd1498Szrj if (partition_builtin_p (partition))
267738fd1498Szrj {
267838fd1498Szrj num_builtin++;
267938fd1498Szrj continue;
268038fd1498Szrj }
268138fd1498Szrj num_normal++;
268238fd1498Szrj if (partition->kind == PKIND_PARTIAL_MEMSET)
268338fd1498Szrj num_partial_memset++;
268438fd1498Szrj }
268538fd1498Szrj
268638fd1498Szrj /* Don't distribute current loop into too many loops given we don't have
268738fd1498Szrj memory stream cost model. Be even more conservative in case of loop
268838fd1498Szrj nest distribution. */
268938fd1498Szrj if ((same_type_p && num_builtin == 0
269038fd1498Szrj && (loop->inner == NULL || num_normal != 2 || num_partial_memset != 1))
269138fd1498Szrj || (loop->inner != NULL
269238fd1498Szrj && i >= NUM_PARTITION_THRESHOLD && num_normal > 1)
269338fd1498Szrj || (loop->inner == NULL
269438fd1498Szrj && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
269538fd1498Szrj {
269638fd1498Szrj a = (*partitions)[0];
269738fd1498Szrj for (i = 1; partitions->iterate (i, &partition); ++i)
269838fd1498Szrj {
269938fd1498Szrj partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
270038fd1498Szrj partition_free (partition);
270138fd1498Szrj }
270238fd1498Szrj partitions->truncate (1);
270338fd1498Szrj }
270438fd1498Szrj
270538fd1498Szrj /* Fuse memset builtins if possible. */
270638fd1498Szrj if (partitions->length () > 1)
270738fd1498Szrj fuse_memset_builtins (partitions);
270838fd1498Szrj }
270938fd1498Szrj
271038fd1498Szrj /* Distributes the code from LOOP in such a way that producer statements
271138fd1498Szrj are placed before consumer statements. Tries to separate only the
271238fd1498Szrj statements from STMTS into separate loops. Returns the number of
271338fd1498Szrj distributed loops. Set NB_CALLS to number of generated builtin calls.
271438fd1498Szrj Set *DESTROY_P to whether LOOP needs to be destroyed. */
271538fd1498Szrj
271638fd1498Szrj static int
distribute_loop(struct loop * loop,vec<gimple * > stmts,control_dependences * cd,int * nb_calls,bool * destroy_p)271738fd1498Szrj distribute_loop (struct loop *loop, vec<gimple *> stmts,
271838fd1498Szrj control_dependences *cd, int *nb_calls, bool *destroy_p)
271938fd1498Szrj {
272038fd1498Szrj ddrs_table = new hash_table<ddr_hasher> (389);
272138fd1498Szrj struct graph *rdg;
272238fd1498Szrj partition *partition;
272338fd1498Szrj bool any_builtin;
272438fd1498Szrj int i, nbp;
272538fd1498Szrj
272638fd1498Szrj *destroy_p = false;
272738fd1498Szrj *nb_calls = 0;
272838fd1498Szrj loop_nest.create (0);
272938fd1498Szrj if (!find_loop_nest (loop, &loop_nest))
273038fd1498Szrj {
273138fd1498Szrj loop_nest.release ();
273238fd1498Szrj delete ddrs_table;
273338fd1498Szrj return 0;
273438fd1498Szrj }
273538fd1498Szrj
273638fd1498Szrj datarefs_vec.create (20);
273738fd1498Szrj rdg = build_rdg (loop, cd);
273838fd1498Szrj if (!rdg)
273938fd1498Szrj {
274038fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
274138fd1498Szrj fprintf (dump_file,
274238fd1498Szrj "Loop %d not distributed: failed to build the RDG.\n",
274338fd1498Szrj loop->num);
274438fd1498Szrj
274538fd1498Szrj loop_nest.release ();
274638fd1498Szrj free_data_refs (datarefs_vec);
274738fd1498Szrj delete ddrs_table;
274838fd1498Szrj return 0;
274938fd1498Szrj }
275038fd1498Szrj
275138fd1498Szrj if (datarefs_vec.length () > MAX_DATAREFS_NUM)
275238fd1498Szrj {
275338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
275438fd1498Szrj fprintf (dump_file,
275538fd1498Szrj "Loop %d not distributed: too many memory references.\n",
275638fd1498Szrj loop->num);
275738fd1498Szrj
275838fd1498Szrj free_rdg (rdg);
275938fd1498Szrj loop_nest.release ();
276038fd1498Szrj free_data_refs (datarefs_vec);
276138fd1498Szrj delete ddrs_table;
276238fd1498Szrj return 0;
276338fd1498Szrj }
276438fd1498Szrj
276538fd1498Szrj data_reference_p dref;
276638fd1498Szrj for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
276738fd1498Szrj dref->aux = (void *) (uintptr_t) i;
276838fd1498Szrj
276938fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
277038fd1498Szrj dump_rdg (dump_file, rdg);
277138fd1498Szrj
277238fd1498Szrj auto_vec<struct partition *, 3> partitions;
277338fd1498Szrj rdg_build_partitions (rdg, stmts, &partitions);
277438fd1498Szrj
277538fd1498Szrj auto_vec<ddr_p> alias_ddrs;
277638fd1498Szrj
277738fd1498Szrj auto_bitmap stmt_in_all_partitions;
277838fd1498Szrj bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
277938fd1498Szrj for (i = 1; partitions.iterate (i, &partition); ++i)
278038fd1498Szrj bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
278138fd1498Szrj
278238fd1498Szrj any_builtin = false;
278338fd1498Szrj FOR_EACH_VEC_ELT (partitions, i, partition)
278438fd1498Szrj {
278538fd1498Szrj classify_partition (loop, rdg, partition, stmt_in_all_partitions);
278638fd1498Szrj any_builtin |= partition_builtin_p (partition);
278738fd1498Szrj }
278838fd1498Szrj
278938fd1498Szrj /* If we are only distributing patterns but did not detect any,
279038fd1498Szrj simply bail out. */
279138fd1498Szrj if (!flag_tree_loop_distribution
279238fd1498Szrj && !any_builtin)
279338fd1498Szrj {
279438fd1498Szrj nbp = 0;
279538fd1498Szrj goto ldist_done;
279638fd1498Szrj }
279738fd1498Szrj
279838fd1498Szrj /* If we are only distributing patterns fuse all partitions that
279938fd1498Szrj were not classified as builtins. This also avoids chopping
280038fd1498Szrj a loop into pieces, separated by builtin calls. That is, we
280138fd1498Szrj only want no or a single loop body remaining. */
280238fd1498Szrj struct partition *into;
280338fd1498Szrj if (!flag_tree_loop_distribution)
280438fd1498Szrj {
280538fd1498Szrj for (i = 0; partitions.iterate (i, &into); ++i)
280638fd1498Szrj if (!partition_builtin_p (into))
280738fd1498Szrj break;
280838fd1498Szrj for (++i; partitions.iterate (i, &partition); ++i)
280938fd1498Szrj if (!partition_builtin_p (partition))
281038fd1498Szrj {
281138fd1498Szrj partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN);
281238fd1498Szrj partitions.unordered_remove (i);
281338fd1498Szrj partition_free (partition);
281438fd1498Szrj i--;
281538fd1498Szrj }
281638fd1498Szrj }
281738fd1498Szrj
281838fd1498Szrj /* Due to limitations in the transform phase we have to fuse all
281938fd1498Szrj reduction partitions into the last partition so the existing
282038fd1498Szrj loop will contain all loop-closed PHI nodes. */
282138fd1498Szrj for (i = 0; partitions.iterate (i, &into); ++i)
282238fd1498Szrj if (partition_reduction_p (into))
282338fd1498Szrj break;
282438fd1498Szrj for (i = i + 1; partitions.iterate (i, &partition); ++i)
282538fd1498Szrj if (partition_reduction_p (partition))
282638fd1498Szrj {
282738fd1498Szrj partition_merge_into (rdg, into, partition, FUSE_REDUCTION);
282838fd1498Szrj partitions.unordered_remove (i);
282938fd1498Szrj partition_free (partition);
283038fd1498Szrj i--;
283138fd1498Szrj }
283238fd1498Szrj
283338fd1498Szrj /* Apply our simple cost model - fuse partitions with similar
283438fd1498Szrj memory accesses. */
283538fd1498Szrj for (i = 0; partitions.iterate (i, &into); ++i)
283638fd1498Szrj {
283738fd1498Szrj bool changed = false;
283838fd1498Szrj if (partition_builtin_p (into) || into->kind == PKIND_PARTIAL_MEMSET)
283938fd1498Szrj continue;
284038fd1498Szrj for (int j = i + 1;
284138fd1498Szrj partitions.iterate (j, &partition); ++j)
284238fd1498Szrj {
284338fd1498Szrj if (share_memory_accesses (rdg, into, partition))
284438fd1498Szrj {
284538fd1498Szrj partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
284638fd1498Szrj partitions.unordered_remove (j);
284738fd1498Szrj partition_free (partition);
284838fd1498Szrj j--;
284938fd1498Szrj changed = true;
285038fd1498Szrj }
285138fd1498Szrj }
285238fd1498Szrj /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
285338fd1498Szrj accesses when 1 and 2 have similar accesses but not 0 and 1
285438fd1498Szrj then in the next iteration we will fail to consider merging
285538fd1498Szrj 1 into 0,2. So try again if we did any merging into 0. */
285638fd1498Szrj if (changed)
285738fd1498Szrj i--;
285838fd1498Szrj }
285938fd1498Szrj
286038fd1498Szrj /* Build the partition dependency graph and fuse partitions in strong
286138fd1498Szrj connected component. */
286238fd1498Szrj if (partitions.length () > 1)
286338fd1498Szrj {
286438fd1498Szrj /* Don't support loop nest distribution under runtime alias check
286538fd1498Szrj since it's not likely to enable many vectorization opportunities. */
286638fd1498Szrj if (loop->inner)
286738fd1498Szrj merge_dep_scc_partitions (rdg, &partitions, false);
286838fd1498Szrj else
286938fd1498Szrj {
287038fd1498Szrj merge_dep_scc_partitions (rdg, &partitions, true);
287138fd1498Szrj if (partitions.length () > 1)
287238fd1498Szrj break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
287338fd1498Szrj }
287438fd1498Szrj }
287538fd1498Szrj
287638fd1498Szrj finalize_partitions (loop, &partitions, &alias_ddrs);
287738fd1498Szrj
287838fd1498Szrj nbp = partitions.length ();
287938fd1498Szrj if (nbp == 0
288038fd1498Szrj || (nbp == 1 && !partition_builtin_p (partitions[0]))
288138fd1498Szrj || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
288238fd1498Szrj {
288338fd1498Szrj nbp = 0;
288438fd1498Szrj goto ldist_done;
288538fd1498Szrj }
288638fd1498Szrj
288738fd1498Szrj if (version_for_distribution_p (&partitions, &alias_ddrs))
288838fd1498Szrj version_loop_by_alias_check (loop, &alias_ddrs);
288938fd1498Szrj
289038fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
289138fd1498Szrj {
289238fd1498Szrj fprintf (dump_file,
289338fd1498Szrj "distribute loop <%d> into partitions:\n", loop->num);
289438fd1498Szrj dump_rdg_partitions (dump_file, partitions);
289538fd1498Szrj }
289638fd1498Szrj
289738fd1498Szrj FOR_EACH_VEC_ELT (partitions, i, partition)
289838fd1498Szrj {
289938fd1498Szrj if (partition_builtin_p (partition))
290038fd1498Szrj (*nb_calls)++;
290138fd1498Szrj *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
290238fd1498Szrj }
290338fd1498Szrj
290438fd1498Szrj ldist_done:
290538fd1498Szrj loop_nest.release ();
290638fd1498Szrj free_data_refs (datarefs_vec);
290738fd1498Szrj for (hash_table<ddr_hasher>::iterator iter = ddrs_table->begin ();
290838fd1498Szrj iter != ddrs_table->end (); ++iter)
290938fd1498Szrj {
291038fd1498Szrj free_dependence_relation (*iter);
291138fd1498Szrj *iter = NULL;
291238fd1498Szrj }
291338fd1498Szrj delete ddrs_table;
291438fd1498Szrj
291538fd1498Szrj FOR_EACH_VEC_ELT (partitions, i, partition)
291638fd1498Szrj partition_free (partition);
291738fd1498Szrj
291838fd1498Szrj free_rdg (rdg);
291938fd1498Szrj return nbp - *nb_calls;
292038fd1498Szrj }
292138fd1498Szrj
292238fd1498Szrj /* Distribute all loops in the current function. */
292338fd1498Szrj
292438fd1498Szrj namespace {
292538fd1498Szrj
292638fd1498Szrj const pass_data pass_data_loop_distribution =
292738fd1498Szrj {
292838fd1498Szrj GIMPLE_PASS, /* type */
292938fd1498Szrj "ldist", /* name */
293038fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */
293138fd1498Szrj TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
293238fd1498Szrj ( PROP_cfg | PROP_ssa ), /* properties_required */
293338fd1498Szrj 0, /* properties_provided */
293438fd1498Szrj 0, /* properties_destroyed */
293538fd1498Szrj 0, /* todo_flags_start */
293638fd1498Szrj 0, /* todo_flags_finish */
293738fd1498Szrj };
293838fd1498Szrj
293938fd1498Szrj class pass_loop_distribution : public gimple_opt_pass
294038fd1498Szrj {
294138fd1498Szrj public:
pass_loop_distribution(gcc::context * ctxt)294238fd1498Szrj pass_loop_distribution (gcc::context *ctxt)
294338fd1498Szrj : gimple_opt_pass (pass_data_loop_distribution, ctxt)
294438fd1498Szrj {}
294538fd1498Szrj
294638fd1498Szrj /* opt_pass methods: */
gate(function *)294738fd1498Szrj virtual bool gate (function *)
294838fd1498Szrj {
294938fd1498Szrj return flag_tree_loop_distribution
295038fd1498Szrj || flag_tree_loop_distribute_patterns;
295138fd1498Szrj }
295238fd1498Szrj
295338fd1498Szrj virtual unsigned int execute (function *);
295438fd1498Szrj
295538fd1498Szrj }; // class pass_loop_distribution
295638fd1498Szrj
295738fd1498Szrj
295838fd1498Szrj /* Given LOOP, this function records seed statements for distribution in
295938fd1498Szrj WORK_LIST. Return false if there is nothing for distribution. */
296038fd1498Szrj
296138fd1498Szrj static bool
find_seed_stmts_for_distribution(struct loop * loop,vec<gimple * > * work_list)296238fd1498Szrj find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
296338fd1498Szrj {
296438fd1498Szrj basic_block *bbs = get_loop_body_in_dom_order (loop);
296538fd1498Szrj
296638fd1498Szrj /* Initialize the worklist with stmts we seed the partitions with. */
296738fd1498Szrj for (unsigned i = 0; i < loop->num_nodes; ++i)
296838fd1498Szrj {
296938fd1498Szrj for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
297038fd1498Szrj !gsi_end_p (gsi); gsi_next (&gsi))
297138fd1498Szrj {
297238fd1498Szrj gphi *phi = gsi.phi ();
297338fd1498Szrj if (virtual_operand_p (gimple_phi_result (phi)))
297438fd1498Szrj continue;
297538fd1498Szrj /* Distribute stmts which have defs that are used outside of
297638fd1498Szrj the loop. */
297738fd1498Szrj if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
297838fd1498Szrj continue;
297938fd1498Szrj work_list->safe_push (phi);
298038fd1498Szrj }
298138fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
298238fd1498Szrj !gsi_end_p (gsi); gsi_next (&gsi))
298338fd1498Szrj {
298438fd1498Szrj gimple *stmt = gsi_stmt (gsi);
298538fd1498Szrj
298638fd1498Szrj /* If there is a stmt with side-effects bail out - we
298738fd1498Szrj cannot and should not distribute this loop. */
298838fd1498Szrj if (gimple_has_side_effects (stmt))
298938fd1498Szrj {
299038fd1498Szrj free (bbs);
299138fd1498Szrj return false;
299238fd1498Szrj }
299338fd1498Szrj
299438fd1498Szrj /* Distribute stmts which have defs that are used outside of
299538fd1498Szrj the loop. */
299638fd1498Szrj if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
299738fd1498Szrj ;
299838fd1498Szrj /* Otherwise only distribute stores for now. */
299938fd1498Szrj else if (!gimple_vdef (stmt))
300038fd1498Szrj continue;
300138fd1498Szrj
300238fd1498Szrj work_list->safe_push (stmt);
300338fd1498Szrj }
300438fd1498Szrj }
300538fd1498Szrj free (bbs);
300638fd1498Szrj return work_list->length () > 0;
300738fd1498Szrj }
300838fd1498Szrj
300938fd1498Szrj /* Given innermost LOOP, return the outermost enclosing loop that forms a
301038fd1498Szrj perfect loop nest. */
301138fd1498Szrj
301238fd1498Szrj static struct loop *
prepare_perfect_loop_nest(struct loop * loop)301338fd1498Szrj prepare_perfect_loop_nest (struct loop *loop)
301438fd1498Szrj {
301538fd1498Szrj struct loop *outer = loop_outer (loop);
301638fd1498Szrj tree niters = number_of_latch_executions (loop);
301738fd1498Szrj
301838fd1498Szrj /* TODO: We only support the innermost 3-level loop nest distribution
301938fd1498Szrj because of compilation time issue for now. This should be relaxed
302038fd1498Szrj in the future. Note we only allow 3-level loop nest distribution
302138fd1498Szrj when parallelizing loops. */
302238fd1498Szrj while ((loop->inner == NULL
302338fd1498Szrj || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1))
302438fd1498Szrj && loop_outer (outer)
302538fd1498Szrj && outer->inner == loop && loop->next == NULL
302638fd1498Szrj && single_exit (outer)
302738fd1498Szrj && optimize_loop_for_speed_p (outer)
302838fd1498Szrj && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
302938fd1498Szrj && (niters = number_of_latch_executions (outer)) != NULL_TREE
303038fd1498Szrj && niters != chrec_dont_know)
303138fd1498Szrj {
303238fd1498Szrj loop = outer;
303338fd1498Szrj outer = loop_outer (loop);
303438fd1498Szrj }
303538fd1498Szrj
303638fd1498Szrj return loop;
303738fd1498Szrj }
303838fd1498Szrj
303938fd1498Szrj unsigned int
execute(function * fun)304038fd1498Szrj pass_loop_distribution::execute (function *fun)
304138fd1498Szrj {
304238fd1498Szrj struct loop *loop;
304338fd1498Szrj bool changed = false;
304438fd1498Szrj basic_block bb;
304538fd1498Szrj control_dependences *cd = NULL;
304638fd1498Szrj auto_vec<loop_p> loops_to_be_destroyed;
304738fd1498Szrj
304838fd1498Szrj if (number_of_loops (fun) <= 1)
304938fd1498Szrj return 0;
305038fd1498Szrj
305138fd1498Szrj /* Compute topological order for basic blocks. Topological order is
305238fd1498Szrj needed because data dependence is computed for data references in
305338fd1498Szrj lexicographical order. */
305438fd1498Szrj if (bb_top_order_index == NULL)
305538fd1498Szrj {
305638fd1498Szrj int rpo_num;
305738fd1498Szrj int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
305838fd1498Szrj
305938fd1498Szrj bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
306038fd1498Szrj bb_top_order_index_size = last_basic_block_for_fn (cfun);
306138fd1498Szrj rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
306238fd1498Szrj for (int i = 0; i < rpo_num; i++)
306338fd1498Szrj bb_top_order_index[rpo[i]] = i;
306438fd1498Szrj
306538fd1498Szrj free (rpo);
306638fd1498Szrj }
306738fd1498Szrj
306838fd1498Szrj FOR_ALL_BB_FN (bb, fun)
306938fd1498Szrj {
307038fd1498Szrj gimple_stmt_iterator gsi;
307138fd1498Szrj for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
307238fd1498Szrj gimple_set_uid (gsi_stmt (gsi), -1);
307338fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
307438fd1498Szrj gimple_set_uid (gsi_stmt (gsi), -1);
307538fd1498Szrj }
307638fd1498Szrj
307738fd1498Szrj /* We can at the moment only distribute non-nested loops, thus restrict
307838fd1498Szrj walking to innermost loops. */
307938fd1498Szrj FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
308038fd1498Szrj {
308138fd1498Szrj /* Don't distribute multiple exit edges loop, or cold loop. */
308238fd1498Szrj if (!single_exit (loop)
308338fd1498Szrj || !optimize_loop_for_speed_p (loop))
308438fd1498Szrj continue;
308538fd1498Szrj
308638fd1498Szrj /* Don't distribute loop if niters is unknown. */
308738fd1498Szrj tree niters = number_of_latch_executions (loop);
308838fd1498Szrj if (niters == NULL_TREE || niters == chrec_dont_know)
308938fd1498Szrj continue;
309038fd1498Szrj
309138fd1498Szrj /* Get the perfect loop nest for distribution. */
309238fd1498Szrj loop = prepare_perfect_loop_nest (loop);
309338fd1498Szrj for (; loop; loop = loop->inner)
309438fd1498Szrj {
309538fd1498Szrj auto_vec<gimple *> work_list;
309638fd1498Szrj if (!find_seed_stmts_for_distribution (loop, &work_list))
309738fd1498Szrj break;
309838fd1498Szrj
309938fd1498Szrj const char *str = loop->inner ? " nest" : "";
310038fd1498Szrj location_t loc = find_loop_location (loop);
310138fd1498Szrj if (!cd)
310238fd1498Szrj {
310338fd1498Szrj calculate_dominance_info (CDI_DOMINATORS);
310438fd1498Szrj calculate_dominance_info (CDI_POST_DOMINATORS);
310538fd1498Szrj cd = new control_dependences ();
310638fd1498Szrj free_dominance_info (CDI_POST_DOMINATORS);
310738fd1498Szrj }
310838fd1498Szrj
310938fd1498Szrj bool destroy_p;
311038fd1498Szrj int nb_generated_loops, nb_generated_calls;
311138fd1498Szrj nb_generated_loops = distribute_loop (loop, work_list, cd,
311238fd1498Szrj &nb_generated_calls,
311338fd1498Szrj &destroy_p);
311438fd1498Szrj if (destroy_p)
311538fd1498Szrj loops_to_be_destroyed.safe_push (loop);
311638fd1498Szrj
311738fd1498Szrj if (nb_generated_loops + nb_generated_calls > 0)
311838fd1498Szrj {
311938fd1498Szrj changed = true;
312038fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
312138fd1498Szrj loc, "Loop%s %d distributed: split to %d loops "
312238fd1498Szrj "and %d library calls.\n", str, loop->num,
312338fd1498Szrj nb_generated_loops, nb_generated_calls);
312438fd1498Szrj
312538fd1498Szrj break;
312638fd1498Szrj }
312738fd1498Szrj
312838fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
312938fd1498Szrj fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
313038fd1498Szrj }
313138fd1498Szrj }
313238fd1498Szrj
313338fd1498Szrj if (cd)
313438fd1498Szrj delete cd;
313538fd1498Szrj
313638fd1498Szrj if (bb_top_order_index != NULL)
313738fd1498Szrj {
313838fd1498Szrj free (bb_top_order_index);
313938fd1498Szrj bb_top_order_index = NULL;
314038fd1498Szrj bb_top_order_index_size = 0;
314138fd1498Szrj }
314238fd1498Szrj
314338fd1498Szrj if (changed)
314438fd1498Szrj {
314538fd1498Szrj /* Destroy loop bodies that could not be reused. Do this late as we
314638fd1498Szrj otherwise can end up refering to stale data in control dependences. */
314738fd1498Szrj unsigned i;
314838fd1498Szrj FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
314938fd1498Szrj destroy_loop (loop);
315038fd1498Szrj
315138fd1498Szrj /* Cached scalar evolutions now may refer to wrong or non-existing
315238fd1498Szrj loops. */
315338fd1498Szrj scev_reset_htab ();
315438fd1498Szrj mark_virtual_operands_for_renaming (fun);
315538fd1498Szrj rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
315638fd1498Szrj }
315738fd1498Szrj
315838fd1498Szrj checking_verify_loop_structure ();
315938fd1498Szrj
316038fd1498Szrj return changed ? TODO_cleanup_cfg : 0;
316138fd1498Szrj }
316238fd1498Szrj
316338fd1498Szrj } // anon namespace
316438fd1498Szrj
316538fd1498Szrj gimple_opt_pass *
make_pass_loop_distribution(gcc::context * ctxt)316638fd1498Szrj make_pass_loop_distribution (gcc::context *ctxt)
316738fd1498Szrj {
316838fd1498Szrj return new pass_loop_distribution (ctxt);
316938fd1498Szrj }
3170