1*e4b17023SJohn Marino /* Loop autoparallelization.
2*e4b17023SJohn Marino Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5*e4b17023SJohn Marino Zdenek Dvorak <dvorakz@suse.cz>.
6*e4b17023SJohn Marino
7*e4b17023SJohn Marino This file is part of GCC.
8*e4b17023SJohn Marino
9*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
10*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
11*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
12*e4b17023SJohn Marino version.
13*e4b17023SJohn Marino
14*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
16*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17*e4b17023SJohn Marino for more details.
18*e4b17023SJohn Marino
19*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
21*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
22*e4b17023SJohn Marino
23*e4b17023SJohn Marino #include "config.h"
24*e4b17023SJohn Marino #include "system.h"
25*e4b17023SJohn Marino #include "coretypes.h"
26*e4b17023SJohn Marino #include "tree-flow.h"
27*e4b17023SJohn Marino #include "cfgloop.h"
28*e4b17023SJohn Marino #include "tree-data-ref.h"
29*e4b17023SJohn Marino #include "tree-scalar-evolution.h"
30*e4b17023SJohn Marino #include "gimple-pretty-print.h"
31*e4b17023SJohn Marino #include "tree-pass.h"
32*e4b17023SJohn Marino #include "langhooks.h"
33*e4b17023SJohn Marino #include "tree-vectorizer.h"
34*e4b17023SJohn Marino
35*e4b17023SJohn Marino /* This pass tries to distribute iterations of loops into several threads.
36*e4b17023SJohn Marino The implementation is straightforward -- for each loop we test whether its
37*e4b17023SJohn Marino iterations are independent, and if it is the case (and some additional
38*e4b17023SJohn Marino conditions regarding profitability and correctness are satisfied), we
39*e4b17023SJohn Marino add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40*e4b17023SJohn Marino machinery do its job.
41*e4b17023SJohn Marino
42*e4b17023SJohn Marino The most of the complexity is in bringing the code into shape expected
43*e4b17023SJohn Marino by the omp expanders:
44*e4b17023SJohn Marino -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45*e4b17023SJohn Marino variable and that the exit test is at the start of the loop body
46*e4b17023SJohn Marino -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47*e4b17023SJohn Marino variables by accesses through pointers, and breaking up ssa chains
48*e4b17023SJohn Marino by storing the values incoming to the parallelized loop to a structure
49*e4b17023SJohn Marino passed to the new function as an argument (something similar is done
50*e4b17023SJohn Marino in omp gimplification, unfortunately only a small part of the code
51*e4b17023SJohn Marino can be shared).
52*e4b17023SJohn Marino
53*e4b17023SJohn Marino TODO:
54*e4b17023SJohn Marino -- if there are several parallelizable loops in a function, it may be
55*e4b17023SJohn Marino possible to generate the threads just once (using synchronization to
56*e4b17023SJohn Marino ensure that cross-loop dependences are obeyed).
57*e4b17023SJohn Marino -- handling of common scalar dependence patterns (accumulation, ...)
58*e4b17023SJohn Marino -- handling of non-innermost loops */
59*e4b17023SJohn Marino
60*e4b17023SJohn Marino /*
61*e4b17023SJohn Marino Reduction handling:
62*e4b17023SJohn Marino currently we use vect_force_simple_reduction() to detect reduction patterns.
63*e4b17023SJohn Marino The code transformation will be introduced by an example.
64*e4b17023SJohn Marino
65*e4b17023SJohn Marino
66*e4b17023SJohn Marino parloop
67*e4b17023SJohn Marino {
68*e4b17023SJohn Marino int sum=1;
69*e4b17023SJohn Marino
70*e4b17023SJohn Marino for (i = 0; i < N; i++)
71*e4b17023SJohn Marino {
72*e4b17023SJohn Marino x[i] = i + 3;
73*e4b17023SJohn Marino sum+=x[i];
74*e4b17023SJohn Marino }
75*e4b17023SJohn Marino }
76*e4b17023SJohn Marino
77*e4b17023SJohn Marino gimple-like code:
78*e4b17023SJohn Marino header_bb:
79*e4b17023SJohn Marino
80*e4b17023SJohn Marino # sum_29 = PHI <sum_11(5), 1(3)>
81*e4b17023SJohn Marino # i_28 = PHI <i_12(5), 0(3)>
82*e4b17023SJohn Marino D.1795_8 = i_28 + 3;
83*e4b17023SJohn Marino x[i_28] = D.1795_8;
84*e4b17023SJohn Marino sum_11 = D.1795_8 + sum_29;
85*e4b17023SJohn Marino i_12 = i_28 + 1;
86*e4b17023SJohn Marino if (N_6(D) > i_12)
87*e4b17023SJohn Marino goto header_bb;
88*e4b17023SJohn Marino
89*e4b17023SJohn Marino
90*e4b17023SJohn Marino exit_bb:
91*e4b17023SJohn Marino
92*e4b17023SJohn Marino # sum_21 = PHI <sum_11(4)>
93*e4b17023SJohn Marino printf (&"%d"[0], sum_21);
94*e4b17023SJohn Marino
95*e4b17023SJohn Marino
96*e4b17023SJohn Marino after reduction transformation (only relevant parts):
97*e4b17023SJohn Marino
98*e4b17023SJohn Marino parloop
99*e4b17023SJohn Marino {
100*e4b17023SJohn Marino
101*e4b17023SJohn Marino ....
102*e4b17023SJohn Marino
103*e4b17023SJohn Marino
104*e4b17023SJohn Marino # Storing the initial value given by the user. #
105*e4b17023SJohn Marino
106*e4b17023SJohn Marino .paral_data_store.32.sum.27 = 1;
107*e4b17023SJohn Marino
108*e4b17023SJohn Marino #pragma omp parallel num_threads(4)
109*e4b17023SJohn Marino
110*e4b17023SJohn Marino #pragma omp for schedule(static)
111*e4b17023SJohn Marino
112*e4b17023SJohn Marino # The neutral element corresponding to the particular
113*e4b17023SJohn Marino reduction's operation, e.g. 0 for PLUS_EXPR,
114*e4b17023SJohn Marino 1 for MULT_EXPR, etc. replaces the user's initial value. #
115*e4b17023SJohn Marino
116*e4b17023SJohn Marino # sum.27_29 = PHI <sum.27_11, 0>
117*e4b17023SJohn Marino
118*e4b17023SJohn Marino sum.27_11 = D.1827_8 + sum.27_29;
119*e4b17023SJohn Marino
120*e4b17023SJohn Marino GIMPLE_OMP_CONTINUE
121*e4b17023SJohn Marino
122*e4b17023SJohn Marino # Adding this reduction phi is done at create_phi_for_local_result() #
123*e4b17023SJohn Marino # sum.27_56 = PHI <sum.27_11, 0>
124*e4b17023SJohn Marino GIMPLE_OMP_RETURN
125*e4b17023SJohn Marino
126*e4b17023SJohn Marino # Creating the atomic operation is done at
127*e4b17023SJohn Marino create_call_for_reduction_1() #
128*e4b17023SJohn Marino
129*e4b17023SJohn Marino #pragma omp atomic_load
130*e4b17023SJohn Marino D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131*e4b17023SJohn Marino D.1840_60 = sum.27_56 + D.1839_59;
132*e4b17023SJohn Marino #pragma omp atomic_store (D.1840_60);
133*e4b17023SJohn Marino
134*e4b17023SJohn Marino GIMPLE_OMP_RETURN
135*e4b17023SJohn Marino
136*e4b17023SJohn Marino # collecting the result after the join of the threads is done at
137*e4b17023SJohn Marino create_loads_for_reductions().
138*e4b17023SJohn Marino The value computed by the threads is loaded from the
139*e4b17023SJohn Marino shared struct. #
140*e4b17023SJohn Marino
141*e4b17023SJohn Marino
142*e4b17023SJohn Marino .paral_data_load.33_52 = &.paral_data_store.32;
143*e4b17023SJohn Marino sum_37 = .paral_data_load.33_52->sum.27;
144*e4b17023SJohn Marino sum_43 = D.1795_41 + sum_37;
145*e4b17023SJohn Marino
146*e4b17023SJohn Marino exit bb:
147*e4b17023SJohn Marino # sum_21 = PHI <sum_43, sum_26>
148*e4b17023SJohn Marino printf (&"%d"[0], sum_21);
149*e4b17023SJohn Marino
150*e4b17023SJohn Marino ...
151*e4b17023SJohn Marino
152*e4b17023SJohn Marino }
153*e4b17023SJohn Marino
154*e4b17023SJohn Marino */
155*e4b17023SJohn Marino
156*e4b17023SJohn Marino /* Minimal number of iterations of a loop that should be executed in each
157*e4b17023SJohn Marino thread. */
158*e4b17023SJohn Marino #define MIN_PER_THREAD 100
159*e4b17023SJohn Marino
160*e4b17023SJohn Marino /* Element of the hashtable, representing a
161*e4b17023SJohn Marino reduction in the current loop. */
162*e4b17023SJohn Marino struct reduction_info
163*e4b17023SJohn Marino {
164*e4b17023SJohn Marino gimple reduc_stmt; /* reduction statement. */
165*e4b17023SJohn Marino gimple reduc_phi; /* The phi node defining the reduction. */
166*e4b17023SJohn Marino enum tree_code reduction_code;/* code for the reduction operation. */
167*e4b17023SJohn Marino unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168*e4b17023SJohn Marino result. */
169*e4b17023SJohn Marino gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
170*e4b17023SJohn Marino of the reduction variable when existing the loop. */
171*e4b17023SJohn Marino tree initial_value; /* The initial value of the reduction var before entering the loop. */
172*e4b17023SJohn Marino tree field; /* the name of the field in the parloop data structure intended for reduction. */
173*e4b17023SJohn Marino tree init; /* reduction initialization value. */
174*e4b17023SJohn Marino gimple new_phi; /* (helper field) Newly created phi node whose result
175*e4b17023SJohn Marino will be passed to the atomic operation. Represents
176*e4b17023SJohn Marino the local result each thread computed for the reduction
177*e4b17023SJohn Marino operation. */
178*e4b17023SJohn Marino };
179*e4b17023SJohn Marino
180*e4b17023SJohn Marino /* Equality and hash functions for hashtab code. */
181*e4b17023SJohn Marino
182*e4b17023SJohn Marino static int
reduction_info_eq(const void * aa,const void * bb)183*e4b17023SJohn Marino reduction_info_eq (const void *aa, const void *bb)
184*e4b17023SJohn Marino {
185*e4b17023SJohn Marino const struct reduction_info *a = (const struct reduction_info *) aa;
186*e4b17023SJohn Marino const struct reduction_info *b = (const struct reduction_info *) bb;
187*e4b17023SJohn Marino
188*e4b17023SJohn Marino return (a->reduc_phi == b->reduc_phi);
189*e4b17023SJohn Marino }
190*e4b17023SJohn Marino
191*e4b17023SJohn Marino static hashval_t
reduction_info_hash(const void * aa)192*e4b17023SJohn Marino reduction_info_hash (const void *aa)
193*e4b17023SJohn Marino {
194*e4b17023SJohn Marino const struct reduction_info *a = (const struct reduction_info *) aa;
195*e4b17023SJohn Marino
196*e4b17023SJohn Marino return a->reduc_version;
197*e4b17023SJohn Marino }
198*e4b17023SJohn Marino
199*e4b17023SJohn Marino static struct reduction_info *
reduction_phi(htab_t reduction_list,gimple phi)200*e4b17023SJohn Marino reduction_phi (htab_t reduction_list, gimple phi)
201*e4b17023SJohn Marino {
202*e4b17023SJohn Marino struct reduction_info tmpred, *red;
203*e4b17023SJohn Marino
204*e4b17023SJohn Marino if (htab_elements (reduction_list) == 0 || phi == NULL)
205*e4b17023SJohn Marino return NULL;
206*e4b17023SJohn Marino
207*e4b17023SJohn Marino tmpred.reduc_phi = phi;
208*e4b17023SJohn Marino tmpred.reduc_version = gimple_uid (phi);
209*e4b17023SJohn Marino red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210*e4b17023SJohn Marino
211*e4b17023SJohn Marino return red;
212*e4b17023SJohn Marino }
213*e4b17023SJohn Marino
214*e4b17023SJohn Marino /* Element of hashtable of names to copy. */
215*e4b17023SJohn Marino
216*e4b17023SJohn Marino struct name_to_copy_elt
217*e4b17023SJohn Marino {
218*e4b17023SJohn Marino unsigned version; /* The version of the name to copy. */
219*e4b17023SJohn Marino tree new_name; /* The new name used in the copy. */
220*e4b17023SJohn Marino tree field; /* The field of the structure used to pass the
221*e4b17023SJohn Marino value. */
222*e4b17023SJohn Marino };
223*e4b17023SJohn Marino
224*e4b17023SJohn Marino /* Equality and hash functions for hashtab code. */
225*e4b17023SJohn Marino
226*e4b17023SJohn Marino static int
name_to_copy_elt_eq(const void * aa,const void * bb)227*e4b17023SJohn Marino name_to_copy_elt_eq (const void *aa, const void *bb)
228*e4b17023SJohn Marino {
229*e4b17023SJohn Marino const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230*e4b17023SJohn Marino const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231*e4b17023SJohn Marino
232*e4b17023SJohn Marino return a->version == b->version;
233*e4b17023SJohn Marino }
234*e4b17023SJohn Marino
235*e4b17023SJohn Marino static hashval_t
name_to_copy_elt_hash(const void * aa)236*e4b17023SJohn Marino name_to_copy_elt_hash (const void *aa)
237*e4b17023SJohn Marino {
238*e4b17023SJohn Marino const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239*e4b17023SJohn Marino
240*e4b17023SJohn Marino return (hashval_t) a->version;
241*e4b17023SJohn Marino }
242*e4b17023SJohn Marino
243*e4b17023SJohn Marino /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244*e4b17023SJohn Marino matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245*e4b17023SJohn Marino represents the denominator for every element in the matrix. */
246*e4b17023SJohn Marino typedef struct lambda_trans_matrix_s
247*e4b17023SJohn Marino {
248*e4b17023SJohn Marino lambda_matrix matrix;
249*e4b17023SJohn Marino int rowsize;
250*e4b17023SJohn Marino int colsize;
251*e4b17023SJohn Marino int denominator;
252*e4b17023SJohn Marino } *lambda_trans_matrix;
253*e4b17023SJohn Marino #define LTM_MATRIX(T) ((T)->matrix)
254*e4b17023SJohn Marino #define LTM_ROWSIZE(T) ((T)->rowsize)
255*e4b17023SJohn Marino #define LTM_COLSIZE(T) ((T)->colsize)
256*e4b17023SJohn Marino #define LTM_DENOMINATOR(T) ((T)->denominator)
257*e4b17023SJohn Marino
258*e4b17023SJohn Marino /* Allocate a new transformation matrix. */
259*e4b17023SJohn Marino
260*e4b17023SJohn Marino static lambda_trans_matrix
lambda_trans_matrix_new(int colsize,int rowsize,struct obstack * lambda_obstack)261*e4b17023SJohn Marino lambda_trans_matrix_new (int colsize, int rowsize,
262*e4b17023SJohn Marino struct obstack * lambda_obstack)
263*e4b17023SJohn Marino {
264*e4b17023SJohn Marino lambda_trans_matrix ret;
265*e4b17023SJohn Marino
266*e4b17023SJohn Marino ret = (lambda_trans_matrix)
267*e4b17023SJohn Marino obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268*e4b17023SJohn Marino LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269*e4b17023SJohn Marino LTM_ROWSIZE (ret) = rowsize;
270*e4b17023SJohn Marino LTM_COLSIZE (ret) = colsize;
271*e4b17023SJohn Marino LTM_DENOMINATOR (ret) = 1;
272*e4b17023SJohn Marino return ret;
273*e4b17023SJohn Marino }
274*e4b17023SJohn Marino
275*e4b17023SJohn Marino /* Multiply a vector VEC by a matrix MAT.
276*e4b17023SJohn Marino MAT is an M*N matrix, and VEC is a vector with length N. The result
277*e4b17023SJohn Marino is stored in DEST which must be a vector of length M. */
278*e4b17023SJohn Marino
279*e4b17023SJohn Marino static void
lambda_matrix_vector_mult(lambda_matrix matrix,int m,int n,lambda_vector vec,lambda_vector dest)280*e4b17023SJohn Marino lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281*e4b17023SJohn Marino lambda_vector vec, lambda_vector dest)
282*e4b17023SJohn Marino {
283*e4b17023SJohn Marino int i, j;
284*e4b17023SJohn Marino
285*e4b17023SJohn Marino lambda_vector_clear (dest, m);
286*e4b17023SJohn Marino for (i = 0; i < m; i++)
287*e4b17023SJohn Marino for (j = 0; j < n; j++)
288*e4b17023SJohn Marino dest[i] += matrix[i][j] * vec[j];
289*e4b17023SJohn Marino }
290*e4b17023SJohn Marino
291*e4b17023SJohn Marino /* Return true if TRANS is a legal transformation matrix that respects
292*e4b17023SJohn Marino the dependence vectors in DISTS and DIRS. The conservative answer
293*e4b17023SJohn Marino is false.
294*e4b17023SJohn Marino
295*e4b17023SJohn Marino "Wolfe proves that a unimodular transformation represented by the
296*e4b17023SJohn Marino matrix T is legal when applied to a loop nest with a set of
297*e4b17023SJohn Marino lexicographically non-negative distance vectors RDG if and only if
298*e4b17023SJohn Marino for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299*e4b17023SJohn Marino i.e.: if and only if it transforms the lexicographically positive
300*e4b17023SJohn Marino distance vectors to lexicographically positive vectors. Note that
301*e4b17023SJohn Marino a unimodular matrix must transform the zero vector (and only it) to
302*e4b17023SJohn Marino the zero vector." S.Muchnick. */
303*e4b17023SJohn Marino
304*e4b17023SJohn Marino static bool
lambda_transform_legal_p(lambda_trans_matrix trans,int nb_loops,VEC (ddr_p,heap)* dependence_relations)305*e4b17023SJohn Marino lambda_transform_legal_p (lambda_trans_matrix trans,
306*e4b17023SJohn Marino int nb_loops,
307*e4b17023SJohn Marino VEC (ddr_p, heap) *dependence_relations)
308*e4b17023SJohn Marino {
309*e4b17023SJohn Marino unsigned int i, j;
310*e4b17023SJohn Marino lambda_vector distres;
311*e4b17023SJohn Marino struct data_dependence_relation *ddr;
312*e4b17023SJohn Marino
313*e4b17023SJohn Marino gcc_assert (LTM_COLSIZE (trans) == nb_loops
314*e4b17023SJohn Marino && LTM_ROWSIZE (trans) == nb_loops);
315*e4b17023SJohn Marino
316*e4b17023SJohn Marino /* When there are no dependences, the transformation is correct. */
317*e4b17023SJohn Marino if (VEC_length (ddr_p, dependence_relations) == 0)
318*e4b17023SJohn Marino return true;
319*e4b17023SJohn Marino
320*e4b17023SJohn Marino ddr = VEC_index (ddr_p, dependence_relations, 0);
321*e4b17023SJohn Marino if (ddr == NULL)
322*e4b17023SJohn Marino return true;
323*e4b17023SJohn Marino
324*e4b17023SJohn Marino /* When there is an unknown relation in the dependence_relations, we
325*e4b17023SJohn Marino know that it is no worth looking at this loop nest: give up. */
326*e4b17023SJohn Marino if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327*e4b17023SJohn Marino return false;
328*e4b17023SJohn Marino
329*e4b17023SJohn Marino distres = lambda_vector_new (nb_loops);
330*e4b17023SJohn Marino
331*e4b17023SJohn Marino /* For each distance vector in the dependence graph. */
332*e4b17023SJohn Marino FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333*e4b17023SJohn Marino {
334*e4b17023SJohn Marino /* Don't care about relations for which we know that there is no
335*e4b17023SJohn Marino dependence, nor about read-read (aka. output-dependences):
336*e4b17023SJohn Marino these data accesses can happen in any order. */
337*e4b17023SJohn Marino if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338*e4b17023SJohn Marino || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339*e4b17023SJohn Marino continue;
340*e4b17023SJohn Marino
341*e4b17023SJohn Marino /* Conservatively answer: "this transformation is not valid". */
342*e4b17023SJohn Marino if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343*e4b17023SJohn Marino return false;
344*e4b17023SJohn Marino
345*e4b17023SJohn Marino /* If the dependence could not be captured by a distance vector,
346*e4b17023SJohn Marino conservatively answer that the transform is not valid. */
347*e4b17023SJohn Marino if (DDR_NUM_DIST_VECTS (ddr) == 0)
348*e4b17023SJohn Marino return false;
349*e4b17023SJohn Marino
350*e4b17023SJohn Marino /* Compute trans.dist_vect */
351*e4b17023SJohn Marino for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352*e4b17023SJohn Marino {
353*e4b17023SJohn Marino lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354*e4b17023SJohn Marino DDR_DIST_VECT (ddr, j), distres);
355*e4b17023SJohn Marino
356*e4b17023SJohn Marino if (!lambda_vector_lexico_pos (distres, nb_loops))
357*e4b17023SJohn Marino return false;
358*e4b17023SJohn Marino }
359*e4b17023SJohn Marino }
360*e4b17023SJohn Marino return true;
361*e4b17023SJohn Marino }
362*e4b17023SJohn Marino
363*e4b17023SJohn Marino /* Data dependency analysis. Returns true if the iterations of LOOP
364*e4b17023SJohn Marino are independent on each other (that is, if we can execute them
365*e4b17023SJohn Marino in parallel). */
366*e4b17023SJohn Marino
367*e4b17023SJohn Marino static bool
loop_parallel_p(struct loop * loop,struct obstack * parloop_obstack)368*e4b17023SJohn Marino loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
369*e4b17023SJohn Marino {
370*e4b17023SJohn Marino VEC (loop_p, heap) *loop_nest;
371*e4b17023SJohn Marino VEC (ddr_p, heap) *dependence_relations;
372*e4b17023SJohn Marino VEC (data_reference_p, heap) *datarefs;
373*e4b17023SJohn Marino lambda_trans_matrix trans;
374*e4b17023SJohn Marino bool ret = false;
375*e4b17023SJohn Marino
376*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
377*e4b17023SJohn Marino {
378*e4b17023SJohn Marino fprintf (dump_file, "Considering loop %d\n", loop->num);
379*e4b17023SJohn Marino if (!loop->inner)
380*e4b17023SJohn Marino fprintf (dump_file, "loop is innermost\n");
381*e4b17023SJohn Marino else
382*e4b17023SJohn Marino fprintf (dump_file, "loop NOT innermost\n");
383*e4b17023SJohn Marino }
384*e4b17023SJohn Marino
385*e4b17023SJohn Marino /* Check for problems with dependences. If the loop can be reversed,
386*e4b17023SJohn Marino the iterations are independent. */
387*e4b17023SJohn Marino datarefs = VEC_alloc (data_reference_p, heap, 10);
388*e4b17023SJohn Marino dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
389*e4b17023SJohn Marino loop_nest = VEC_alloc (loop_p, heap, 3);
390*e4b17023SJohn Marino if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
391*e4b17023SJohn Marino &dependence_relations))
392*e4b17023SJohn Marino {
393*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
394*e4b17023SJohn Marino fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
395*e4b17023SJohn Marino ret = false;
396*e4b17023SJohn Marino goto end;
397*e4b17023SJohn Marino }
398*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
399*e4b17023SJohn Marino dump_data_dependence_relations (dump_file, dependence_relations);
400*e4b17023SJohn Marino
401*e4b17023SJohn Marino trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
402*e4b17023SJohn Marino LTM_MATRIX (trans)[0][0] = -1;
403*e4b17023SJohn Marino
404*e4b17023SJohn Marino if (lambda_transform_legal_p (trans, 1, dependence_relations))
405*e4b17023SJohn Marino {
406*e4b17023SJohn Marino ret = true;
407*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
408*e4b17023SJohn Marino fprintf (dump_file, " SUCCESS: may be parallelized\n");
409*e4b17023SJohn Marino }
410*e4b17023SJohn Marino else if (dump_file && (dump_flags & TDF_DETAILS))
411*e4b17023SJohn Marino fprintf (dump_file,
412*e4b17023SJohn Marino " FAILED: data dependencies exist across iterations\n");
413*e4b17023SJohn Marino
414*e4b17023SJohn Marino end:
415*e4b17023SJohn Marino VEC_free (loop_p, heap, loop_nest);
416*e4b17023SJohn Marino free_dependence_relations (dependence_relations);
417*e4b17023SJohn Marino free_data_refs (datarefs);
418*e4b17023SJohn Marino
419*e4b17023SJohn Marino return ret;
420*e4b17023SJohn Marino }
421*e4b17023SJohn Marino
422*e4b17023SJohn Marino /* Return true when LOOP contains basic blocks marked with the
423*e4b17023SJohn Marino BB_IRREDUCIBLE_LOOP flag. */
424*e4b17023SJohn Marino
425*e4b17023SJohn Marino static inline bool
loop_has_blocks_with_irreducible_flag(struct loop * loop)426*e4b17023SJohn Marino loop_has_blocks_with_irreducible_flag (struct loop *loop)
427*e4b17023SJohn Marino {
428*e4b17023SJohn Marino unsigned i;
429*e4b17023SJohn Marino basic_block *bbs = get_loop_body_in_dom_order (loop);
430*e4b17023SJohn Marino bool res = true;
431*e4b17023SJohn Marino
432*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
433*e4b17023SJohn Marino if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434*e4b17023SJohn Marino goto end;
435*e4b17023SJohn Marino
436*e4b17023SJohn Marino res = false;
437*e4b17023SJohn Marino end:
438*e4b17023SJohn Marino free (bbs);
439*e4b17023SJohn Marino return res;
440*e4b17023SJohn Marino }
441*e4b17023SJohn Marino
442*e4b17023SJohn Marino /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
443*e4b17023SJohn Marino The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
444*e4b17023SJohn Marino to their addresses that can be reused. The address of OBJ is known to
445*e4b17023SJohn Marino be invariant in the whole function. Other needed statements are placed
446*e4b17023SJohn Marino right before GSI. */
447*e4b17023SJohn Marino
448*e4b17023SJohn Marino static tree
take_address_of(tree obj,tree type,edge entry,htab_t decl_address,gimple_stmt_iterator * gsi)449*e4b17023SJohn Marino take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
450*e4b17023SJohn Marino gimple_stmt_iterator *gsi)
451*e4b17023SJohn Marino {
452*e4b17023SJohn Marino int uid;
453*e4b17023SJohn Marino void **dslot;
454*e4b17023SJohn Marino struct int_tree_map ielt, *nielt;
455*e4b17023SJohn Marino tree *var_p, name, bvar, addr;
456*e4b17023SJohn Marino gimple stmt;
457*e4b17023SJohn Marino gimple_seq stmts;
458*e4b17023SJohn Marino
459*e4b17023SJohn Marino /* Since the address of OBJ is invariant, the trees may be shared.
460*e4b17023SJohn Marino Avoid rewriting unrelated parts of the code. */
461*e4b17023SJohn Marino obj = unshare_expr (obj);
462*e4b17023SJohn Marino for (var_p = &obj;
463*e4b17023SJohn Marino handled_component_p (*var_p);
464*e4b17023SJohn Marino var_p = &TREE_OPERAND (*var_p, 0))
465*e4b17023SJohn Marino continue;
466*e4b17023SJohn Marino
467*e4b17023SJohn Marino /* Canonicalize the access to base on a MEM_REF. */
468*e4b17023SJohn Marino if (DECL_P (*var_p))
469*e4b17023SJohn Marino *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470*e4b17023SJohn Marino
471*e4b17023SJohn Marino /* Assign a canonical SSA name to the address of the base decl used
472*e4b17023SJohn Marino in the address and share it for all accesses and addresses based
473*e4b17023SJohn Marino on it. */
474*e4b17023SJohn Marino uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
475*e4b17023SJohn Marino ielt.uid = uid;
476*e4b17023SJohn Marino dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
477*e4b17023SJohn Marino if (!*dslot)
478*e4b17023SJohn Marino {
479*e4b17023SJohn Marino if (gsi == NULL)
480*e4b17023SJohn Marino return NULL;
481*e4b17023SJohn Marino addr = TREE_OPERAND (*var_p, 0);
482*e4b17023SJohn Marino bvar = create_tmp_var (TREE_TYPE (addr),
483*e4b17023SJohn Marino get_name (TREE_OPERAND
484*e4b17023SJohn Marino (TREE_OPERAND (*var_p, 0), 0)));
485*e4b17023SJohn Marino add_referenced_var (bvar);
486*e4b17023SJohn Marino stmt = gimple_build_assign (bvar, addr);
487*e4b17023SJohn Marino name = make_ssa_name (bvar, stmt);
488*e4b17023SJohn Marino gimple_assign_set_lhs (stmt, name);
489*e4b17023SJohn Marino gsi_insert_on_edge_immediate (entry, stmt);
490*e4b17023SJohn Marino
491*e4b17023SJohn Marino nielt = XNEW (struct int_tree_map);
492*e4b17023SJohn Marino nielt->uid = uid;
493*e4b17023SJohn Marino nielt->to = name;
494*e4b17023SJohn Marino *dslot = nielt;
495*e4b17023SJohn Marino }
496*e4b17023SJohn Marino else
497*e4b17023SJohn Marino name = ((struct int_tree_map *) *dslot)->to;
498*e4b17023SJohn Marino
499*e4b17023SJohn Marino /* Express the address in terms of the canonical SSA name. */
500*e4b17023SJohn Marino TREE_OPERAND (*var_p, 0) = name;
501*e4b17023SJohn Marino if (gsi == NULL)
502*e4b17023SJohn Marino return build_fold_addr_expr_with_type (obj, type);
503*e4b17023SJohn Marino
504*e4b17023SJohn Marino name = force_gimple_operand (build_addr (obj, current_function_decl),
505*e4b17023SJohn Marino &stmts, true, NULL_TREE);
506*e4b17023SJohn Marino if (!gimple_seq_empty_p (stmts))
507*e4b17023SJohn Marino gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
508*e4b17023SJohn Marino
509*e4b17023SJohn Marino if (!useless_type_conversion_p (type, TREE_TYPE (name)))
510*e4b17023SJohn Marino {
511*e4b17023SJohn Marino name = force_gimple_operand (fold_convert (type, name), &stmts, true,
512*e4b17023SJohn Marino NULL_TREE);
513*e4b17023SJohn Marino if (!gimple_seq_empty_p (stmts))
514*e4b17023SJohn Marino gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
515*e4b17023SJohn Marino }
516*e4b17023SJohn Marino
517*e4b17023SJohn Marino return name;
518*e4b17023SJohn Marino }
519*e4b17023SJohn Marino
520*e4b17023SJohn Marino /* Callback for htab_traverse. Create the initialization statement
521*e4b17023SJohn Marino for reduction described in SLOT, and place it at the preheader of
522*e4b17023SJohn Marino the loop described in DATA. */
523*e4b17023SJohn Marino
524*e4b17023SJohn Marino static int
initialize_reductions(void ** slot,void * data)525*e4b17023SJohn Marino initialize_reductions (void **slot, void *data)
526*e4b17023SJohn Marino {
527*e4b17023SJohn Marino tree init, c;
528*e4b17023SJohn Marino tree bvar, type, arg;
529*e4b17023SJohn Marino edge e;
530*e4b17023SJohn Marino
531*e4b17023SJohn Marino struct reduction_info *const reduc = (struct reduction_info *) *slot;
532*e4b17023SJohn Marino struct loop *loop = (struct loop *) data;
533*e4b17023SJohn Marino
534*e4b17023SJohn Marino /* Create initialization in preheader:
535*e4b17023SJohn Marino reduction_variable = initialization value of reduction. */
536*e4b17023SJohn Marino
537*e4b17023SJohn Marino /* In the phi node at the header, replace the argument coming
538*e4b17023SJohn Marino from the preheader with the reduction initialization value. */
539*e4b17023SJohn Marino
540*e4b17023SJohn Marino /* Create a new variable to initialize the reduction. */
541*e4b17023SJohn Marino type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
542*e4b17023SJohn Marino bvar = create_tmp_var (type, "reduction");
543*e4b17023SJohn Marino add_referenced_var (bvar);
544*e4b17023SJohn Marino
545*e4b17023SJohn Marino c = build_omp_clause (gimple_location (reduc->reduc_stmt),
546*e4b17023SJohn Marino OMP_CLAUSE_REDUCTION);
547*e4b17023SJohn Marino OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
548*e4b17023SJohn Marino OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
549*e4b17023SJohn Marino
550*e4b17023SJohn Marino init = omp_reduction_init (c, TREE_TYPE (bvar));
551*e4b17023SJohn Marino reduc->init = init;
552*e4b17023SJohn Marino
553*e4b17023SJohn Marino /* Replace the argument representing the initialization value
554*e4b17023SJohn Marino with the initialization value for the reduction (neutral
555*e4b17023SJohn Marino element for the particular operation, e.g. 0 for PLUS_EXPR,
556*e4b17023SJohn Marino 1 for MULT_EXPR, etc).
557*e4b17023SJohn Marino Keep the old value in a new variable "reduction_initial",
558*e4b17023SJohn Marino that will be taken in consideration after the parallel
559*e4b17023SJohn Marino computing is done. */
560*e4b17023SJohn Marino
561*e4b17023SJohn Marino e = loop_preheader_edge (loop);
562*e4b17023SJohn Marino arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
563*e4b17023SJohn Marino /* Create new variable to hold the initial value. */
564*e4b17023SJohn Marino
565*e4b17023SJohn Marino SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
566*e4b17023SJohn Marino (reduc->reduc_phi, loop_preheader_edge (loop)), init);
567*e4b17023SJohn Marino reduc->initial_value = arg;
568*e4b17023SJohn Marino return 1;
569*e4b17023SJohn Marino }
570*e4b17023SJohn Marino
571*e4b17023SJohn Marino struct elv_data
572*e4b17023SJohn Marino {
573*e4b17023SJohn Marino struct walk_stmt_info info;
574*e4b17023SJohn Marino edge entry;
575*e4b17023SJohn Marino htab_t decl_address;
576*e4b17023SJohn Marino gimple_stmt_iterator *gsi;
577*e4b17023SJohn Marino bool changed;
578*e4b17023SJohn Marino bool reset;
579*e4b17023SJohn Marino };
580*e4b17023SJohn Marino
581*e4b17023SJohn Marino /* Eliminates references to local variables in *TP out of the single
582*e4b17023SJohn Marino entry single exit region starting at DTA->ENTRY.
583*e4b17023SJohn Marino DECL_ADDRESS contains addresses of the references that had their
584*e4b17023SJohn Marino address taken already. If the expression is changed, CHANGED is
585*e4b17023SJohn Marino set to true. Callback for walk_tree. */
586*e4b17023SJohn Marino
587*e4b17023SJohn Marino static tree
eliminate_local_variables_1(tree * tp,int * walk_subtrees,void * data)588*e4b17023SJohn Marino eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
589*e4b17023SJohn Marino {
590*e4b17023SJohn Marino struct elv_data *const dta = (struct elv_data *) data;
591*e4b17023SJohn Marino tree t = *tp, var, addr, addr_type, type, obj;
592*e4b17023SJohn Marino
593*e4b17023SJohn Marino if (DECL_P (t))
594*e4b17023SJohn Marino {
595*e4b17023SJohn Marino *walk_subtrees = 0;
596*e4b17023SJohn Marino
597*e4b17023SJohn Marino if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
598*e4b17023SJohn Marino return NULL_TREE;
599*e4b17023SJohn Marino
600*e4b17023SJohn Marino type = TREE_TYPE (t);
601*e4b17023SJohn Marino addr_type = build_pointer_type (type);
602*e4b17023SJohn Marino addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
603*e4b17023SJohn Marino dta->gsi);
604*e4b17023SJohn Marino if (dta->gsi == NULL && addr == NULL_TREE)
605*e4b17023SJohn Marino {
606*e4b17023SJohn Marino dta->reset = true;
607*e4b17023SJohn Marino return NULL_TREE;
608*e4b17023SJohn Marino }
609*e4b17023SJohn Marino
610*e4b17023SJohn Marino *tp = build_simple_mem_ref (addr);
611*e4b17023SJohn Marino
612*e4b17023SJohn Marino dta->changed = true;
613*e4b17023SJohn Marino return NULL_TREE;
614*e4b17023SJohn Marino }
615*e4b17023SJohn Marino
616*e4b17023SJohn Marino if (TREE_CODE (t) == ADDR_EXPR)
617*e4b17023SJohn Marino {
618*e4b17023SJohn Marino /* ADDR_EXPR may appear in two contexts:
619*e4b17023SJohn Marino -- as a gimple operand, when the address taken is a function invariant
620*e4b17023SJohn Marino -- as gimple rhs, when the resulting address in not a function
621*e4b17023SJohn Marino invariant
622*e4b17023SJohn Marino We do not need to do anything special in the latter case (the base of
623*e4b17023SJohn Marino the memory reference whose address is taken may be replaced in the
624*e4b17023SJohn Marino DECL_P case). The former case is more complicated, as we need to
625*e4b17023SJohn Marino ensure that the new address is still a gimple operand. Thus, it
626*e4b17023SJohn Marino is not sufficient to replace just the base of the memory reference --
627*e4b17023SJohn Marino we need to move the whole computation of the address out of the
628*e4b17023SJohn Marino loop. */
629*e4b17023SJohn Marino if (!is_gimple_val (t))
630*e4b17023SJohn Marino return NULL_TREE;
631*e4b17023SJohn Marino
632*e4b17023SJohn Marino *walk_subtrees = 0;
633*e4b17023SJohn Marino obj = TREE_OPERAND (t, 0);
634*e4b17023SJohn Marino var = get_base_address (obj);
635*e4b17023SJohn Marino if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
636*e4b17023SJohn Marino return NULL_TREE;
637*e4b17023SJohn Marino
638*e4b17023SJohn Marino addr_type = TREE_TYPE (t);
639*e4b17023SJohn Marino addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
640*e4b17023SJohn Marino dta->gsi);
641*e4b17023SJohn Marino if (dta->gsi == NULL && addr == NULL_TREE)
642*e4b17023SJohn Marino {
643*e4b17023SJohn Marino dta->reset = true;
644*e4b17023SJohn Marino return NULL_TREE;
645*e4b17023SJohn Marino }
646*e4b17023SJohn Marino *tp = addr;
647*e4b17023SJohn Marino
648*e4b17023SJohn Marino dta->changed = true;
649*e4b17023SJohn Marino return NULL_TREE;
650*e4b17023SJohn Marino }
651*e4b17023SJohn Marino
652*e4b17023SJohn Marino if (!EXPR_P (t))
653*e4b17023SJohn Marino *walk_subtrees = 0;
654*e4b17023SJohn Marino
655*e4b17023SJohn Marino return NULL_TREE;
656*e4b17023SJohn Marino }
657*e4b17023SJohn Marino
658*e4b17023SJohn Marino /* Moves the references to local variables in STMT at *GSI out of the single
659*e4b17023SJohn Marino entry single exit region starting at ENTRY. DECL_ADDRESS contains
660*e4b17023SJohn Marino addresses of the references that had their address taken
661*e4b17023SJohn Marino already. */
662*e4b17023SJohn Marino
663*e4b17023SJohn Marino static void
eliminate_local_variables_stmt(edge entry,gimple_stmt_iterator * gsi,htab_t decl_address)664*e4b17023SJohn Marino eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
665*e4b17023SJohn Marino htab_t decl_address)
666*e4b17023SJohn Marino {
667*e4b17023SJohn Marino struct elv_data dta;
668*e4b17023SJohn Marino gimple stmt = gsi_stmt (*gsi);
669*e4b17023SJohn Marino
670*e4b17023SJohn Marino memset (&dta.info, '\0', sizeof (dta.info));
671*e4b17023SJohn Marino dta.entry = entry;
672*e4b17023SJohn Marino dta.decl_address = decl_address;
673*e4b17023SJohn Marino dta.changed = false;
674*e4b17023SJohn Marino dta.reset = false;
675*e4b17023SJohn Marino
676*e4b17023SJohn Marino if (gimple_debug_bind_p (stmt))
677*e4b17023SJohn Marino {
678*e4b17023SJohn Marino dta.gsi = NULL;
679*e4b17023SJohn Marino walk_tree (gimple_debug_bind_get_value_ptr (stmt),
680*e4b17023SJohn Marino eliminate_local_variables_1, &dta.info, NULL);
681*e4b17023SJohn Marino if (dta.reset)
682*e4b17023SJohn Marino {
683*e4b17023SJohn Marino gimple_debug_bind_reset_value (stmt);
684*e4b17023SJohn Marino dta.changed = true;
685*e4b17023SJohn Marino }
686*e4b17023SJohn Marino }
687*e4b17023SJohn Marino else
688*e4b17023SJohn Marino {
689*e4b17023SJohn Marino dta.gsi = gsi;
690*e4b17023SJohn Marino walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
691*e4b17023SJohn Marino }
692*e4b17023SJohn Marino
693*e4b17023SJohn Marino if (dta.changed)
694*e4b17023SJohn Marino update_stmt (stmt);
695*e4b17023SJohn Marino }
696*e4b17023SJohn Marino
697*e4b17023SJohn Marino /* Eliminates the references to local variables from the single entry
698*e4b17023SJohn Marino single exit region between the ENTRY and EXIT edges.
699*e4b17023SJohn Marino
700*e4b17023SJohn Marino This includes:
701*e4b17023SJohn Marino 1) Taking address of a local variable -- these are moved out of the
702*e4b17023SJohn Marino region (and temporary variable is created to hold the address if
703*e4b17023SJohn Marino necessary).
704*e4b17023SJohn Marino
705*e4b17023SJohn Marino 2) Dereferencing a local variable -- these are replaced with indirect
706*e4b17023SJohn Marino references. */
707*e4b17023SJohn Marino
708*e4b17023SJohn Marino static void
eliminate_local_variables(edge entry,edge exit)709*e4b17023SJohn Marino eliminate_local_variables (edge entry, edge exit)
710*e4b17023SJohn Marino {
711*e4b17023SJohn Marino basic_block bb;
712*e4b17023SJohn Marino VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
713*e4b17023SJohn Marino unsigned i;
714*e4b17023SJohn Marino gimple_stmt_iterator gsi;
715*e4b17023SJohn Marino bool has_debug_stmt = false;
716*e4b17023SJohn Marino htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
717*e4b17023SJohn Marino free);
718*e4b17023SJohn Marino basic_block entry_bb = entry->src;
719*e4b17023SJohn Marino basic_block exit_bb = exit->dest;
720*e4b17023SJohn Marino
721*e4b17023SJohn Marino gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
722*e4b17023SJohn Marino
723*e4b17023SJohn Marino FOR_EACH_VEC_ELT (basic_block, body, i, bb)
724*e4b17023SJohn Marino if (bb != entry_bb && bb != exit_bb)
725*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
726*e4b17023SJohn Marino if (is_gimple_debug (gsi_stmt (gsi)))
727*e4b17023SJohn Marino {
728*e4b17023SJohn Marino if (gimple_debug_bind_p (gsi_stmt (gsi)))
729*e4b17023SJohn Marino has_debug_stmt = true;
730*e4b17023SJohn Marino }
731*e4b17023SJohn Marino else
732*e4b17023SJohn Marino eliminate_local_variables_stmt (entry, &gsi, decl_address);
733*e4b17023SJohn Marino
734*e4b17023SJohn Marino if (has_debug_stmt)
735*e4b17023SJohn Marino FOR_EACH_VEC_ELT (basic_block, body, i, bb)
736*e4b17023SJohn Marino if (bb != entry_bb && bb != exit_bb)
737*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
738*e4b17023SJohn Marino if (gimple_debug_bind_p (gsi_stmt (gsi)))
739*e4b17023SJohn Marino eliminate_local_variables_stmt (entry, &gsi, decl_address);
740*e4b17023SJohn Marino
741*e4b17023SJohn Marino htab_delete (decl_address);
742*e4b17023SJohn Marino VEC_free (basic_block, heap, body);
743*e4b17023SJohn Marino }
744*e4b17023SJohn Marino
745*e4b17023SJohn Marino /* Returns true if expression EXPR is not defined between ENTRY and
746*e4b17023SJohn Marino EXIT, i.e. if all its operands are defined outside of the region. */
747*e4b17023SJohn Marino
748*e4b17023SJohn Marino static bool
expr_invariant_in_region_p(edge entry,edge exit,tree expr)749*e4b17023SJohn Marino expr_invariant_in_region_p (edge entry, edge exit, tree expr)
750*e4b17023SJohn Marino {
751*e4b17023SJohn Marino basic_block entry_bb = entry->src;
752*e4b17023SJohn Marino basic_block exit_bb = exit->dest;
753*e4b17023SJohn Marino basic_block def_bb;
754*e4b17023SJohn Marino
755*e4b17023SJohn Marino if (is_gimple_min_invariant (expr))
756*e4b17023SJohn Marino return true;
757*e4b17023SJohn Marino
758*e4b17023SJohn Marino if (TREE_CODE (expr) == SSA_NAME)
759*e4b17023SJohn Marino {
760*e4b17023SJohn Marino def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
761*e4b17023SJohn Marino if (def_bb
762*e4b17023SJohn Marino && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
763*e4b17023SJohn Marino && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
764*e4b17023SJohn Marino return false;
765*e4b17023SJohn Marino
766*e4b17023SJohn Marino return true;
767*e4b17023SJohn Marino }
768*e4b17023SJohn Marino
769*e4b17023SJohn Marino return false;
770*e4b17023SJohn Marino }
771*e4b17023SJohn Marino
772*e4b17023SJohn Marino /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
773*e4b17023SJohn Marino The copies are stored to NAME_COPIES, if NAME was already duplicated,
774*e4b17023SJohn Marino its duplicate stored in NAME_COPIES is returned.
775*e4b17023SJohn Marino
776*e4b17023SJohn Marino Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
777*e4b17023SJohn Marino duplicated, storing the copies in DECL_COPIES. */
778*e4b17023SJohn Marino
779*e4b17023SJohn Marino static tree
separate_decls_in_region_name(tree name,htab_t name_copies,htab_t decl_copies,bool copy_name_p)780*e4b17023SJohn Marino separate_decls_in_region_name (tree name,
781*e4b17023SJohn Marino htab_t name_copies, htab_t decl_copies,
782*e4b17023SJohn Marino bool copy_name_p)
783*e4b17023SJohn Marino {
784*e4b17023SJohn Marino tree copy, var, var_copy;
785*e4b17023SJohn Marino unsigned idx, uid, nuid;
786*e4b17023SJohn Marino struct int_tree_map ielt, *nielt;
787*e4b17023SJohn Marino struct name_to_copy_elt elt, *nelt;
788*e4b17023SJohn Marino void **slot, **dslot;
789*e4b17023SJohn Marino
790*e4b17023SJohn Marino if (TREE_CODE (name) != SSA_NAME)
791*e4b17023SJohn Marino return name;
792*e4b17023SJohn Marino
793*e4b17023SJohn Marino idx = SSA_NAME_VERSION (name);
794*e4b17023SJohn Marino elt.version = idx;
795*e4b17023SJohn Marino slot = htab_find_slot_with_hash (name_copies, &elt, idx,
796*e4b17023SJohn Marino copy_name_p ? INSERT : NO_INSERT);
797*e4b17023SJohn Marino if (slot && *slot)
798*e4b17023SJohn Marino return ((struct name_to_copy_elt *) *slot)->new_name;
799*e4b17023SJohn Marino
800*e4b17023SJohn Marino var = SSA_NAME_VAR (name);
801*e4b17023SJohn Marino uid = DECL_UID (var);
802*e4b17023SJohn Marino ielt.uid = uid;
803*e4b17023SJohn Marino dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
804*e4b17023SJohn Marino if (!*dslot)
805*e4b17023SJohn Marino {
806*e4b17023SJohn Marino var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
807*e4b17023SJohn Marino DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
808*e4b17023SJohn Marino add_referenced_var (var_copy);
809*e4b17023SJohn Marino nielt = XNEW (struct int_tree_map);
810*e4b17023SJohn Marino nielt->uid = uid;
811*e4b17023SJohn Marino nielt->to = var_copy;
812*e4b17023SJohn Marino *dslot = nielt;
813*e4b17023SJohn Marino
814*e4b17023SJohn Marino /* Ensure that when we meet this decl next time, we won't duplicate
815*e4b17023SJohn Marino it again. */
816*e4b17023SJohn Marino nuid = DECL_UID (var_copy);
817*e4b17023SJohn Marino ielt.uid = nuid;
818*e4b17023SJohn Marino dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
819*e4b17023SJohn Marino gcc_assert (!*dslot);
820*e4b17023SJohn Marino nielt = XNEW (struct int_tree_map);
821*e4b17023SJohn Marino nielt->uid = nuid;
822*e4b17023SJohn Marino nielt->to = var_copy;
823*e4b17023SJohn Marino *dslot = nielt;
824*e4b17023SJohn Marino }
825*e4b17023SJohn Marino else
826*e4b17023SJohn Marino var_copy = ((struct int_tree_map *) *dslot)->to;
827*e4b17023SJohn Marino
828*e4b17023SJohn Marino if (copy_name_p)
829*e4b17023SJohn Marino {
830*e4b17023SJohn Marino copy = duplicate_ssa_name (name, NULL);
831*e4b17023SJohn Marino nelt = XNEW (struct name_to_copy_elt);
832*e4b17023SJohn Marino nelt->version = idx;
833*e4b17023SJohn Marino nelt->new_name = copy;
834*e4b17023SJohn Marino nelt->field = NULL_TREE;
835*e4b17023SJohn Marino *slot = nelt;
836*e4b17023SJohn Marino }
837*e4b17023SJohn Marino else
838*e4b17023SJohn Marino {
839*e4b17023SJohn Marino gcc_assert (!slot);
840*e4b17023SJohn Marino copy = name;
841*e4b17023SJohn Marino }
842*e4b17023SJohn Marino
843*e4b17023SJohn Marino SSA_NAME_VAR (copy) = var_copy;
844*e4b17023SJohn Marino return copy;
845*e4b17023SJohn Marino }
846*e4b17023SJohn Marino
847*e4b17023SJohn Marino /* Finds the ssa names used in STMT that are defined outside the
848*e4b17023SJohn Marino region between ENTRY and EXIT and replaces such ssa names with
849*e4b17023SJohn Marino their duplicates. The duplicates are stored to NAME_COPIES. Base
850*e4b17023SJohn Marino decls of all ssa names used in STMT (including those defined in
851*e4b17023SJohn Marino LOOP) are replaced with the new temporary variables; the
852*e4b17023SJohn Marino replacement decls are stored in DECL_COPIES. */
853*e4b17023SJohn Marino
854*e4b17023SJohn Marino static void
separate_decls_in_region_stmt(edge entry,edge exit,gimple stmt,htab_t name_copies,htab_t decl_copies)855*e4b17023SJohn Marino separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
856*e4b17023SJohn Marino htab_t name_copies, htab_t decl_copies)
857*e4b17023SJohn Marino {
858*e4b17023SJohn Marino use_operand_p use;
859*e4b17023SJohn Marino def_operand_p def;
860*e4b17023SJohn Marino ssa_op_iter oi;
861*e4b17023SJohn Marino tree name, copy;
862*e4b17023SJohn Marino bool copy_name_p;
863*e4b17023SJohn Marino
864*e4b17023SJohn Marino mark_virtual_ops_for_renaming (stmt);
865*e4b17023SJohn Marino
866*e4b17023SJohn Marino FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
867*e4b17023SJohn Marino {
868*e4b17023SJohn Marino name = DEF_FROM_PTR (def);
869*e4b17023SJohn Marino gcc_assert (TREE_CODE (name) == SSA_NAME);
870*e4b17023SJohn Marino copy = separate_decls_in_region_name (name, name_copies, decl_copies,
871*e4b17023SJohn Marino false);
872*e4b17023SJohn Marino gcc_assert (copy == name);
873*e4b17023SJohn Marino }
874*e4b17023SJohn Marino
875*e4b17023SJohn Marino FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
876*e4b17023SJohn Marino {
877*e4b17023SJohn Marino name = USE_FROM_PTR (use);
878*e4b17023SJohn Marino if (TREE_CODE (name) != SSA_NAME)
879*e4b17023SJohn Marino continue;
880*e4b17023SJohn Marino
881*e4b17023SJohn Marino copy_name_p = expr_invariant_in_region_p (entry, exit, name);
882*e4b17023SJohn Marino copy = separate_decls_in_region_name (name, name_copies, decl_copies,
883*e4b17023SJohn Marino copy_name_p);
884*e4b17023SJohn Marino SET_USE (use, copy);
885*e4b17023SJohn Marino }
886*e4b17023SJohn Marino }
887*e4b17023SJohn Marino
888*e4b17023SJohn Marino /* Finds the ssa names used in STMT that are defined outside the
889*e4b17023SJohn Marino region between ENTRY and EXIT and replaces such ssa names with
890*e4b17023SJohn Marino their duplicates. The duplicates are stored to NAME_COPIES. Base
891*e4b17023SJohn Marino decls of all ssa names used in STMT (including those defined in
892*e4b17023SJohn Marino LOOP) are replaced with the new temporary variables; the
893*e4b17023SJohn Marino replacement decls are stored in DECL_COPIES. */
894*e4b17023SJohn Marino
895*e4b17023SJohn Marino static bool
separate_decls_in_region_debug(gimple stmt,htab_t name_copies,htab_t decl_copies)896*e4b17023SJohn Marino separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
897*e4b17023SJohn Marino htab_t decl_copies)
898*e4b17023SJohn Marino {
899*e4b17023SJohn Marino use_operand_p use;
900*e4b17023SJohn Marino ssa_op_iter oi;
901*e4b17023SJohn Marino tree var, name;
902*e4b17023SJohn Marino struct int_tree_map ielt;
903*e4b17023SJohn Marino struct name_to_copy_elt elt;
904*e4b17023SJohn Marino void **slot, **dslot;
905*e4b17023SJohn Marino
906*e4b17023SJohn Marino if (gimple_debug_bind_p (stmt))
907*e4b17023SJohn Marino var = gimple_debug_bind_get_var (stmt);
908*e4b17023SJohn Marino else if (gimple_debug_source_bind_p (stmt))
909*e4b17023SJohn Marino var = gimple_debug_source_bind_get_var (stmt);
910*e4b17023SJohn Marino else
911*e4b17023SJohn Marino return true;
912*e4b17023SJohn Marino if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
913*e4b17023SJohn Marino return true;
914*e4b17023SJohn Marino gcc_assert (DECL_P (var) && SSA_VAR_P (var));
915*e4b17023SJohn Marino ielt.uid = DECL_UID (var);
916*e4b17023SJohn Marino dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
917*e4b17023SJohn Marino if (!dslot)
918*e4b17023SJohn Marino return true;
919*e4b17023SJohn Marino if (gimple_debug_bind_p (stmt))
920*e4b17023SJohn Marino gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
921*e4b17023SJohn Marino else if (gimple_debug_source_bind_p (stmt))
922*e4b17023SJohn Marino gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
923*e4b17023SJohn Marino
924*e4b17023SJohn Marino FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
925*e4b17023SJohn Marino {
926*e4b17023SJohn Marino name = USE_FROM_PTR (use);
927*e4b17023SJohn Marino if (TREE_CODE (name) != SSA_NAME)
928*e4b17023SJohn Marino continue;
929*e4b17023SJohn Marino
930*e4b17023SJohn Marino elt.version = SSA_NAME_VERSION (name);
931*e4b17023SJohn Marino slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
932*e4b17023SJohn Marino if (!slot)
933*e4b17023SJohn Marino {
934*e4b17023SJohn Marino gimple_debug_bind_reset_value (stmt);
935*e4b17023SJohn Marino update_stmt (stmt);
936*e4b17023SJohn Marino break;
937*e4b17023SJohn Marino }
938*e4b17023SJohn Marino
939*e4b17023SJohn Marino SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
940*e4b17023SJohn Marino }
941*e4b17023SJohn Marino
942*e4b17023SJohn Marino return false;
943*e4b17023SJohn Marino }
944*e4b17023SJohn Marino
945*e4b17023SJohn Marino /* Callback for htab_traverse. Adds a field corresponding to the reduction
946*e4b17023SJohn Marino specified in SLOT. The type is passed in DATA. */
947*e4b17023SJohn Marino
948*e4b17023SJohn Marino static int
add_field_for_reduction(void ** slot,void * data)949*e4b17023SJohn Marino add_field_for_reduction (void **slot, void *data)
950*e4b17023SJohn Marino {
951*e4b17023SJohn Marino
952*e4b17023SJohn Marino struct reduction_info *const red = (struct reduction_info *) *slot;
953*e4b17023SJohn Marino tree const type = (tree) data;
954*e4b17023SJohn Marino tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
955*e4b17023SJohn Marino tree field = build_decl (gimple_location (red->reduc_stmt),
956*e4b17023SJohn Marino FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
957*e4b17023SJohn Marino
958*e4b17023SJohn Marino insert_field_into_struct (type, field);
959*e4b17023SJohn Marino
960*e4b17023SJohn Marino red->field = field;
961*e4b17023SJohn Marino
962*e4b17023SJohn Marino return 1;
963*e4b17023SJohn Marino }
964*e4b17023SJohn Marino
965*e4b17023SJohn Marino /* Callback for htab_traverse. Adds a field corresponding to a ssa name
966*e4b17023SJohn Marino described in SLOT. The type is passed in DATA. */
967*e4b17023SJohn Marino
968*e4b17023SJohn Marino static int
add_field_for_name(void ** slot,void * data)969*e4b17023SJohn Marino add_field_for_name (void **slot, void *data)
970*e4b17023SJohn Marino {
971*e4b17023SJohn Marino struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
972*e4b17023SJohn Marino tree type = (tree) data;
973*e4b17023SJohn Marino tree name = ssa_name (elt->version);
974*e4b17023SJohn Marino tree var = SSA_NAME_VAR (name);
975*e4b17023SJohn Marino tree field = build_decl (DECL_SOURCE_LOCATION (var),
976*e4b17023SJohn Marino FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
977*e4b17023SJohn Marino
978*e4b17023SJohn Marino insert_field_into_struct (type, field);
979*e4b17023SJohn Marino elt->field = field;
980*e4b17023SJohn Marino
981*e4b17023SJohn Marino return 1;
982*e4b17023SJohn Marino }
983*e4b17023SJohn Marino
984*e4b17023SJohn Marino /* Callback for htab_traverse. A local result is the intermediate result
985*e4b17023SJohn Marino computed by a single
986*e4b17023SJohn Marino thread, or the initial value in case no iteration was executed.
987*e4b17023SJohn Marino This function creates a phi node reflecting these values.
988*e4b17023SJohn Marino The phi's result will be stored in NEW_PHI field of the
989*e4b17023SJohn Marino reduction's data structure. */
990*e4b17023SJohn Marino
991*e4b17023SJohn Marino static int
create_phi_for_local_result(void ** slot,void * data)992*e4b17023SJohn Marino create_phi_for_local_result (void **slot, void *data)
993*e4b17023SJohn Marino {
994*e4b17023SJohn Marino struct reduction_info *const reduc = (struct reduction_info *) *slot;
995*e4b17023SJohn Marino const struct loop *const loop = (const struct loop *) data;
996*e4b17023SJohn Marino edge e;
997*e4b17023SJohn Marino gimple new_phi;
998*e4b17023SJohn Marino basic_block store_bb;
999*e4b17023SJohn Marino tree local_res;
1000*e4b17023SJohn Marino source_location locus;
1001*e4b17023SJohn Marino
1002*e4b17023SJohn Marino /* STORE_BB is the block where the phi
1003*e4b17023SJohn Marino should be stored. It is the destination of the loop exit.
1004*e4b17023SJohn Marino (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1005*e4b17023SJohn Marino store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1006*e4b17023SJohn Marino
1007*e4b17023SJohn Marino /* STORE_BB has two predecessors. One coming from the loop
1008*e4b17023SJohn Marino (the reduction's result is computed at the loop),
1009*e4b17023SJohn Marino and another coming from a block preceding the loop,
1010*e4b17023SJohn Marino when no iterations
1011*e4b17023SJohn Marino are executed (the initial value should be taken). */
1012*e4b17023SJohn Marino if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1013*e4b17023SJohn Marino e = EDGE_PRED (store_bb, 1);
1014*e4b17023SJohn Marino else
1015*e4b17023SJohn Marino e = EDGE_PRED (store_bb, 0);
1016*e4b17023SJohn Marino local_res
1017*e4b17023SJohn Marino = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
1018*e4b17023SJohn Marino NULL);
1019*e4b17023SJohn Marino locus = gimple_location (reduc->reduc_stmt);
1020*e4b17023SJohn Marino new_phi = create_phi_node (local_res, store_bb);
1021*e4b17023SJohn Marino SSA_NAME_DEF_STMT (local_res) = new_phi;
1022*e4b17023SJohn Marino add_phi_arg (new_phi, reduc->init, e, locus);
1023*e4b17023SJohn Marino add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1024*e4b17023SJohn Marino FALLTHRU_EDGE (loop->latch), locus);
1025*e4b17023SJohn Marino reduc->new_phi = new_phi;
1026*e4b17023SJohn Marino
1027*e4b17023SJohn Marino return 1;
1028*e4b17023SJohn Marino }
1029*e4b17023SJohn Marino
1030*e4b17023SJohn Marino struct clsn_data
1031*e4b17023SJohn Marino {
1032*e4b17023SJohn Marino tree store;
1033*e4b17023SJohn Marino tree load;
1034*e4b17023SJohn Marino
1035*e4b17023SJohn Marino basic_block store_bb;
1036*e4b17023SJohn Marino basic_block load_bb;
1037*e4b17023SJohn Marino };
1038*e4b17023SJohn Marino
1039*e4b17023SJohn Marino /* Callback for htab_traverse. Create an atomic instruction for the
1040*e4b17023SJohn Marino reduction described in SLOT.
1041*e4b17023SJohn Marino DATA annotates the place in memory the atomic operation relates to,
1042*e4b17023SJohn Marino and the basic block it needs to be generated in. */
1043*e4b17023SJohn Marino
1044*e4b17023SJohn Marino static int
create_call_for_reduction_1(void ** slot,void * data)1045*e4b17023SJohn Marino create_call_for_reduction_1 (void **slot, void *data)
1046*e4b17023SJohn Marino {
1047*e4b17023SJohn Marino struct reduction_info *const reduc = (struct reduction_info *) *slot;
1048*e4b17023SJohn Marino struct clsn_data *const clsn_data = (struct clsn_data *) data;
1049*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1050*e4b17023SJohn Marino tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1051*e4b17023SJohn Marino tree load_struct;
1052*e4b17023SJohn Marino basic_block bb;
1053*e4b17023SJohn Marino basic_block new_bb;
1054*e4b17023SJohn Marino edge e;
1055*e4b17023SJohn Marino tree t, addr, ref, x;
1056*e4b17023SJohn Marino tree tmp_load, name;
1057*e4b17023SJohn Marino gimple load;
1058*e4b17023SJohn Marino
1059*e4b17023SJohn Marino load_struct = build_simple_mem_ref (clsn_data->load);
1060*e4b17023SJohn Marino t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1061*e4b17023SJohn Marino
1062*e4b17023SJohn Marino addr = build_addr (t, current_function_decl);
1063*e4b17023SJohn Marino
1064*e4b17023SJohn Marino /* Create phi node. */
1065*e4b17023SJohn Marino bb = clsn_data->load_bb;
1066*e4b17023SJohn Marino
1067*e4b17023SJohn Marino e = split_block (bb, t);
1068*e4b17023SJohn Marino new_bb = e->dest;
1069*e4b17023SJohn Marino
1070*e4b17023SJohn Marino tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1071*e4b17023SJohn Marino add_referenced_var (tmp_load);
1072*e4b17023SJohn Marino tmp_load = make_ssa_name (tmp_load, NULL);
1073*e4b17023SJohn Marino load = gimple_build_omp_atomic_load (tmp_load, addr);
1074*e4b17023SJohn Marino SSA_NAME_DEF_STMT (tmp_load) = load;
1075*e4b17023SJohn Marino gsi = gsi_start_bb (new_bb);
1076*e4b17023SJohn Marino gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1077*e4b17023SJohn Marino
1078*e4b17023SJohn Marino e = split_block (new_bb, load);
1079*e4b17023SJohn Marino new_bb = e->dest;
1080*e4b17023SJohn Marino gsi = gsi_start_bb (new_bb);
1081*e4b17023SJohn Marino ref = tmp_load;
1082*e4b17023SJohn Marino x = fold_build2 (reduc->reduction_code,
1083*e4b17023SJohn Marino TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1084*e4b17023SJohn Marino PHI_RESULT (reduc->new_phi));
1085*e4b17023SJohn Marino
1086*e4b17023SJohn Marino name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1087*e4b17023SJohn Marino GSI_CONTINUE_LINKING);
1088*e4b17023SJohn Marino
1089*e4b17023SJohn Marino gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1090*e4b17023SJohn Marino return 1;
1091*e4b17023SJohn Marino }
1092*e4b17023SJohn Marino
1093*e4b17023SJohn Marino /* Create the atomic operation at the join point of the threads.
1094*e4b17023SJohn Marino REDUCTION_LIST describes the reductions in the LOOP.
1095*e4b17023SJohn Marino LD_ST_DATA describes the shared data structure where
1096*e4b17023SJohn Marino shared data is stored in and loaded from. */
1097*e4b17023SJohn Marino static void
create_call_for_reduction(struct loop * loop,htab_t reduction_list,struct clsn_data * ld_st_data)1098*e4b17023SJohn Marino create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1099*e4b17023SJohn Marino struct clsn_data *ld_st_data)
1100*e4b17023SJohn Marino {
1101*e4b17023SJohn Marino htab_traverse (reduction_list, create_phi_for_local_result, loop);
1102*e4b17023SJohn Marino /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1103*e4b17023SJohn Marino ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1104*e4b17023SJohn Marino htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1105*e4b17023SJohn Marino }
1106*e4b17023SJohn Marino
1107*e4b17023SJohn Marino /* Callback for htab_traverse. Loads the final reduction value at the
1108*e4b17023SJohn Marino join point of all threads, and inserts it in the right place. */
1109*e4b17023SJohn Marino
1110*e4b17023SJohn Marino static int
create_loads_for_reductions(void ** slot,void * data)1111*e4b17023SJohn Marino create_loads_for_reductions (void **slot, void *data)
1112*e4b17023SJohn Marino {
1113*e4b17023SJohn Marino struct reduction_info *const red = (struct reduction_info *) *slot;
1114*e4b17023SJohn Marino struct clsn_data *const clsn_data = (struct clsn_data *) data;
1115*e4b17023SJohn Marino gimple stmt;
1116*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1117*e4b17023SJohn Marino tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1118*e4b17023SJohn Marino tree load_struct;
1119*e4b17023SJohn Marino tree name;
1120*e4b17023SJohn Marino tree x;
1121*e4b17023SJohn Marino
1122*e4b17023SJohn Marino gsi = gsi_after_labels (clsn_data->load_bb);
1123*e4b17023SJohn Marino load_struct = build_simple_mem_ref (clsn_data->load);
1124*e4b17023SJohn Marino load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1125*e4b17023SJohn Marino NULL_TREE);
1126*e4b17023SJohn Marino
1127*e4b17023SJohn Marino x = load_struct;
1128*e4b17023SJohn Marino name = PHI_RESULT (red->keep_res);
1129*e4b17023SJohn Marino stmt = gimple_build_assign (name, x);
1130*e4b17023SJohn Marino SSA_NAME_DEF_STMT (name) = stmt;
1131*e4b17023SJohn Marino
1132*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1133*e4b17023SJohn Marino
1134*e4b17023SJohn Marino for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1135*e4b17023SJohn Marino !gsi_end_p (gsi); gsi_next (&gsi))
1136*e4b17023SJohn Marino if (gsi_stmt (gsi) == red->keep_res)
1137*e4b17023SJohn Marino {
1138*e4b17023SJohn Marino remove_phi_node (&gsi, false);
1139*e4b17023SJohn Marino return 1;
1140*e4b17023SJohn Marino }
1141*e4b17023SJohn Marino gcc_unreachable ();
1142*e4b17023SJohn Marino }
1143*e4b17023SJohn Marino
1144*e4b17023SJohn Marino /* Load the reduction result that was stored in LD_ST_DATA.
1145*e4b17023SJohn Marino REDUCTION_LIST describes the list of reductions that the
1146*e4b17023SJohn Marino loads should be generated for. */
1147*e4b17023SJohn Marino static void
create_final_loads_for_reduction(htab_t reduction_list,struct clsn_data * ld_st_data)1148*e4b17023SJohn Marino create_final_loads_for_reduction (htab_t reduction_list,
1149*e4b17023SJohn Marino struct clsn_data *ld_st_data)
1150*e4b17023SJohn Marino {
1151*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1152*e4b17023SJohn Marino tree t;
1153*e4b17023SJohn Marino gimple stmt;
1154*e4b17023SJohn Marino
1155*e4b17023SJohn Marino gsi = gsi_after_labels (ld_st_data->load_bb);
1156*e4b17023SJohn Marino t = build_fold_addr_expr (ld_st_data->store);
1157*e4b17023SJohn Marino stmt = gimple_build_assign (ld_st_data->load, t);
1158*e4b17023SJohn Marino
1159*e4b17023SJohn Marino gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1160*e4b17023SJohn Marino SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1161*e4b17023SJohn Marino
1162*e4b17023SJohn Marino htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1163*e4b17023SJohn Marino
1164*e4b17023SJohn Marino }
1165*e4b17023SJohn Marino
1166*e4b17023SJohn Marino /* Callback for htab_traverse. Store the neutral value for the
1167*e4b17023SJohn Marino particular reduction's operation, e.g. 0 for PLUS_EXPR,
1168*e4b17023SJohn Marino 1 for MULT_EXPR, etc. into the reduction field.
1169*e4b17023SJohn Marino The reduction is specified in SLOT. The store information is
1170*e4b17023SJohn Marino passed in DATA. */
1171*e4b17023SJohn Marino
1172*e4b17023SJohn Marino static int
create_stores_for_reduction(void ** slot,void * data)1173*e4b17023SJohn Marino create_stores_for_reduction (void **slot, void *data)
1174*e4b17023SJohn Marino {
1175*e4b17023SJohn Marino struct reduction_info *const red = (struct reduction_info *) *slot;
1176*e4b17023SJohn Marino struct clsn_data *const clsn_data = (struct clsn_data *) data;
1177*e4b17023SJohn Marino tree t;
1178*e4b17023SJohn Marino gimple stmt;
1179*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1180*e4b17023SJohn Marino tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1181*e4b17023SJohn Marino
1182*e4b17023SJohn Marino gsi = gsi_last_bb (clsn_data->store_bb);
1183*e4b17023SJohn Marino t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1184*e4b17023SJohn Marino stmt = gimple_build_assign (t, red->initial_value);
1185*e4b17023SJohn Marino mark_virtual_ops_for_renaming (stmt);
1186*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1187*e4b17023SJohn Marino
1188*e4b17023SJohn Marino return 1;
1189*e4b17023SJohn Marino }
1190*e4b17023SJohn Marino
1191*e4b17023SJohn Marino /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1192*e4b17023SJohn Marino store to a field of STORE in STORE_BB for the ssa name and its duplicate
1193*e4b17023SJohn Marino specified in SLOT. */
1194*e4b17023SJohn Marino
1195*e4b17023SJohn Marino static int
create_loads_and_stores_for_name(void ** slot,void * data)1196*e4b17023SJohn Marino create_loads_and_stores_for_name (void **slot, void *data)
1197*e4b17023SJohn Marino {
1198*e4b17023SJohn Marino struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1199*e4b17023SJohn Marino struct clsn_data *const clsn_data = (struct clsn_data *) data;
1200*e4b17023SJohn Marino tree t;
1201*e4b17023SJohn Marino gimple stmt;
1202*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1203*e4b17023SJohn Marino tree type = TREE_TYPE (elt->new_name);
1204*e4b17023SJohn Marino tree load_struct;
1205*e4b17023SJohn Marino
1206*e4b17023SJohn Marino gsi = gsi_last_bb (clsn_data->store_bb);
1207*e4b17023SJohn Marino t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1208*e4b17023SJohn Marino stmt = gimple_build_assign (t, ssa_name (elt->version));
1209*e4b17023SJohn Marino mark_virtual_ops_for_renaming (stmt);
1210*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1211*e4b17023SJohn Marino
1212*e4b17023SJohn Marino gsi = gsi_last_bb (clsn_data->load_bb);
1213*e4b17023SJohn Marino load_struct = build_simple_mem_ref (clsn_data->load);
1214*e4b17023SJohn Marino t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1215*e4b17023SJohn Marino stmt = gimple_build_assign (elt->new_name, t);
1216*e4b17023SJohn Marino SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1217*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1218*e4b17023SJohn Marino
1219*e4b17023SJohn Marino return 1;
1220*e4b17023SJohn Marino }
1221*e4b17023SJohn Marino
1222*e4b17023SJohn Marino /* Moves all the variables used in LOOP and defined outside of it (including
1223*e4b17023SJohn Marino the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1224*e4b17023SJohn Marino name) to a structure created for this purpose. The code
1225*e4b17023SJohn Marino
1226*e4b17023SJohn Marino while (1)
1227*e4b17023SJohn Marino {
1228*e4b17023SJohn Marino use (a);
1229*e4b17023SJohn Marino use (b);
1230*e4b17023SJohn Marino }
1231*e4b17023SJohn Marino
1232*e4b17023SJohn Marino is transformed this way:
1233*e4b17023SJohn Marino
1234*e4b17023SJohn Marino bb0:
1235*e4b17023SJohn Marino old.a = a;
1236*e4b17023SJohn Marino old.b = b;
1237*e4b17023SJohn Marino
1238*e4b17023SJohn Marino bb1:
1239*e4b17023SJohn Marino a' = new->a;
1240*e4b17023SJohn Marino b' = new->b;
1241*e4b17023SJohn Marino while (1)
1242*e4b17023SJohn Marino {
1243*e4b17023SJohn Marino use (a');
1244*e4b17023SJohn Marino use (b');
1245*e4b17023SJohn Marino }
1246*e4b17023SJohn Marino
1247*e4b17023SJohn Marino `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1248*e4b17023SJohn Marino pointer `new' is intentionally not initialized (the loop will be split to a
1249*e4b17023SJohn Marino separate function later, and `new' will be initialized from its arguments).
1250*e4b17023SJohn Marino LD_ST_DATA holds information about the shared data structure used to pass
1251*e4b17023SJohn Marino information among the threads. It is initialized here, and
1252*e4b17023SJohn Marino gen_parallel_loop will pass it to create_call_for_reduction that
1253*e4b17023SJohn Marino needs this information. REDUCTION_LIST describes the reductions
1254*e4b17023SJohn Marino in LOOP. */
1255*e4b17023SJohn Marino
1256*e4b17023SJohn Marino static void
separate_decls_in_region(edge entry,edge exit,htab_t reduction_list,tree * arg_struct,tree * new_arg_struct,struct clsn_data * ld_st_data)1257*e4b17023SJohn Marino separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1258*e4b17023SJohn Marino tree *arg_struct, tree *new_arg_struct,
1259*e4b17023SJohn Marino struct clsn_data *ld_st_data)
1260*e4b17023SJohn Marino
1261*e4b17023SJohn Marino {
1262*e4b17023SJohn Marino basic_block bb1 = split_edge (entry);
1263*e4b17023SJohn Marino basic_block bb0 = single_pred (bb1);
1264*e4b17023SJohn Marino htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1265*e4b17023SJohn Marino name_to_copy_elt_eq, free);
1266*e4b17023SJohn Marino htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1267*e4b17023SJohn Marino free);
1268*e4b17023SJohn Marino unsigned i;
1269*e4b17023SJohn Marino tree type, type_name, nvar;
1270*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1271*e4b17023SJohn Marino struct clsn_data clsn_data;
1272*e4b17023SJohn Marino VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1273*e4b17023SJohn Marino basic_block bb;
1274*e4b17023SJohn Marino basic_block entry_bb = bb1;
1275*e4b17023SJohn Marino basic_block exit_bb = exit->dest;
1276*e4b17023SJohn Marino bool has_debug_stmt = false;
1277*e4b17023SJohn Marino
1278*e4b17023SJohn Marino entry = single_succ_edge (entry_bb);
1279*e4b17023SJohn Marino gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1280*e4b17023SJohn Marino
1281*e4b17023SJohn Marino FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1282*e4b17023SJohn Marino {
1283*e4b17023SJohn Marino if (bb != entry_bb && bb != exit_bb)
1284*e4b17023SJohn Marino {
1285*e4b17023SJohn Marino for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1286*e4b17023SJohn Marino separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1287*e4b17023SJohn Marino name_copies, decl_copies);
1288*e4b17023SJohn Marino
1289*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1290*e4b17023SJohn Marino {
1291*e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
1292*e4b17023SJohn Marino
1293*e4b17023SJohn Marino if (is_gimple_debug (stmt))
1294*e4b17023SJohn Marino has_debug_stmt = true;
1295*e4b17023SJohn Marino else
1296*e4b17023SJohn Marino separate_decls_in_region_stmt (entry, exit, stmt,
1297*e4b17023SJohn Marino name_copies, decl_copies);
1298*e4b17023SJohn Marino }
1299*e4b17023SJohn Marino }
1300*e4b17023SJohn Marino }
1301*e4b17023SJohn Marino
1302*e4b17023SJohn Marino /* Now process debug bind stmts. We must not create decls while
1303*e4b17023SJohn Marino processing debug stmts, so we defer their processing so as to
1304*e4b17023SJohn Marino make sure we will have debug info for as many variables as
1305*e4b17023SJohn Marino possible (all of those that were dealt with in the loop above),
1306*e4b17023SJohn Marino and discard those for which we know there's nothing we can
1307*e4b17023SJohn Marino do. */
1308*e4b17023SJohn Marino if (has_debug_stmt)
1309*e4b17023SJohn Marino FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1310*e4b17023SJohn Marino if (bb != entry_bb && bb != exit_bb)
1311*e4b17023SJohn Marino {
1312*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1313*e4b17023SJohn Marino {
1314*e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
1315*e4b17023SJohn Marino
1316*e4b17023SJohn Marino if (is_gimple_debug (stmt))
1317*e4b17023SJohn Marino {
1318*e4b17023SJohn Marino if (separate_decls_in_region_debug (stmt, name_copies,
1319*e4b17023SJohn Marino decl_copies))
1320*e4b17023SJohn Marino {
1321*e4b17023SJohn Marino gsi_remove (&gsi, true);
1322*e4b17023SJohn Marino continue;
1323*e4b17023SJohn Marino }
1324*e4b17023SJohn Marino }
1325*e4b17023SJohn Marino
1326*e4b17023SJohn Marino gsi_next (&gsi);
1327*e4b17023SJohn Marino }
1328*e4b17023SJohn Marino }
1329*e4b17023SJohn Marino
1330*e4b17023SJohn Marino VEC_free (basic_block, heap, body);
1331*e4b17023SJohn Marino
1332*e4b17023SJohn Marino if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1333*e4b17023SJohn Marino {
1334*e4b17023SJohn Marino /* It may happen that there is nothing to copy (if there are only
1335*e4b17023SJohn Marino loop carried and external variables in the loop). */
1336*e4b17023SJohn Marino *arg_struct = NULL;
1337*e4b17023SJohn Marino *new_arg_struct = NULL;
1338*e4b17023SJohn Marino }
1339*e4b17023SJohn Marino else
1340*e4b17023SJohn Marino {
1341*e4b17023SJohn Marino /* Create the type for the structure to store the ssa names to. */
1342*e4b17023SJohn Marino type = lang_hooks.types.make_type (RECORD_TYPE);
1343*e4b17023SJohn Marino type_name = build_decl (UNKNOWN_LOCATION,
1344*e4b17023SJohn Marino TYPE_DECL, create_tmp_var_name (".paral_data"),
1345*e4b17023SJohn Marino type);
1346*e4b17023SJohn Marino TYPE_NAME (type) = type_name;
1347*e4b17023SJohn Marino
1348*e4b17023SJohn Marino htab_traverse (name_copies, add_field_for_name, type);
1349*e4b17023SJohn Marino if (reduction_list && htab_elements (reduction_list) > 0)
1350*e4b17023SJohn Marino {
1351*e4b17023SJohn Marino /* Create the fields for reductions. */
1352*e4b17023SJohn Marino htab_traverse (reduction_list, add_field_for_reduction,
1353*e4b17023SJohn Marino type);
1354*e4b17023SJohn Marino }
1355*e4b17023SJohn Marino layout_type (type);
1356*e4b17023SJohn Marino
1357*e4b17023SJohn Marino /* Create the loads and stores. */
1358*e4b17023SJohn Marino *arg_struct = create_tmp_var (type, ".paral_data_store");
1359*e4b17023SJohn Marino add_referenced_var (*arg_struct);
1360*e4b17023SJohn Marino nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1361*e4b17023SJohn Marino add_referenced_var (nvar);
1362*e4b17023SJohn Marino *new_arg_struct = make_ssa_name (nvar, NULL);
1363*e4b17023SJohn Marino
1364*e4b17023SJohn Marino ld_st_data->store = *arg_struct;
1365*e4b17023SJohn Marino ld_st_data->load = *new_arg_struct;
1366*e4b17023SJohn Marino ld_st_data->store_bb = bb0;
1367*e4b17023SJohn Marino ld_st_data->load_bb = bb1;
1368*e4b17023SJohn Marino
1369*e4b17023SJohn Marino htab_traverse (name_copies, create_loads_and_stores_for_name,
1370*e4b17023SJohn Marino ld_st_data);
1371*e4b17023SJohn Marino
1372*e4b17023SJohn Marino /* Load the calculation from memory (after the join of the threads). */
1373*e4b17023SJohn Marino
1374*e4b17023SJohn Marino if (reduction_list && htab_elements (reduction_list) > 0)
1375*e4b17023SJohn Marino {
1376*e4b17023SJohn Marino htab_traverse (reduction_list, create_stores_for_reduction,
1377*e4b17023SJohn Marino ld_st_data);
1378*e4b17023SJohn Marino clsn_data.load = make_ssa_name (nvar, NULL);
1379*e4b17023SJohn Marino clsn_data.load_bb = exit->dest;
1380*e4b17023SJohn Marino clsn_data.store = ld_st_data->store;
1381*e4b17023SJohn Marino create_final_loads_for_reduction (reduction_list, &clsn_data);
1382*e4b17023SJohn Marino }
1383*e4b17023SJohn Marino }
1384*e4b17023SJohn Marino
1385*e4b17023SJohn Marino htab_delete (decl_copies);
1386*e4b17023SJohn Marino htab_delete (name_copies);
1387*e4b17023SJohn Marino }
1388*e4b17023SJohn Marino
1389*e4b17023SJohn Marino /* Bitmap containing uids of functions created by parallelization. We cannot
1390*e4b17023SJohn Marino allocate it from the default obstack, as it must live across compilation
1391*e4b17023SJohn Marino of several functions; we make it gc allocated instead. */
1392*e4b17023SJohn Marino
1393*e4b17023SJohn Marino static GTY(()) bitmap parallelized_functions;
1394*e4b17023SJohn Marino
1395*e4b17023SJohn Marino /* Returns true if FN was created by create_loop_fn. */
1396*e4b17023SJohn Marino
1397*e4b17023SJohn Marino static bool
parallelized_function_p(tree fn)1398*e4b17023SJohn Marino parallelized_function_p (tree fn)
1399*e4b17023SJohn Marino {
1400*e4b17023SJohn Marino if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1401*e4b17023SJohn Marino return false;
1402*e4b17023SJohn Marino
1403*e4b17023SJohn Marino return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1404*e4b17023SJohn Marino }
1405*e4b17023SJohn Marino
1406*e4b17023SJohn Marino /* Creates and returns an empty function that will receive the body of
1407*e4b17023SJohn Marino a parallelized loop. */
1408*e4b17023SJohn Marino
1409*e4b17023SJohn Marino static tree
create_loop_fn(location_t loc)1410*e4b17023SJohn Marino create_loop_fn (location_t loc)
1411*e4b17023SJohn Marino {
1412*e4b17023SJohn Marino char buf[100];
1413*e4b17023SJohn Marino char *tname;
1414*e4b17023SJohn Marino tree decl, type, name, t;
1415*e4b17023SJohn Marino struct function *act_cfun = cfun;
1416*e4b17023SJohn Marino static unsigned loopfn_num;
1417*e4b17023SJohn Marino
1418*e4b17023SJohn Marino snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1419*e4b17023SJohn Marino ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1420*e4b17023SJohn Marino clean_symbol_name (tname);
1421*e4b17023SJohn Marino name = get_identifier (tname);
1422*e4b17023SJohn Marino type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1423*e4b17023SJohn Marino
1424*e4b17023SJohn Marino decl = build_decl (loc, FUNCTION_DECL, name, type);
1425*e4b17023SJohn Marino if (!parallelized_functions)
1426*e4b17023SJohn Marino parallelized_functions = BITMAP_GGC_ALLOC ();
1427*e4b17023SJohn Marino bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1428*e4b17023SJohn Marino
1429*e4b17023SJohn Marino TREE_STATIC (decl) = 1;
1430*e4b17023SJohn Marino TREE_USED (decl) = 1;
1431*e4b17023SJohn Marino DECL_ARTIFICIAL (decl) = 1;
1432*e4b17023SJohn Marino DECL_IGNORED_P (decl) = 0;
1433*e4b17023SJohn Marino TREE_PUBLIC (decl) = 0;
1434*e4b17023SJohn Marino DECL_UNINLINABLE (decl) = 1;
1435*e4b17023SJohn Marino DECL_EXTERNAL (decl) = 0;
1436*e4b17023SJohn Marino DECL_CONTEXT (decl) = NULL_TREE;
1437*e4b17023SJohn Marino DECL_INITIAL (decl) = make_node (BLOCK);
1438*e4b17023SJohn Marino
1439*e4b17023SJohn Marino t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1440*e4b17023SJohn Marino DECL_ARTIFICIAL (t) = 1;
1441*e4b17023SJohn Marino DECL_IGNORED_P (t) = 1;
1442*e4b17023SJohn Marino DECL_RESULT (decl) = t;
1443*e4b17023SJohn Marino
1444*e4b17023SJohn Marino t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1445*e4b17023SJohn Marino ptr_type_node);
1446*e4b17023SJohn Marino DECL_ARTIFICIAL (t) = 1;
1447*e4b17023SJohn Marino DECL_ARG_TYPE (t) = ptr_type_node;
1448*e4b17023SJohn Marino DECL_CONTEXT (t) = decl;
1449*e4b17023SJohn Marino TREE_USED (t) = 1;
1450*e4b17023SJohn Marino DECL_ARGUMENTS (decl) = t;
1451*e4b17023SJohn Marino
1452*e4b17023SJohn Marino allocate_struct_function (decl, false);
1453*e4b17023SJohn Marino
1454*e4b17023SJohn Marino /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1455*e4b17023SJohn Marino it. */
1456*e4b17023SJohn Marino set_cfun (act_cfun);
1457*e4b17023SJohn Marino
1458*e4b17023SJohn Marino return decl;
1459*e4b17023SJohn Marino }
1460*e4b17023SJohn Marino
1461*e4b17023SJohn Marino /* Moves the exit condition of LOOP to the beginning of its header, and
1462*e4b17023SJohn Marino duplicates the part of the last iteration that gets disabled to the
1463*e4b17023SJohn Marino exit of the loop. NIT is the number of iterations of the loop
1464*e4b17023SJohn Marino (used to initialize the variables in the duplicated part).
1465*e4b17023SJohn Marino
1466*e4b17023SJohn Marino TODO: the common case is that latch of the loop is empty and immediately
1467*e4b17023SJohn Marino follows the loop exit. In this case, it would be better not to copy the
1468*e4b17023SJohn Marino body of the loop, but only move the entry of the loop directly before the
1469*e4b17023SJohn Marino exit check and increase the number of iterations of the loop by one.
1470*e4b17023SJohn Marino This may need some additional preconditioning in case NIT = ~0.
1471*e4b17023SJohn Marino REDUCTION_LIST describes the reductions in LOOP. */
1472*e4b17023SJohn Marino
1473*e4b17023SJohn Marino static void
transform_to_exit_first_loop(struct loop * loop,htab_t reduction_list,tree nit)1474*e4b17023SJohn Marino transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1475*e4b17023SJohn Marino {
1476*e4b17023SJohn Marino basic_block *bbs, *nbbs, ex_bb, orig_header;
1477*e4b17023SJohn Marino unsigned n;
1478*e4b17023SJohn Marino bool ok;
1479*e4b17023SJohn Marino edge exit = single_dom_exit (loop), hpred;
1480*e4b17023SJohn Marino tree control, control_name, res, t;
1481*e4b17023SJohn Marino gimple phi, nphi, cond_stmt, stmt, cond_nit;
1482*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1483*e4b17023SJohn Marino tree nit_1;
1484*e4b17023SJohn Marino edge exit_1;
1485*e4b17023SJohn Marino tree new_rhs;
1486*e4b17023SJohn Marino
1487*e4b17023SJohn Marino split_block_after_labels (loop->header);
1488*e4b17023SJohn Marino orig_header = single_succ (loop->header);
1489*e4b17023SJohn Marino hpred = single_succ_edge (loop->header);
1490*e4b17023SJohn Marino
1491*e4b17023SJohn Marino cond_stmt = last_stmt (exit->src);
1492*e4b17023SJohn Marino control = gimple_cond_lhs (cond_stmt);
1493*e4b17023SJohn Marino gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1494*e4b17023SJohn Marino
1495*e4b17023SJohn Marino /* Make sure that we have phi nodes on exit for all loop header phis
1496*e4b17023SJohn Marino (create_parallel_loop requires that). */
1497*e4b17023SJohn Marino for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1498*e4b17023SJohn Marino {
1499*e4b17023SJohn Marino phi = gsi_stmt (gsi);
1500*e4b17023SJohn Marino res = PHI_RESULT (phi);
1501*e4b17023SJohn Marino t = make_ssa_name (SSA_NAME_VAR (res), phi);
1502*e4b17023SJohn Marino SET_PHI_RESULT (phi, t);
1503*e4b17023SJohn Marino nphi = create_phi_node (res, orig_header);
1504*e4b17023SJohn Marino SSA_NAME_DEF_STMT (res) = nphi;
1505*e4b17023SJohn Marino add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1506*e4b17023SJohn Marino
1507*e4b17023SJohn Marino if (res == control)
1508*e4b17023SJohn Marino {
1509*e4b17023SJohn Marino gimple_cond_set_lhs (cond_stmt, t);
1510*e4b17023SJohn Marino update_stmt (cond_stmt);
1511*e4b17023SJohn Marino control = t;
1512*e4b17023SJohn Marino }
1513*e4b17023SJohn Marino }
1514*e4b17023SJohn Marino
1515*e4b17023SJohn Marino /* Setting the condition towards peeling the last iteration:
1516*e4b17023SJohn Marino If the block consisting of the exit condition has the latch as
1517*e4b17023SJohn Marino successor, then the body of the loop is executed before
1518*e4b17023SJohn Marino the exit condition is tested. In such case, moving the
1519*e4b17023SJohn Marino condition to the entry, causes that the loop will iterate
1520*e4b17023SJohn Marino one less iteration (which is the wanted outcome, since we
1521*e4b17023SJohn Marino peel out the last iteration). If the body is executed after
1522*e4b17023SJohn Marino the condition, moving the condition to the entry requires
1523*e4b17023SJohn Marino decrementing one iteration. */
1524*e4b17023SJohn Marino exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
1525*e4b17023SJohn Marino if (exit_1->dest == loop->latch)
1526*e4b17023SJohn Marino new_rhs = gimple_cond_rhs (cond_stmt);
1527*e4b17023SJohn Marino else
1528*e4b17023SJohn Marino {
1529*e4b17023SJohn Marino new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
1530*e4b17023SJohn Marino gimple_cond_rhs (cond_stmt),
1531*e4b17023SJohn Marino build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
1532*e4b17023SJohn Marino if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
1533*e4b17023SJohn Marino {
1534*e4b17023SJohn Marino basic_block preheader;
1535*e4b17023SJohn Marino gimple_stmt_iterator gsi1;
1536*e4b17023SJohn Marino
1537*e4b17023SJohn Marino preheader = loop_preheader_edge(loop)->src;
1538*e4b17023SJohn Marino gsi1 = gsi_after_labels (preheader);
1539*e4b17023SJohn Marino new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
1540*e4b17023SJohn Marino NULL_TREE,false,GSI_CONTINUE_LINKING);
1541*e4b17023SJohn Marino }
1542*e4b17023SJohn Marino }
1543*e4b17023SJohn Marino gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
1544*e4b17023SJohn Marino gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
1545*e4b17023SJohn Marino
1546*e4b17023SJohn Marino bbs = get_loop_body_in_dom_order (loop);
1547*e4b17023SJohn Marino
1548*e4b17023SJohn Marino for (n = 0; bbs[n] != loop->latch; n++)
1549*e4b17023SJohn Marino continue;
1550*e4b17023SJohn Marino nbbs = XNEWVEC (basic_block, n);
1551*e4b17023SJohn Marino ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1552*e4b17023SJohn Marino bbs + 1, n, nbbs);
1553*e4b17023SJohn Marino gcc_assert (ok);
1554*e4b17023SJohn Marino free (bbs);
1555*e4b17023SJohn Marino ex_bb = nbbs[0];
1556*e4b17023SJohn Marino free (nbbs);
1557*e4b17023SJohn Marino
1558*e4b17023SJohn Marino /* Other than reductions, the only gimple reg that should be copied
1559*e4b17023SJohn Marino out of the loop is the control variable. */
1560*e4b17023SJohn Marino
1561*e4b17023SJohn Marino control_name = NULL_TREE;
1562*e4b17023SJohn Marino for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1563*e4b17023SJohn Marino {
1564*e4b17023SJohn Marino phi = gsi_stmt (gsi);
1565*e4b17023SJohn Marino res = PHI_RESULT (phi);
1566*e4b17023SJohn Marino if (!is_gimple_reg (res))
1567*e4b17023SJohn Marino {
1568*e4b17023SJohn Marino gsi_next (&gsi);
1569*e4b17023SJohn Marino continue;
1570*e4b17023SJohn Marino }
1571*e4b17023SJohn Marino
1572*e4b17023SJohn Marino /* Check if it is a part of reduction. If it is,
1573*e4b17023SJohn Marino keep the phi at the reduction's keep_res field. The
1574*e4b17023SJohn Marino PHI_RESULT of this phi is the resulting value of the reduction
1575*e4b17023SJohn Marino variable when exiting the loop. */
1576*e4b17023SJohn Marino
1577*e4b17023SJohn Marino exit = single_dom_exit (loop);
1578*e4b17023SJohn Marino
1579*e4b17023SJohn Marino if (htab_elements (reduction_list) > 0)
1580*e4b17023SJohn Marino {
1581*e4b17023SJohn Marino struct reduction_info *red;
1582*e4b17023SJohn Marino
1583*e4b17023SJohn Marino tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1584*e4b17023SJohn Marino red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1585*e4b17023SJohn Marino if (red)
1586*e4b17023SJohn Marino {
1587*e4b17023SJohn Marino red->keep_res = phi;
1588*e4b17023SJohn Marino gsi_next (&gsi);
1589*e4b17023SJohn Marino continue;
1590*e4b17023SJohn Marino }
1591*e4b17023SJohn Marino }
1592*e4b17023SJohn Marino gcc_assert (control_name == NULL_TREE
1593*e4b17023SJohn Marino && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1594*e4b17023SJohn Marino control_name = res;
1595*e4b17023SJohn Marino remove_phi_node (&gsi, false);
1596*e4b17023SJohn Marino }
1597*e4b17023SJohn Marino gcc_assert (control_name != NULL_TREE);
1598*e4b17023SJohn Marino
1599*e4b17023SJohn Marino /* Initialize the control variable to number of iterations
1600*e4b17023SJohn Marino according to the rhs of the exit condition. */
1601*e4b17023SJohn Marino gsi = gsi_after_labels (ex_bb);
1602*e4b17023SJohn Marino cond_nit = last_stmt (exit->src);
1603*e4b17023SJohn Marino nit_1 = gimple_cond_rhs (cond_nit);
1604*e4b17023SJohn Marino nit_1 = force_gimple_operand_gsi (&gsi,
1605*e4b17023SJohn Marino fold_convert (TREE_TYPE (control_name), nit_1),
1606*e4b17023SJohn Marino false, NULL_TREE, false, GSI_SAME_STMT);
1607*e4b17023SJohn Marino stmt = gimple_build_assign (control_name, nit_1);
1608*e4b17023SJohn Marino gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1609*e4b17023SJohn Marino SSA_NAME_DEF_STMT (control_name) = stmt;
1610*e4b17023SJohn Marino }
1611*e4b17023SJohn Marino
1612*e4b17023SJohn Marino /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1613*e4b17023SJohn Marino LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1614*e4b17023SJohn Marino NEW_DATA is the variable that should be initialized from the argument
1615*e4b17023SJohn Marino of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1616*e4b17023SJohn Marino basic block containing GIMPLE_OMP_PARALLEL tree. */
1617*e4b17023SJohn Marino
1618*e4b17023SJohn Marino static basic_block
create_parallel_loop(struct loop * loop,tree loop_fn,tree data,tree new_data,unsigned n_threads,location_t loc)1619*e4b17023SJohn Marino create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1620*e4b17023SJohn Marino tree new_data, unsigned n_threads, location_t loc)
1621*e4b17023SJohn Marino {
1622*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1623*e4b17023SJohn Marino basic_block bb, paral_bb, for_bb, ex_bb;
1624*e4b17023SJohn Marino tree t, param;
1625*e4b17023SJohn Marino gimple stmt, for_stmt, phi, cond_stmt;
1626*e4b17023SJohn Marino tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1627*e4b17023SJohn Marino edge exit, nexit, guard, end, e;
1628*e4b17023SJohn Marino
1629*e4b17023SJohn Marino /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1630*e4b17023SJohn Marino bb = loop_preheader_edge (loop)->src;
1631*e4b17023SJohn Marino paral_bb = single_pred (bb);
1632*e4b17023SJohn Marino gsi = gsi_last_bb (paral_bb);
1633*e4b17023SJohn Marino
1634*e4b17023SJohn Marino t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1635*e4b17023SJohn Marino OMP_CLAUSE_NUM_THREADS_EXPR (t)
1636*e4b17023SJohn Marino = build_int_cst (integer_type_node, n_threads);
1637*e4b17023SJohn Marino stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1638*e4b17023SJohn Marino gimple_set_location (stmt, loc);
1639*e4b17023SJohn Marino
1640*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1641*e4b17023SJohn Marino
1642*e4b17023SJohn Marino /* Initialize NEW_DATA. */
1643*e4b17023SJohn Marino if (data)
1644*e4b17023SJohn Marino {
1645*e4b17023SJohn Marino gsi = gsi_after_labels (bb);
1646*e4b17023SJohn Marino
1647*e4b17023SJohn Marino param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1648*e4b17023SJohn Marino stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1649*e4b17023SJohn Marino gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1650*e4b17023SJohn Marino SSA_NAME_DEF_STMT (param) = stmt;
1651*e4b17023SJohn Marino
1652*e4b17023SJohn Marino stmt = gimple_build_assign (new_data,
1653*e4b17023SJohn Marino fold_convert (TREE_TYPE (new_data), param));
1654*e4b17023SJohn Marino gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1655*e4b17023SJohn Marino SSA_NAME_DEF_STMT (new_data) = stmt;
1656*e4b17023SJohn Marino }
1657*e4b17023SJohn Marino
1658*e4b17023SJohn Marino /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1659*e4b17023SJohn Marino bb = split_loop_exit_edge (single_dom_exit (loop));
1660*e4b17023SJohn Marino gsi = gsi_last_bb (bb);
1661*e4b17023SJohn Marino stmt = gimple_build_omp_return (false);
1662*e4b17023SJohn Marino gimple_set_location (stmt, loc);
1663*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1664*e4b17023SJohn Marino
1665*e4b17023SJohn Marino /* Extract data for GIMPLE_OMP_FOR. */
1666*e4b17023SJohn Marino gcc_assert (loop->header == single_dom_exit (loop)->src);
1667*e4b17023SJohn Marino cond_stmt = last_stmt (loop->header);
1668*e4b17023SJohn Marino
1669*e4b17023SJohn Marino cvar = gimple_cond_lhs (cond_stmt);
1670*e4b17023SJohn Marino cvar_base = SSA_NAME_VAR (cvar);
1671*e4b17023SJohn Marino phi = SSA_NAME_DEF_STMT (cvar);
1672*e4b17023SJohn Marino cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1673*e4b17023SJohn Marino initvar = make_ssa_name (cvar_base, NULL);
1674*e4b17023SJohn Marino SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1675*e4b17023SJohn Marino initvar);
1676*e4b17023SJohn Marino cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1677*e4b17023SJohn Marino
1678*e4b17023SJohn Marino gsi = gsi_last_nondebug_bb (loop->latch);
1679*e4b17023SJohn Marino gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1680*e4b17023SJohn Marino gsi_remove (&gsi, true);
1681*e4b17023SJohn Marino
1682*e4b17023SJohn Marino /* Prepare cfg. */
1683*e4b17023SJohn Marino for_bb = split_edge (loop_preheader_edge (loop));
1684*e4b17023SJohn Marino ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1685*e4b17023SJohn Marino extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1686*e4b17023SJohn Marino gcc_assert (exit == single_dom_exit (loop));
1687*e4b17023SJohn Marino
1688*e4b17023SJohn Marino guard = make_edge (for_bb, ex_bb, 0);
1689*e4b17023SJohn Marino single_succ_edge (loop->latch)->flags = 0;
1690*e4b17023SJohn Marino end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1691*e4b17023SJohn Marino for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1692*e4b17023SJohn Marino {
1693*e4b17023SJohn Marino source_location locus;
1694*e4b17023SJohn Marino tree def;
1695*e4b17023SJohn Marino phi = gsi_stmt (gsi);
1696*e4b17023SJohn Marino stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1697*e4b17023SJohn Marino
1698*e4b17023SJohn Marino def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1699*e4b17023SJohn Marino locus = gimple_phi_arg_location_from_edge (stmt,
1700*e4b17023SJohn Marino loop_preheader_edge (loop));
1701*e4b17023SJohn Marino add_phi_arg (phi, def, guard, locus);
1702*e4b17023SJohn Marino
1703*e4b17023SJohn Marino def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1704*e4b17023SJohn Marino locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1705*e4b17023SJohn Marino add_phi_arg (phi, def, end, locus);
1706*e4b17023SJohn Marino }
1707*e4b17023SJohn Marino e = redirect_edge_and_branch (exit, nexit->dest);
1708*e4b17023SJohn Marino PENDING_STMT (e) = NULL;
1709*e4b17023SJohn Marino
1710*e4b17023SJohn Marino /* Emit GIMPLE_OMP_FOR. */
1711*e4b17023SJohn Marino gimple_cond_set_lhs (cond_stmt, cvar_base);
1712*e4b17023SJohn Marino type = TREE_TYPE (cvar);
1713*e4b17023SJohn Marino t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1714*e4b17023SJohn Marino OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1715*e4b17023SJohn Marino
1716*e4b17023SJohn Marino for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1717*e4b17023SJohn Marino gimple_set_location (for_stmt, loc);
1718*e4b17023SJohn Marino gimple_omp_for_set_index (for_stmt, 0, initvar);
1719*e4b17023SJohn Marino gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1720*e4b17023SJohn Marino gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1721*e4b17023SJohn Marino gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1722*e4b17023SJohn Marino gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1723*e4b17023SJohn Marino cvar_base,
1724*e4b17023SJohn Marino build_int_cst (type, 1)));
1725*e4b17023SJohn Marino
1726*e4b17023SJohn Marino gsi = gsi_last_bb (for_bb);
1727*e4b17023SJohn Marino gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1728*e4b17023SJohn Marino SSA_NAME_DEF_STMT (initvar) = for_stmt;
1729*e4b17023SJohn Marino
1730*e4b17023SJohn Marino /* Emit GIMPLE_OMP_CONTINUE. */
1731*e4b17023SJohn Marino gsi = gsi_last_bb (loop->latch);
1732*e4b17023SJohn Marino stmt = gimple_build_omp_continue (cvar_next, cvar);
1733*e4b17023SJohn Marino gimple_set_location (stmt, loc);
1734*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1735*e4b17023SJohn Marino SSA_NAME_DEF_STMT (cvar_next) = stmt;
1736*e4b17023SJohn Marino
1737*e4b17023SJohn Marino /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1738*e4b17023SJohn Marino gsi = gsi_last_bb (ex_bb);
1739*e4b17023SJohn Marino stmt = gimple_build_omp_return (true);
1740*e4b17023SJohn Marino gimple_set_location (stmt, loc);
1741*e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1742*e4b17023SJohn Marino
1743*e4b17023SJohn Marino return paral_bb;
1744*e4b17023SJohn Marino }
1745*e4b17023SJohn Marino
1746*e4b17023SJohn Marino /* Generates code to execute the iterations of LOOP in N_THREADS
1747*e4b17023SJohn Marino threads in parallel.
1748*e4b17023SJohn Marino
1749*e4b17023SJohn Marino NITER describes number of iterations of LOOP.
1750*e4b17023SJohn Marino REDUCTION_LIST describes the reductions existent in the LOOP. */
1751*e4b17023SJohn Marino
1752*e4b17023SJohn Marino static void
gen_parallel_loop(struct loop * loop,htab_t reduction_list,unsigned n_threads,struct tree_niter_desc * niter)1753*e4b17023SJohn Marino gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1754*e4b17023SJohn Marino unsigned n_threads, struct tree_niter_desc *niter)
1755*e4b17023SJohn Marino {
1756*e4b17023SJohn Marino loop_iterator li;
1757*e4b17023SJohn Marino tree many_iterations_cond, type, nit;
1758*e4b17023SJohn Marino tree arg_struct, new_arg_struct;
1759*e4b17023SJohn Marino gimple_seq stmts;
1760*e4b17023SJohn Marino basic_block parallel_head;
1761*e4b17023SJohn Marino edge entry, exit;
1762*e4b17023SJohn Marino struct clsn_data clsn_data;
1763*e4b17023SJohn Marino unsigned prob;
1764*e4b17023SJohn Marino location_t loc;
1765*e4b17023SJohn Marino gimple cond_stmt;
1766*e4b17023SJohn Marino
1767*e4b17023SJohn Marino /* From
1768*e4b17023SJohn Marino
1769*e4b17023SJohn Marino ---------------------------------------------------------------------
1770*e4b17023SJohn Marino loop
1771*e4b17023SJohn Marino {
1772*e4b17023SJohn Marino IV = phi (INIT, IV + STEP)
1773*e4b17023SJohn Marino BODY1;
1774*e4b17023SJohn Marino if (COND)
1775*e4b17023SJohn Marino break;
1776*e4b17023SJohn Marino BODY2;
1777*e4b17023SJohn Marino }
1778*e4b17023SJohn Marino ---------------------------------------------------------------------
1779*e4b17023SJohn Marino
1780*e4b17023SJohn Marino with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1781*e4b17023SJohn Marino we generate the following code:
1782*e4b17023SJohn Marino
1783*e4b17023SJohn Marino ---------------------------------------------------------------------
1784*e4b17023SJohn Marino
1785*e4b17023SJohn Marino if (MAY_BE_ZERO
1786*e4b17023SJohn Marino || NITER < MIN_PER_THREAD * N_THREADS)
1787*e4b17023SJohn Marino goto original;
1788*e4b17023SJohn Marino
1789*e4b17023SJohn Marino BODY1;
1790*e4b17023SJohn Marino store all local loop-invariant variables used in body of the loop to DATA.
1791*e4b17023SJohn Marino GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1792*e4b17023SJohn Marino load the variables from DATA.
1793*e4b17023SJohn Marino GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1794*e4b17023SJohn Marino BODY2;
1795*e4b17023SJohn Marino BODY1;
1796*e4b17023SJohn Marino GIMPLE_OMP_CONTINUE;
1797*e4b17023SJohn Marino GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1798*e4b17023SJohn Marino GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1799*e4b17023SJohn Marino goto end;
1800*e4b17023SJohn Marino
1801*e4b17023SJohn Marino original:
1802*e4b17023SJohn Marino loop
1803*e4b17023SJohn Marino {
1804*e4b17023SJohn Marino IV = phi (INIT, IV + STEP)
1805*e4b17023SJohn Marino BODY1;
1806*e4b17023SJohn Marino if (COND)
1807*e4b17023SJohn Marino break;
1808*e4b17023SJohn Marino BODY2;
1809*e4b17023SJohn Marino }
1810*e4b17023SJohn Marino
1811*e4b17023SJohn Marino end:
1812*e4b17023SJohn Marino
1813*e4b17023SJohn Marino */
1814*e4b17023SJohn Marino
1815*e4b17023SJohn Marino /* Create two versions of the loop -- in the old one, we know that the
1816*e4b17023SJohn Marino number of iterations is large enough, and we will transform it into the
1817*e4b17023SJohn Marino loop that will be split to loop_fn, the new one will be used for the
1818*e4b17023SJohn Marino remaining iterations. */
1819*e4b17023SJohn Marino
1820*e4b17023SJohn Marino type = TREE_TYPE (niter->niter);
1821*e4b17023SJohn Marino nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1822*e4b17023SJohn Marino NULL_TREE);
1823*e4b17023SJohn Marino if (stmts)
1824*e4b17023SJohn Marino gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1825*e4b17023SJohn Marino
1826*e4b17023SJohn Marino many_iterations_cond =
1827*e4b17023SJohn Marino fold_build2 (GE_EXPR, boolean_type_node,
1828*e4b17023SJohn Marino nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1829*e4b17023SJohn Marino many_iterations_cond
1830*e4b17023SJohn Marino = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1831*e4b17023SJohn Marino invert_truthvalue (unshare_expr (niter->may_be_zero)),
1832*e4b17023SJohn Marino many_iterations_cond);
1833*e4b17023SJohn Marino many_iterations_cond
1834*e4b17023SJohn Marino = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1835*e4b17023SJohn Marino if (stmts)
1836*e4b17023SJohn Marino gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1837*e4b17023SJohn Marino if (!is_gimple_condexpr (many_iterations_cond))
1838*e4b17023SJohn Marino {
1839*e4b17023SJohn Marino many_iterations_cond
1840*e4b17023SJohn Marino = force_gimple_operand (many_iterations_cond, &stmts,
1841*e4b17023SJohn Marino true, NULL_TREE);
1842*e4b17023SJohn Marino if (stmts)
1843*e4b17023SJohn Marino gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1844*e4b17023SJohn Marino }
1845*e4b17023SJohn Marino
1846*e4b17023SJohn Marino initialize_original_copy_tables ();
1847*e4b17023SJohn Marino
1848*e4b17023SJohn Marino /* We assume that the loop usually iterates a lot. */
1849*e4b17023SJohn Marino prob = 4 * REG_BR_PROB_BASE / 5;
1850*e4b17023SJohn Marino loop_version (loop, many_iterations_cond, NULL,
1851*e4b17023SJohn Marino prob, prob, REG_BR_PROB_BASE - prob, true);
1852*e4b17023SJohn Marino update_ssa (TODO_update_ssa);
1853*e4b17023SJohn Marino free_original_copy_tables ();
1854*e4b17023SJohn Marino
1855*e4b17023SJohn Marino /* Base all the induction variables in LOOP on a single control one. */
1856*e4b17023SJohn Marino canonicalize_loop_ivs (loop, &nit, true);
1857*e4b17023SJohn Marino
1858*e4b17023SJohn Marino /* Ensure that the exit condition is the first statement in the loop. */
1859*e4b17023SJohn Marino transform_to_exit_first_loop (loop, reduction_list, nit);
1860*e4b17023SJohn Marino
1861*e4b17023SJohn Marino /* Generate initializations for reductions. */
1862*e4b17023SJohn Marino if (htab_elements (reduction_list) > 0)
1863*e4b17023SJohn Marino htab_traverse (reduction_list, initialize_reductions, loop);
1864*e4b17023SJohn Marino
1865*e4b17023SJohn Marino /* Eliminate the references to local variables from the loop. */
1866*e4b17023SJohn Marino gcc_assert (single_exit (loop));
1867*e4b17023SJohn Marino entry = loop_preheader_edge (loop);
1868*e4b17023SJohn Marino exit = single_dom_exit (loop);
1869*e4b17023SJohn Marino
1870*e4b17023SJohn Marino eliminate_local_variables (entry, exit);
1871*e4b17023SJohn Marino /* In the old loop, move all variables non-local to the loop to a structure
1872*e4b17023SJohn Marino and back, and create separate decls for the variables used in loop. */
1873*e4b17023SJohn Marino separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1874*e4b17023SJohn Marino &new_arg_struct, &clsn_data);
1875*e4b17023SJohn Marino
1876*e4b17023SJohn Marino /* Create the parallel constructs. */
1877*e4b17023SJohn Marino loc = UNKNOWN_LOCATION;
1878*e4b17023SJohn Marino cond_stmt = last_stmt (loop->header);
1879*e4b17023SJohn Marino if (cond_stmt)
1880*e4b17023SJohn Marino loc = gimple_location (cond_stmt);
1881*e4b17023SJohn Marino parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1882*e4b17023SJohn Marino new_arg_struct, n_threads, loc);
1883*e4b17023SJohn Marino if (htab_elements (reduction_list) > 0)
1884*e4b17023SJohn Marino create_call_for_reduction (loop, reduction_list, &clsn_data);
1885*e4b17023SJohn Marino
1886*e4b17023SJohn Marino scev_reset ();
1887*e4b17023SJohn Marino
1888*e4b17023SJohn Marino /* Cancel the loop (it is simpler to do it here rather than to teach the
1889*e4b17023SJohn Marino expander to do it). */
1890*e4b17023SJohn Marino cancel_loop_tree (loop);
1891*e4b17023SJohn Marino
1892*e4b17023SJohn Marino /* Free loop bound estimations that could contain references to
1893*e4b17023SJohn Marino removed statements. */
1894*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0)
1895*e4b17023SJohn Marino free_numbers_of_iterations_estimates_loop (loop);
1896*e4b17023SJohn Marino
1897*e4b17023SJohn Marino /* Expand the parallel constructs. We do it directly here instead of running
1898*e4b17023SJohn Marino a separate expand_omp pass, since it is more efficient, and less likely to
1899*e4b17023SJohn Marino cause troubles with further analyses not being able to deal with the
1900*e4b17023SJohn Marino OMP trees. */
1901*e4b17023SJohn Marino
1902*e4b17023SJohn Marino omp_expand_local (parallel_head);
1903*e4b17023SJohn Marino }
1904*e4b17023SJohn Marino
1905*e4b17023SJohn Marino /* Returns true when LOOP contains vector phi nodes. */
1906*e4b17023SJohn Marino
1907*e4b17023SJohn Marino static bool
loop_has_vector_phi_nodes(struct loop * loop ATTRIBUTE_UNUSED)1908*e4b17023SJohn Marino loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1909*e4b17023SJohn Marino {
1910*e4b17023SJohn Marino unsigned i;
1911*e4b17023SJohn Marino basic_block *bbs = get_loop_body_in_dom_order (loop);
1912*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1913*e4b17023SJohn Marino bool res = true;
1914*e4b17023SJohn Marino
1915*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
1916*e4b17023SJohn Marino for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1917*e4b17023SJohn Marino if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1918*e4b17023SJohn Marino goto end;
1919*e4b17023SJohn Marino
1920*e4b17023SJohn Marino res = false;
1921*e4b17023SJohn Marino end:
1922*e4b17023SJohn Marino free (bbs);
1923*e4b17023SJohn Marino return res;
1924*e4b17023SJohn Marino }
1925*e4b17023SJohn Marino
1926*e4b17023SJohn Marino /* Create a reduction_info struct, initialize it with REDUC_STMT
1927*e4b17023SJohn Marino and PHI, insert it to the REDUCTION_LIST. */
1928*e4b17023SJohn Marino
1929*e4b17023SJohn Marino static void
build_new_reduction(htab_t reduction_list,gimple reduc_stmt,gimple phi)1930*e4b17023SJohn Marino build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1931*e4b17023SJohn Marino {
1932*e4b17023SJohn Marino PTR *slot;
1933*e4b17023SJohn Marino struct reduction_info *new_reduction;
1934*e4b17023SJohn Marino
1935*e4b17023SJohn Marino gcc_assert (reduc_stmt);
1936*e4b17023SJohn Marino
1937*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
1938*e4b17023SJohn Marino {
1939*e4b17023SJohn Marino fprintf (dump_file,
1940*e4b17023SJohn Marino "Detected reduction. reduction stmt is: \n");
1941*e4b17023SJohn Marino print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1942*e4b17023SJohn Marino fprintf (dump_file, "\n");
1943*e4b17023SJohn Marino }
1944*e4b17023SJohn Marino
1945*e4b17023SJohn Marino new_reduction = XCNEW (struct reduction_info);
1946*e4b17023SJohn Marino
1947*e4b17023SJohn Marino new_reduction->reduc_stmt = reduc_stmt;
1948*e4b17023SJohn Marino new_reduction->reduc_phi = phi;
1949*e4b17023SJohn Marino new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1950*e4b17023SJohn Marino new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1951*e4b17023SJohn Marino slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1952*e4b17023SJohn Marino *slot = new_reduction;
1953*e4b17023SJohn Marino }
1954*e4b17023SJohn Marino
1955*e4b17023SJohn Marino /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1956*e4b17023SJohn Marino
1957*e4b17023SJohn Marino static int
set_reduc_phi_uids(void ** slot,void * data ATTRIBUTE_UNUSED)1958*e4b17023SJohn Marino set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1959*e4b17023SJohn Marino {
1960*e4b17023SJohn Marino struct reduction_info *const red = (struct reduction_info *) *slot;
1961*e4b17023SJohn Marino gimple_set_uid (red->reduc_phi, red->reduc_version);
1962*e4b17023SJohn Marino return 1;
1963*e4b17023SJohn Marino }
1964*e4b17023SJohn Marino
1965*e4b17023SJohn Marino /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1966*e4b17023SJohn Marino
1967*e4b17023SJohn Marino static void
gather_scalar_reductions(loop_p loop,htab_t reduction_list)1968*e4b17023SJohn Marino gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1969*e4b17023SJohn Marino {
1970*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1971*e4b17023SJohn Marino loop_vec_info simple_loop_info;
1972*e4b17023SJohn Marino
1973*e4b17023SJohn Marino vect_dump = NULL;
1974*e4b17023SJohn Marino simple_loop_info = vect_analyze_loop_form (loop);
1975*e4b17023SJohn Marino
1976*e4b17023SJohn Marino for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1977*e4b17023SJohn Marino {
1978*e4b17023SJohn Marino gimple phi = gsi_stmt (gsi);
1979*e4b17023SJohn Marino affine_iv iv;
1980*e4b17023SJohn Marino tree res = PHI_RESULT (phi);
1981*e4b17023SJohn Marino bool double_reduc;
1982*e4b17023SJohn Marino
1983*e4b17023SJohn Marino if (!is_gimple_reg (res))
1984*e4b17023SJohn Marino continue;
1985*e4b17023SJohn Marino
1986*e4b17023SJohn Marino if (!simple_iv (loop, loop, res, &iv, true)
1987*e4b17023SJohn Marino && simple_loop_info)
1988*e4b17023SJohn Marino {
1989*e4b17023SJohn Marino gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1990*e4b17023SJohn Marino phi, true,
1991*e4b17023SJohn Marino &double_reduc);
1992*e4b17023SJohn Marino if (reduc_stmt && !double_reduc)
1993*e4b17023SJohn Marino build_new_reduction (reduction_list, reduc_stmt, phi);
1994*e4b17023SJohn Marino }
1995*e4b17023SJohn Marino }
1996*e4b17023SJohn Marino destroy_loop_vec_info (simple_loop_info, true);
1997*e4b17023SJohn Marino
1998*e4b17023SJohn Marino /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1999*e4b17023SJohn Marino and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2000*e4b17023SJohn Marino only now. */
2001*e4b17023SJohn Marino htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
2002*e4b17023SJohn Marino }
2003*e4b17023SJohn Marino
2004*e4b17023SJohn Marino /* Try to initialize NITER for code generation part. */
2005*e4b17023SJohn Marino
2006*e4b17023SJohn Marino static bool
try_get_loop_niter(loop_p loop,struct tree_niter_desc * niter)2007*e4b17023SJohn Marino try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2008*e4b17023SJohn Marino {
2009*e4b17023SJohn Marino edge exit = single_dom_exit (loop);
2010*e4b17023SJohn Marino
2011*e4b17023SJohn Marino gcc_assert (exit);
2012*e4b17023SJohn Marino
2013*e4b17023SJohn Marino /* We need to know # of iterations, and there should be no uses of values
2014*e4b17023SJohn Marino defined inside loop outside of it, unless the values are invariants of
2015*e4b17023SJohn Marino the loop. */
2016*e4b17023SJohn Marino if (!number_of_iterations_exit (loop, exit, niter, false))
2017*e4b17023SJohn Marino {
2018*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2019*e4b17023SJohn Marino fprintf (dump_file, " FAILED: number of iterations not known\n");
2020*e4b17023SJohn Marino return false;
2021*e4b17023SJohn Marino }
2022*e4b17023SJohn Marino
2023*e4b17023SJohn Marino return true;
2024*e4b17023SJohn Marino }
2025*e4b17023SJohn Marino
2026*e4b17023SJohn Marino /* Try to initialize REDUCTION_LIST for code generation part.
2027*e4b17023SJohn Marino REDUCTION_LIST describes the reductions. */
2028*e4b17023SJohn Marino
2029*e4b17023SJohn Marino static bool
try_create_reduction_list(loop_p loop,htab_t reduction_list)2030*e4b17023SJohn Marino try_create_reduction_list (loop_p loop, htab_t reduction_list)
2031*e4b17023SJohn Marino {
2032*e4b17023SJohn Marino edge exit = single_dom_exit (loop);
2033*e4b17023SJohn Marino gimple_stmt_iterator gsi;
2034*e4b17023SJohn Marino
2035*e4b17023SJohn Marino gcc_assert (exit);
2036*e4b17023SJohn Marino
2037*e4b17023SJohn Marino gather_scalar_reductions (loop, reduction_list);
2038*e4b17023SJohn Marino
2039*e4b17023SJohn Marino
2040*e4b17023SJohn Marino for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2041*e4b17023SJohn Marino {
2042*e4b17023SJohn Marino gimple phi = gsi_stmt (gsi);
2043*e4b17023SJohn Marino struct reduction_info *red;
2044*e4b17023SJohn Marino imm_use_iterator imm_iter;
2045*e4b17023SJohn Marino use_operand_p use_p;
2046*e4b17023SJohn Marino gimple reduc_phi;
2047*e4b17023SJohn Marino tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2048*e4b17023SJohn Marino
2049*e4b17023SJohn Marino if (is_gimple_reg (val))
2050*e4b17023SJohn Marino {
2051*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2052*e4b17023SJohn Marino {
2053*e4b17023SJohn Marino fprintf (dump_file, "phi is ");
2054*e4b17023SJohn Marino print_gimple_stmt (dump_file, phi, 0, 0);
2055*e4b17023SJohn Marino fprintf (dump_file, "arg of phi to exit: value ");
2056*e4b17023SJohn Marino print_generic_expr (dump_file, val, 0);
2057*e4b17023SJohn Marino fprintf (dump_file, " used outside loop\n");
2058*e4b17023SJohn Marino fprintf (dump_file,
2059*e4b17023SJohn Marino " checking if it a part of reduction pattern: \n");
2060*e4b17023SJohn Marino }
2061*e4b17023SJohn Marino if (htab_elements (reduction_list) == 0)
2062*e4b17023SJohn Marino {
2063*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2064*e4b17023SJohn Marino fprintf (dump_file,
2065*e4b17023SJohn Marino " FAILED: it is not a part of reduction.\n");
2066*e4b17023SJohn Marino return false;
2067*e4b17023SJohn Marino }
2068*e4b17023SJohn Marino reduc_phi = NULL;
2069*e4b17023SJohn Marino FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2070*e4b17023SJohn Marino {
2071*e4b17023SJohn Marino if (!gimple_debug_bind_p (USE_STMT (use_p))
2072*e4b17023SJohn Marino && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2073*e4b17023SJohn Marino {
2074*e4b17023SJohn Marino reduc_phi = USE_STMT (use_p);
2075*e4b17023SJohn Marino break;
2076*e4b17023SJohn Marino }
2077*e4b17023SJohn Marino }
2078*e4b17023SJohn Marino red = reduction_phi (reduction_list, reduc_phi);
2079*e4b17023SJohn Marino if (red == NULL)
2080*e4b17023SJohn Marino {
2081*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2082*e4b17023SJohn Marino fprintf (dump_file,
2083*e4b17023SJohn Marino " FAILED: it is not a part of reduction.\n");
2084*e4b17023SJohn Marino return false;
2085*e4b17023SJohn Marino }
2086*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2087*e4b17023SJohn Marino {
2088*e4b17023SJohn Marino fprintf (dump_file, "reduction phi is ");
2089*e4b17023SJohn Marino print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2090*e4b17023SJohn Marino fprintf (dump_file, "reduction stmt is ");
2091*e4b17023SJohn Marino print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2092*e4b17023SJohn Marino }
2093*e4b17023SJohn Marino }
2094*e4b17023SJohn Marino }
2095*e4b17023SJohn Marino
2096*e4b17023SJohn Marino /* The iterations of the loop may communicate only through bivs whose
2097*e4b17023SJohn Marino iteration space can be distributed efficiently. */
2098*e4b17023SJohn Marino for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2099*e4b17023SJohn Marino {
2100*e4b17023SJohn Marino gimple phi = gsi_stmt (gsi);
2101*e4b17023SJohn Marino tree def = PHI_RESULT (phi);
2102*e4b17023SJohn Marino affine_iv iv;
2103*e4b17023SJohn Marino
2104*e4b17023SJohn Marino if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
2105*e4b17023SJohn Marino {
2106*e4b17023SJohn Marino struct reduction_info *red;
2107*e4b17023SJohn Marino
2108*e4b17023SJohn Marino red = reduction_phi (reduction_list, phi);
2109*e4b17023SJohn Marino if (red == NULL)
2110*e4b17023SJohn Marino {
2111*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2112*e4b17023SJohn Marino fprintf (dump_file,
2113*e4b17023SJohn Marino " FAILED: scalar dependency between iterations\n");
2114*e4b17023SJohn Marino return false;
2115*e4b17023SJohn Marino }
2116*e4b17023SJohn Marino }
2117*e4b17023SJohn Marino }
2118*e4b17023SJohn Marino
2119*e4b17023SJohn Marino
2120*e4b17023SJohn Marino return true;
2121*e4b17023SJohn Marino }
2122*e4b17023SJohn Marino
2123*e4b17023SJohn Marino /* Detect parallel loops and generate parallel code using libgomp
2124*e4b17023SJohn Marino primitives. Returns true if some loop was parallelized, false
2125*e4b17023SJohn Marino otherwise. */
2126*e4b17023SJohn Marino
2127*e4b17023SJohn Marino bool
parallelize_loops(void)2128*e4b17023SJohn Marino parallelize_loops (void)
2129*e4b17023SJohn Marino {
2130*e4b17023SJohn Marino unsigned n_threads = flag_tree_parallelize_loops;
2131*e4b17023SJohn Marino bool changed = false;
2132*e4b17023SJohn Marino struct loop *loop;
2133*e4b17023SJohn Marino struct tree_niter_desc niter_desc;
2134*e4b17023SJohn Marino loop_iterator li;
2135*e4b17023SJohn Marino htab_t reduction_list;
2136*e4b17023SJohn Marino struct obstack parloop_obstack;
2137*e4b17023SJohn Marino HOST_WIDE_INT estimated;
2138*e4b17023SJohn Marino LOC loop_loc;
2139*e4b17023SJohn Marino
2140*e4b17023SJohn Marino /* Do not parallelize loops in the functions created by parallelization. */
2141*e4b17023SJohn Marino if (parallelized_function_p (cfun->decl))
2142*e4b17023SJohn Marino return false;
2143*e4b17023SJohn Marino if (cfun->has_nonlocal_label)
2144*e4b17023SJohn Marino return false;
2145*e4b17023SJohn Marino
2146*e4b17023SJohn Marino gcc_obstack_init (&parloop_obstack);
2147*e4b17023SJohn Marino reduction_list = htab_create (10, reduction_info_hash,
2148*e4b17023SJohn Marino reduction_info_eq, free);
2149*e4b17023SJohn Marino init_stmt_vec_info_vec ();
2150*e4b17023SJohn Marino
2151*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0)
2152*e4b17023SJohn Marino {
2153*e4b17023SJohn Marino htab_empty (reduction_list);
2154*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2155*e4b17023SJohn Marino {
2156*e4b17023SJohn Marino fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2157*e4b17023SJohn Marino if (loop->inner)
2158*e4b17023SJohn Marino fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2159*e4b17023SJohn Marino else
2160*e4b17023SJohn Marino fprintf (dump_file, "loop %d is innermost\n",loop->num);
2161*e4b17023SJohn Marino }
2162*e4b17023SJohn Marino
2163*e4b17023SJohn Marino /* If we use autopar in graphite pass, we use its marked dependency
2164*e4b17023SJohn Marino checking results. */
2165*e4b17023SJohn Marino if (flag_loop_parallelize_all && !loop->can_be_parallel)
2166*e4b17023SJohn Marino {
2167*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2168*e4b17023SJohn Marino fprintf (dump_file, "loop is not parallel according to graphite\n");
2169*e4b17023SJohn Marino continue;
2170*e4b17023SJohn Marino }
2171*e4b17023SJohn Marino
2172*e4b17023SJohn Marino if (!single_dom_exit (loop))
2173*e4b17023SJohn Marino {
2174*e4b17023SJohn Marino
2175*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2176*e4b17023SJohn Marino fprintf (dump_file, "loop is !single_dom_exit\n");
2177*e4b17023SJohn Marino
2178*e4b17023SJohn Marino continue;
2179*e4b17023SJohn Marino }
2180*e4b17023SJohn Marino
2181*e4b17023SJohn Marino if (/* And of course, the loop must be parallelizable. */
2182*e4b17023SJohn Marino !can_duplicate_loop_p (loop)
2183*e4b17023SJohn Marino || loop_has_blocks_with_irreducible_flag (loop)
2184*e4b17023SJohn Marino || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2185*e4b17023SJohn Marino /* FIXME: the check for vector phi nodes could be removed. */
2186*e4b17023SJohn Marino || loop_has_vector_phi_nodes (loop)
2187*e4b17023SJohn Marino /* FIXME: transform_to_exit_first_loop does not handle not
2188*e4b17023SJohn Marino header-copied loops correctly - see PR46886. */
2189*e4b17023SJohn Marino || !do_while_loop_p (loop))
2190*e4b17023SJohn Marino continue;
2191*e4b17023SJohn Marino estimated = max_stmt_executions_int (loop, false);
2192*e4b17023SJohn Marino /* FIXME: Bypass this check as graphite doesn't update the
2193*e4b17023SJohn Marino count and frequency correctly now. */
2194*e4b17023SJohn Marino if (!flag_loop_parallelize_all
2195*e4b17023SJohn Marino && ((estimated !=-1
2196*e4b17023SJohn Marino && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2197*e4b17023SJohn Marino /* Do not bother with loops in cold areas. */
2198*e4b17023SJohn Marino || optimize_loop_nest_for_size_p (loop)))
2199*e4b17023SJohn Marino continue;
2200*e4b17023SJohn Marino
2201*e4b17023SJohn Marino if (!try_get_loop_niter (loop, &niter_desc))
2202*e4b17023SJohn Marino continue;
2203*e4b17023SJohn Marino
2204*e4b17023SJohn Marino if (!try_create_reduction_list (loop, reduction_list))
2205*e4b17023SJohn Marino continue;
2206*e4b17023SJohn Marino
2207*e4b17023SJohn Marino if (!flag_loop_parallelize_all
2208*e4b17023SJohn Marino && !loop_parallel_p (loop, &parloop_obstack))
2209*e4b17023SJohn Marino continue;
2210*e4b17023SJohn Marino
2211*e4b17023SJohn Marino changed = true;
2212*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
2213*e4b17023SJohn Marino {
2214*e4b17023SJohn Marino if (loop->inner)
2215*e4b17023SJohn Marino fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2216*e4b17023SJohn Marino else
2217*e4b17023SJohn Marino fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2218*e4b17023SJohn Marino loop_loc = find_loop_location (loop);
2219*e4b17023SJohn Marino if (loop_loc != UNKNOWN_LOC)
2220*e4b17023SJohn Marino fprintf (dump_file, "\nloop at %s:%d: ",
2221*e4b17023SJohn Marino LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2222*e4b17023SJohn Marino }
2223*e4b17023SJohn Marino gen_parallel_loop (loop, reduction_list,
2224*e4b17023SJohn Marino n_threads, &niter_desc);
2225*e4b17023SJohn Marino verify_flow_info ();
2226*e4b17023SJohn Marino verify_dominators (CDI_DOMINATORS);
2227*e4b17023SJohn Marino verify_loop_structure ();
2228*e4b17023SJohn Marino verify_loop_closed_ssa (true);
2229*e4b17023SJohn Marino }
2230*e4b17023SJohn Marino
2231*e4b17023SJohn Marino free_stmt_vec_info_vec ();
2232*e4b17023SJohn Marino htab_delete (reduction_list);
2233*e4b17023SJohn Marino obstack_free (&parloop_obstack, NULL);
2234*e4b17023SJohn Marino
2235*e4b17023SJohn Marino /* Parallelization will cause new function calls to be inserted through
2236*e4b17023SJohn Marino which local variables will escape. Reset the points-to solution
2237*e4b17023SJohn Marino for ESCAPED. */
2238*e4b17023SJohn Marino if (changed)
2239*e4b17023SJohn Marino pt_solution_reset (&cfun->gimple_df->escaped);
2240*e4b17023SJohn Marino
2241*e4b17023SJohn Marino return changed;
2242*e4b17023SJohn Marino }
2243*e4b17023SJohn Marino
2244*e4b17023SJohn Marino #include "gt-tree-parloops.h"
2245