xref: /dflybsd-src/contrib/gcc-4.7/gcc/tree-parloops.c (revision 04febcfb30580676d3e95f58a16c5137ee478b32)
1*e4b17023SJohn Marino /* Loop autoparallelization.
2*e4b17023SJohn Marino    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3*e4b17023SJohn Marino    Free Software Foundation, Inc.
4*e4b17023SJohn Marino    Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5*e4b17023SJohn Marino    Zdenek Dvorak <dvorakz@suse.cz>.
6*e4b17023SJohn Marino 
7*e4b17023SJohn Marino This file is part of GCC.
8*e4b17023SJohn Marino 
9*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
10*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
11*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
12*e4b17023SJohn Marino version.
13*e4b17023SJohn Marino 
14*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
16*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17*e4b17023SJohn Marino for more details.
18*e4b17023SJohn Marino 
19*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20*e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
21*e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
22*e4b17023SJohn Marino 
23*e4b17023SJohn Marino #include "config.h"
24*e4b17023SJohn Marino #include "system.h"
25*e4b17023SJohn Marino #include "coretypes.h"
26*e4b17023SJohn Marino #include "tree-flow.h"
27*e4b17023SJohn Marino #include "cfgloop.h"
28*e4b17023SJohn Marino #include "tree-data-ref.h"
29*e4b17023SJohn Marino #include "tree-scalar-evolution.h"
30*e4b17023SJohn Marino #include "gimple-pretty-print.h"
31*e4b17023SJohn Marino #include "tree-pass.h"
32*e4b17023SJohn Marino #include "langhooks.h"
33*e4b17023SJohn Marino #include "tree-vectorizer.h"
34*e4b17023SJohn Marino 
35*e4b17023SJohn Marino /* This pass tries to distribute iterations of loops into several threads.
36*e4b17023SJohn Marino    The implementation is straightforward -- for each loop we test whether its
37*e4b17023SJohn Marino    iterations are independent, and if it is the case (and some additional
38*e4b17023SJohn Marino    conditions regarding profitability and correctness are satisfied), we
39*e4b17023SJohn Marino    add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40*e4b17023SJohn Marino    machinery do its job.
41*e4b17023SJohn Marino 
42*e4b17023SJohn Marino    The most of the complexity is in bringing the code into shape expected
43*e4b17023SJohn Marino    by the omp expanders:
44*e4b17023SJohn Marino    -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45*e4b17023SJohn Marino       variable and that the exit test is at the start of the loop body
46*e4b17023SJohn Marino    -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47*e4b17023SJohn Marino       variables by accesses through pointers, and breaking up ssa chains
48*e4b17023SJohn Marino       by storing the values incoming to the parallelized loop to a structure
49*e4b17023SJohn Marino       passed to the new function as an argument (something similar is done
50*e4b17023SJohn Marino       in omp gimplification, unfortunately only a small part of the code
51*e4b17023SJohn Marino       can be shared).
52*e4b17023SJohn Marino 
53*e4b17023SJohn Marino    TODO:
54*e4b17023SJohn Marino    -- if there are several parallelizable loops in a function, it may be
55*e4b17023SJohn Marino       possible to generate the threads just once (using synchronization to
56*e4b17023SJohn Marino       ensure that cross-loop dependences are obeyed).
57*e4b17023SJohn Marino    -- handling of common scalar dependence patterns (accumulation, ...)
58*e4b17023SJohn Marino    -- handling of non-innermost loops  */
59*e4b17023SJohn Marino 
60*e4b17023SJohn Marino /*
61*e4b17023SJohn Marino   Reduction handling:
62*e4b17023SJohn Marino   currently we use vect_force_simple_reduction() to detect reduction patterns.
63*e4b17023SJohn Marino   The code transformation will be introduced by an example.
64*e4b17023SJohn Marino 
65*e4b17023SJohn Marino 
66*e4b17023SJohn Marino parloop
67*e4b17023SJohn Marino {
68*e4b17023SJohn Marino   int sum=1;
69*e4b17023SJohn Marino 
70*e4b17023SJohn Marino   for (i = 0; i < N; i++)
71*e4b17023SJohn Marino    {
72*e4b17023SJohn Marino     x[i] = i + 3;
73*e4b17023SJohn Marino     sum+=x[i];
74*e4b17023SJohn Marino    }
75*e4b17023SJohn Marino }
76*e4b17023SJohn Marino 
77*e4b17023SJohn Marino gimple-like code:
78*e4b17023SJohn Marino header_bb:
79*e4b17023SJohn Marino 
80*e4b17023SJohn Marino   # sum_29 = PHI <sum_11(5), 1(3)>
81*e4b17023SJohn Marino   # i_28 = PHI <i_12(5), 0(3)>
82*e4b17023SJohn Marino   D.1795_8 = i_28 + 3;
83*e4b17023SJohn Marino   x[i_28] = D.1795_8;
84*e4b17023SJohn Marino   sum_11 = D.1795_8 + sum_29;
85*e4b17023SJohn Marino   i_12 = i_28 + 1;
86*e4b17023SJohn Marino   if (N_6(D) > i_12)
87*e4b17023SJohn Marino     goto header_bb;
88*e4b17023SJohn Marino 
89*e4b17023SJohn Marino 
90*e4b17023SJohn Marino exit_bb:
91*e4b17023SJohn Marino 
92*e4b17023SJohn Marino   # sum_21 = PHI <sum_11(4)>
93*e4b17023SJohn Marino   printf (&"%d"[0], sum_21);
94*e4b17023SJohn Marino 
95*e4b17023SJohn Marino 
96*e4b17023SJohn Marino after reduction transformation (only relevant parts):
97*e4b17023SJohn Marino 
98*e4b17023SJohn Marino parloop
99*e4b17023SJohn Marino {
100*e4b17023SJohn Marino 
101*e4b17023SJohn Marino ....
102*e4b17023SJohn Marino 
103*e4b17023SJohn Marino 
104*e4b17023SJohn Marino   # Storing the initial value given by the user.  #
105*e4b17023SJohn Marino 
106*e4b17023SJohn Marino   .paral_data_store.32.sum.27 = 1;
107*e4b17023SJohn Marino 
108*e4b17023SJohn Marino   #pragma omp parallel num_threads(4)
109*e4b17023SJohn Marino 
110*e4b17023SJohn Marino   #pragma omp for schedule(static)
111*e4b17023SJohn Marino 
112*e4b17023SJohn Marino   # The neutral element corresponding to the particular
113*e4b17023SJohn Marino   reduction's operation, e.g. 0 for PLUS_EXPR,
114*e4b17023SJohn Marino   1 for MULT_EXPR, etc. replaces the user's initial value.  #
115*e4b17023SJohn Marino 
116*e4b17023SJohn Marino   # sum.27_29 = PHI <sum.27_11, 0>
117*e4b17023SJohn Marino 
118*e4b17023SJohn Marino   sum.27_11 = D.1827_8 + sum.27_29;
119*e4b17023SJohn Marino 
120*e4b17023SJohn Marino   GIMPLE_OMP_CONTINUE
121*e4b17023SJohn Marino 
122*e4b17023SJohn Marino   # Adding this reduction phi is done at create_phi_for_local_result() #
123*e4b17023SJohn Marino   # sum.27_56 = PHI <sum.27_11, 0>
124*e4b17023SJohn Marino   GIMPLE_OMP_RETURN
125*e4b17023SJohn Marino 
126*e4b17023SJohn Marino   # Creating the atomic operation is done at
127*e4b17023SJohn Marino   create_call_for_reduction_1()  #
128*e4b17023SJohn Marino 
129*e4b17023SJohn Marino   #pragma omp atomic_load
130*e4b17023SJohn Marino   D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131*e4b17023SJohn Marino   D.1840_60 = sum.27_56 + D.1839_59;
132*e4b17023SJohn Marino   #pragma omp atomic_store (D.1840_60);
133*e4b17023SJohn Marino 
134*e4b17023SJohn Marino   GIMPLE_OMP_RETURN
135*e4b17023SJohn Marino 
136*e4b17023SJohn Marino  # collecting the result after the join of the threads is done at
137*e4b17023SJohn Marino   create_loads_for_reductions().
138*e4b17023SJohn Marino   The value computed by the threads is loaded from the
139*e4b17023SJohn Marino   shared struct.  #
140*e4b17023SJohn Marino 
141*e4b17023SJohn Marino 
142*e4b17023SJohn Marino   .paral_data_load.33_52 = &.paral_data_store.32;
143*e4b17023SJohn Marino   sum_37 =  .paral_data_load.33_52->sum.27;
144*e4b17023SJohn Marino   sum_43 = D.1795_41 + sum_37;
145*e4b17023SJohn Marino 
146*e4b17023SJohn Marino   exit bb:
147*e4b17023SJohn Marino   # sum_21 = PHI <sum_43, sum_26>
148*e4b17023SJohn Marino   printf (&"%d"[0], sum_21);
149*e4b17023SJohn Marino 
150*e4b17023SJohn Marino ...
151*e4b17023SJohn Marino 
152*e4b17023SJohn Marino }
153*e4b17023SJohn Marino 
154*e4b17023SJohn Marino */
155*e4b17023SJohn Marino 
156*e4b17023SJohn Marino /* Minimal number of iterations of a loop that should be executed in each
157*e4b17023SJohn Marino    thread.  */
158*e4b17023SJohn Marino #define MIN_PER_THREAD 100
159*e4b17023SJohn Marino 
160*e4b17023SJohn Marino /* Element of the hashtable, representing a
161*e4b17023SJohn Marino    reduction in the current loop.  */
162*e4b17023SJohn Marino struct reduction_info
163*e4b17023SJohn Marino {
164*e4b17023SJohn Marino   gimple reduc_stmt;		/* reduction statement.  */
165*e4b17023SJohn Marino   gimple reduc_phi;		/* The phi node defining the reduction.  */
166*e4b17023SJohn Marino   enum tree_code reduction_code;/* code for the reduction operation.  */
167*e4b17023SJohn Marino   unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
168*e4b17023SJohn Marino 				   result.  */
169*e4b17023SJohn Marino   gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
170*e4b17023SJohn Marino 				   of the reduction variable when existing the loop. */
171*e4b17023SJohn Marino   tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
172*e4b17023SJohn Marino   tree field;			/*  the name of the field in the parloop data structure intended for reduction.  */
173*e4b17023SJohn Marino   tree init;			/* reduction initialization value.  */
174*e4b17023SJohn Marino   gimple new_phi;		/* (helper field) Newly created phi node whose result
175*e4b17023SJohn Marino 				   will be passed to the atomic operation.  Represents
176*e4b17023SJohn Marino 				   the local result each thread computed for the reduction
177*e4b17023SJohn Marino 				   operation.  */
178*e4b17023SJohn Marino };
179*e4b17023SJohn Marino 
180*e4b17023SJohn Marino /* Equality and hash functions for hashtab code.  */
181*e4b17023SJohn Marino 
182*e4b17023SJohn Marino static int
reduction_info_eq(const void * aa,const void * bb)183*e4b17023SJohn Marino reduction_info_eq (const void *aa, const void *bb)
184*e4b17023SJohn Marino {
185*e4b17023SJohn Marino   const struct reduction_info *a = (const struct reduction_info *) aa;
186*e4b17023SJohn Marino   const struct reduction_info *b = (const struct reduction_info *) bb;
187*e4b17023SJohn Marino 
188*e4b17023SJohn Marino   return (a->reduc_phi == b->reduc_phi);
189*e4b17023SJohn Marino }
190*e4b17023SJohn Marino 
191*e4b17023SJohn Marino static hashval_t
reduction_info_hash(const void * aa)192*e4b17023SJohn Marino reduction_info_hash (const void *aa)
193*e4b17023SJohn Marino {
194*e4b17023SJohn Marino   const struct reduction_info *a = (const struct reduction_info *) aa;
195*e4b17023SJohn Marino 
196*e4b17023SJohn Marino   return a->reduc_version;
197*e4b17023SJohn Marino }
198*e4b17023SJohn Marino 
199*e4b17023SJohn Marino static struct reduction_info *
reduction_phi(htab_t reduction_list,gimple phi)200*e4b17023SJohn Marino reduction_phi (htab_t reduction_list, gimple phi)
201*e4b17023SJohn Marino {
202*e4b17023SJohn Marino   struct reduction_info tmpred, *red;
203*e4b17023SJohn Marino 
204*e4b17023SJohn Marino   if (htab_elements (reduction_list) == 0 || phi == NULL)
205*e4b17023SJohn Marino     return NULL;
206*e4b17023SJohn Marino 
207*e4b17023SJohn Marino   tmpred.reduc_phi = phi;
208*e4b17023SJohn Marino   tmpred.reduc_version = gimple_uid (phi);
209*e4b17023SJohn Marino   red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210*e4b17023SJohn Marino 
211*e4b17023SJohn Marino   return red;
212*e4b17023SJohn Marino }
213*e4b17023SJohn Marino 
214*e4b17023SJohn Marino /* Element of hashtable of names to copy.  */
215*e4b17023SJohn Marino 
216*e4b17023SJohn Marino struct name_to_copy_elt
217*e4b17023SJohn Marino {
218*e4b17023SJohn Marino   unsigned version;	/* The version of the name to copy.  */
219*e4b17023SJohn Marino   tree new_name;	/* The new name used in the copy.  */
220*e4b17023SJohn Marino   tree field;		/* The field of the structure used to pass the
221*e4b17023SJohn Marino 			   value.  */
222*e4b17023SJohn Marino };
223*e4b17023SJohn Marino 
224*e4b17023SJohn Marino /* Equality and hash functions for hashtab code.  */
225*e4b17023SJohn Marino 
226*e4b17023SJohn Marino static int
name_to_copy_elt_eq(const void * aa,const void * bb)227*e4b17023SJohn Marino name_to_copy_elt_eq (const void *aa, const void *bb)
228*e4b17023SJohn Marino {
229*e4b17023SJohn Marino   const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230*e4b17023SJohn Marino   const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231*e4b17023SJohn Marino 
232*e4b17023SJohn Marino   return a->version == b->version;
233*e4b17023SJohn Marino }
234*e4b17023SJohn Marino 
235*e4b17023SJohn Marino static hashval_t
name_to_copy_elt_hash(const void * aa)236*e4b17023SJohn Marino name_to_copy_elt_hash (const void *aa)
237*e4b17023SJohn Marino {
238*e4b17023SJohn Marino   const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239*e4b17023SJohn Marino 
240*e4b17023SJohn Marino   return (hashval_t) a->version;
241*e4b17023SJohn Marino }
242*e4b17023SJohn Marino 
243*e4b17023SJohn Marino /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244*e4b17023SJohn Marino    matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
245*e4b17023SJohn Marino    represents the denominator for every element in the matrix.  */
246*e4b17023SJohn Marino typedef struct lambda_trans_matrix_s
247*e4b17023SJohn Marino {
248*e4b17023SJohn Marino   lambda_matrix matrix;
249*e4b17023SJohn Marino   int rowsize;
250*e4b17023SJohn Marino   int colsize;
251*e4b17023SJohn Marino   int denominator;
252*e4b17023SJohn Marino } *lambda_trans_matrix;
253*e4b17023SJohn Marino #define LTM_MATRIX(T) ((T)->matrix)
254*e4b17023SJohn Marino #define LTM_ROWSIZE(T) ((T)->rowsize)
255*e4b17023SJohn Marino #define LTM_COLSIZE(T) ((T)->colsize)
256*e4b17023SJohn Marino #define LTM_DENOMINATOR(T) ((T)->denominator)
257*e4b17023SJohn Marino 
258*e4b17023SJohn Marino /* Allocate a new transformation matrix.  */
259*e4b17023SJohn Marino 
260*e4b17023SJohn Marino static lambda_trans_matrix
lambda_trans_matrix_new(int colsize,int rowsize,struct obstack * lambda_obstack)261*e4b17023SJohn Marino lambda_trans_matrix_new (int colsize, int rowsize,
262*e4b17023SJohn Marino 			 struct obstack * lambda_obstack)
263*e4b17023SJohn Marino {
264*e4b17023SJohn Marino   lambda_trans_matrix ret;
265*e4b17023SJohn Marino 
266*e4b17023SJohn Marino   ret = (lambda_trans_matrix)
267*e4b17023SJohn Marino     obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268*e4b17023SJohn Marino   LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269*e4b17023SJohn Marino   LTM_ROWSIZE (ret) = rowsize;
270*e4b17023SJohn Marino   LTM_COLSIZE (ret) = colsize;
271*e4b17023SJohn Marino   LTM_DENOMINATOR (ret) = 1;
272*e4b17023SJohn Marino   return ret;
273*e4b17023SJohn Marino }
274*e4b17023SJohn Marino 
275*e4b17023SJohn Marino /* Multiply a vector VEC by a matrix MAT.
276*e4b17023SJohn Marino    MAT is an M*N matrix, and VEC is a vector with length N.  The result
277*e4b17023SJohn Marino    is stored in DEST which must be a vector of length M.  */
278*e4b17023SJohn Marino 
279*e4b17023SJohn Marino static void
lambda_matrix_vector_mult(lambda_matrix matrix,int m,int n,lambda_vector vec,lambda_vector dest)280*e4b17023SJohn Marino lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281*e4b17023SJohn Marino 			   lambda_vector vec, lambda_vector dest)
282*e4b17023SJohn Marino {
283*e4b17023SJohn Marino   int i, j;
284*e4b17023SJohn Marino 
285*e4b17023SJohn Marino   lambda_vector_clear (dest, m);
286*e4b17023SJohn Marino   for (i = 0; i < m; i++)
287*e4b17023SJohn Marino     for (j = 0; j < n; j++)
288*e4b17023SJohn Marino       dest[i] += matrix[i][j] * vec[j];
289*e4b17023SJohn Marino }
290*e4b17023SJohn Marino 
291*e4b17023SJohn Marino /* Return true if TRANS is a legal transformation matrix that respects
292*e4b17023SJohn Marino    the dependence vectors in DISTS and DIRS.  The conservative answer
293*e4b17023SJohn Marino    is false.
294*e4b17023SJohn Marino 
295*e4b17023SJohn Marino    "Wolfe proves that a unimodular transformation represented by the
296*e4b17023SJohn Marino    matrix T is legal when applied to a loop nest with a set of
297*e4b17023SJohn Marino    lexicographically non-negative distance vectors RDG if and only if
298*e4b17023SJohn Marino    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299*e4b17023SJohn Marino    i.e.: if and only if it transforms the lexicographically positive
300*e4b17023SJohn Marino    distance vectors to lexicographically positive vectors.  Note that
301*e4b17023SJohn Marino    a unimodular matrix must transform the zero vector (and only it) to
302*e4b17023SJohn Marino    the zero vector." S.Muchnick.  */
303*e4b17023SJohn Marino 
304*e4b17023SJohn Marino static bool
lambda_transform_legal_p(lambda_trans_matrix trans,int nb_loops,VEC (ddr_p,heap)* dependence_relations)305*e4b17023SJohn Marino lambda_transform_legal_p (lambda_trans_matrix trans,
306*e4b17023SJohn Marino 			  int nb_loops,
307*e4b17023SJohn Marino 			  VEC (ddr_p, heap) *dependence_relations)
308*e4b17023SJohn Marino {
309*e4b17023SJohn Marino   unsigned int i, j;
310*e4b17023SJohn Marino   lambda_vector distres;
311*e4b17023SJohn Marino   struct data_dependence_relation *ddr;
312*e4b17023SJohn Marino 
313*e4b17023SJohn Marino   gcc_assert (LTM_COLSIZE (trans) == nb_loops
314*e4b17023SJohn Marino 	      && LTM_ROWSIZE (trans) == nb_loops);
315*e4b17023SJohn Marino 
316*e4b17023SJohn Marino   /* When there are no dependences, the transformation is correct.  */
317*e4b17023SJohn Marino   if (VEC_length (ddr_p, dependence_relations) == 0)
318*e4b17023SJohn Marino     return true;
319*e4b17023SJohn Marino 
320*e4b17023SJohn Marino   ddr = VEC_index (ddr_p, dependence_relations, 0);
321*e4b17023SJohn Marino   if (ddr == NULL)
322*e4b17023SJohn Marino     return true;
323*e4b17023SJohn Marino 
324*e4b17023SJohn Marino   /* When there is an unknown relation in the dependence_relations, we
325*e4b17023SJohn Marino      know that it is no worth looking at this loop nest: give up.  */
326*e4b17023SJohn Marino   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327*e4b17023SJohn Marino     return false;
328*e4b17023SJohn Marino 
329*e4b17023SJohn Marino   distres = lambda_vector_new (nb_loops);
330*e4b17023SJohn Marino 
331*e4b17023SJohn Marino   /* For each distance vector in the dependence graph.  */
332*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333*e4b17023SJohn Marino     {
334*e4b17023SJohn Marino       /* Don't care about relations for which we know that there is no
335*e4b17023SJohn Marino 	 dependence, nor about read-read (aka. output-dependences):
336*e4b17023SJohn Marino 	 these data accesses can happen in any order.  */
337*e4b17023SJohn Marino       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338*e4b17023SJohn Marino 	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339*e4b17023SJohn Marino 	continue;
340*e4b17023SJohn Marino 
341*e4b17023SJohn Marino       /* Conservatively answer: "this transformation is not valid".  */
342*e4b17023SJohn Marino       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343*e4b17023SJohn Marino 	return false;
344*e4b17023SJohn Marino 
345*e4b17023SJohn Marino       /* If the dependence could not be captured by a distance vector,
346*e4b17023SJohn Marino 	 conservatively answer that the transform is not valid.  */
347*e4b17023SJohn Marino       if (DDR_NUM_DIST_VECTS (ddr) == 0)
348*e4b17023SJohn Marino 	return false;
349*e4b17023SJohn Marino 
350*e4b17023SJohn Marino       /* Compute trans.dist_vect */
351*e4b17023SJohn Marino       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352*e4b17023SJohn Marino 	{
353*e4b17023SJohn Marino 	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354*e4b17023SJohn Marino 				     DDR_DIST_VECT (ddr, j), distres);
355*e4b17023SJohn Marino 
356*e4b17023SJohn Marino 	  if (!lambda_vector_lexico_pos (distres, nb_loops))
357*e4b17023SJohn Marino 	    return false;
358*e4b17023SJohn Marino 	}
359*e4b17023SJohn Marino     }
360*e4b17023SJohn Marino   return true;
361*e4b17023SJohn Marino }
362*e4b17023SJohn Marino 
363*e4b17023SJohn Marino /* Data dependency analysis. Returns true if the iterations of LOOP
364*e4b17023SJohn Marino    are independent on each other (that is, if we can execute them
365*e4b17023SJohn Marino    in parallel).  */
366*e4b17023SJohn Marino 
367*e4b17023SJohn Marino static bool
loop_parallel_p(struct loop * loop,struct obstack * parloop_obstack)368*e4b17023SJohn Marino loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
369*e4b17023SJohn Marino {
370*e4b17023SJohn Marino   VEC (loop_p, heap) *loop_nest;
371*e4b17023SJohn Marino   VEC (ddr_p, heap) *dependence_relations;
372*e4b17023SJohn Marino   VEC (data_reference_p, heap) *datarefs;
373*e4b17023SJohn Marino   lambda_trans_matrix trans;
374*e4b17023SJohn Marino   bool ret = false;
375*e4b17023SJohn Marino 
376*e4b17023SJohn Marino   if (dump_file && (dump_flags & TDF_DETAILS))
377*e4b17023SJohn Marino   {
378*e4b17023SJohn Marino     fprintf (dump_file, "Considering loop %d\n", loop->num);
379*e4b17023SJohn Marino     if (!loop->inner)
380*e4b17023SJohn Marino       fprintf (dump_file, "loop is innermost\n");
381*e4b17023SJohn Marino     else
382*e4b17023SJohn Marino       fprintf (dump_file, "loop NOT innermost\n");
383*e4b17023SJohn Marino    }
384*e4b17023SJohn Marino 
385*e4b17023SJohn Marino   /* Check for problems with dependences.  If the loop can be reversed,
386*e4b17023SJohn Marino      the iterations are independent.  */
387*e4b17023SJohn Marino   datarefs = VEC_alloc (data_reference_p, heap, 10);
388*e4b17023SJohn Marino   dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
389*e4b17023SJohn Marino   loop_nest = VEC_alloc (loop_p, heap, 3);
390*e4b17023SJohn Marino   if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
391*e4b17023SJohn Marino 					   &dependence_relations))
392*e4b17023SJohn Marino     {
393*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
394*e4b17023SJohn Marino 	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
395*e4b17023SJohn Marino       ret = false;
396*e4b17023SJohn Marino       goto end;
397*e4b17023SJohn Marino     }
398*e4b17023SJohn Marino   if (dump_file && (dump_flags & TDF_DETAILS))
399*e4b17023SJohn Marino     dump_data_dependence_relations (dump_file, dependence_relations);
400*e4b17023SJohn Marino 
401*e4b17023SJohn Marino   trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
402*e4b17023SJohn Marino   LTM_MATRIX (trans)[0][0] = -1;
403*e4b17023SJohn Marino 
404*e4b17023SJohn Marino   if (lambda_transform_legal_p (trans, 1, dependence_relations))
405*e4b17023SJohn Marino     {
406*e4b17023SJohn Marino       ret = true;
407*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
408*e4b17023SJohn Marino 	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
409*e4b17023SJohn Marino     }
410*e4b17023SJohn Marino   else if (dump_file && (dump_flags & TDF_DETAILS))
411*e4b17023SJohn Marino     fprintf (dump_file,
412*e4b17023SJohn Marino 	     "  FAILED: data dependencies exist across iterations\n");
413*e4b17023SJohn Marino 
414*e4b17023SJohn Marino  end:
415*e4b17023SJohn Marino   VEC_free (loop_p, heap, loop_nest);
416*e4b17023SJohn Marino   free_dependence_relations (dependence_relations);
417*e4b17023SJohn Marino   free_data_refs (datarefs);
418*e4b17023SJohn Marino 
419*e4b17023SJohn Marino   return ret;
420*e4b17023SJohn Marino }
421*e4b17023SJohn Marino 
422*e4b17023SJohn Marino /* Return true when LOOP contains basic blocks marked with the
423*e4b17023SJohn Marino    BB_IRREDUCIBLE_LOOP flag.  */
424*e4b17023SJohn Marino 
425*e4b17023SJohn Marino static inline bool
loop_has_blocks_with_irreducible_flag(struct loop * loop)426*e4b17023SJohn Marino loop_has_blocks_with_irreducible_flag (struct loop *loop)
427*e4b17023SJohn Marino {
428*e4b17023SJohn Marino   unsigned i;
429*e4b17023SJohn Marino   basic_block *bbs = get_loop_body_in_dom_order (loop);
430*e4b17023SJohn Marino   bool res = true;
431*e4b17023SJohn Marino 
432*e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
433*e4b17023SJohn Marino     if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434*e4b17023SJohn Marino       goto end;
435*e4b17023SJohn Marino 
436*e4b17023SJohn Marino   res = false;
437*e4b17023SJohn Marino  end:
438*e4b17023SJohn Marino   free (bbs);
439*e4b17023SJohn Marino   return res;
440*e4b17023SJohn Marino }
441*e4b17023SJohn Marino 
442*e4b17023SJohn Marino /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
443*e4b17023SJohn Marino    The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
444*e4b17023SJohn Marino    to their addresses that can be reused.  The address of OBJ is known to
445*e4b17023SJohn Marino    be invariant in the whole function.  Other needed statements are placed
446*e4b17023SJohn Marino    right before GSI.  */
447*e4b17023SJohn Marino 
448*e4b17023SJohn Marino static tree
take_address_of(tree obj,tree type,edge entry,htab_t decl_address,gimple_stmt_iterator * gsi)449*e4b17023SJohn Marino take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
450*e4b17023SJohn Marino 		 gimple_stmt_iterator *gsi)
451*e4b17023SJohn Marino {
452*e4b17023SJohn Marino   int uid;
453*e4b17023SJohn Marino   void **dslot;
454*e4b17023SJohn Marino   struct int_tree_map ielt, *nielt;
455*e4b17023SJohn Marino   tree *var_p, name, bvar, addr;
456*e4b17023SJohn Marino   gimple stmt;
457*e4b17023SJohn Marino   gimple_seq stmts;
458*e4b17023SJohn Marino 
459*e4b17023SJohn Marino   /* Since the address of OBJ is invariant, the trees may be shared.
460*e4b17023SJohn Marino      Avoid rewriting unrelated parts of the code.  */
461*e4b17023SJohn Marino   obj = unshare_expr (obj);
462*e4b17023SJohn Marino   for (var_p = &obj;
463*e4b17023SJohn Marino        handled_component_p (*var_p);
464*e4b17023SJohn Marino        var_p = &TREE_OPERAND (*var_p, 0))
465*e4b17023SJohn Marino     continue;
466*e4b17023SJohn Marino 
467*e4b17023SJohn Marino   /* Canonicalize the access to base on a MEM_REF.  */
468*e4b17023SJohn Marino   if (DECL_P (*var_p))
469*e4b17023SJohn Marino     *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470*e4b17023SJohn Marino 
471*e4b17023SJohn Marino   /* Assign a canonical SSA name to the address of the base decl used
472*e4b17023SJohn Marino      in the address and share it for all accesses and addresses based
473*e4b17023SJohn Marino      on it.  */
474*e4b17023SJohn Marino   uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
475*e4b17023SJohn Marino   ielt.uid = uid;
476*e4b17023SJohn Marino   dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
477*e4b17023SJohn Marino   if (!*dslot)
478*e4b17023SJohn Marino     {
479*e4b17023SJohn Marino       if (gsi == NULL)
480*e4b17023SJohn Marino 	return NULL;
481*e4b17023SJohn Marino       addr = TREE_OPERAND (*var_p, 0);
482*e4b17023SJohn Marino       bvar = create_tmp_var (TREE_TYPE (addr),
483*e4b17023SJohn Marino 			     get_name (TREE_OPERAND
484*e4b17023SJohn Marino 				         (TREE_OPERAND (*var_p, 0), 0)));
485*e4b17023SJohn Marino       add_referenced_var (bvar);
486*e4b17023SJohn Marino       stmt = gimple_build_assign (bvar, addr);
487*e4b17023SJohn Marino       name = make_ssa_name (bvar, stmt);
488*e4b17023SJohn Marino       gimple_assign_set_lhs (stmt, name);
489*e4b17023SJohn Marino       gsi_insert_on_edge_immediate (entry, stmt);
490*e4b17023SJohn Marino 
491*e4b17023SJohn Marino       nielt = XNEW (struct int_tree_map);
492*e4b17023SJohn Marino       nielt->uid = uid;
493*e4b17023SJohn Marino       nielt->to = name;
494*e4b17023SJohn Marino       *dslot = nielt;
495*e4b17023SJohn Marino     }
496*e4b17023SJohn Marino   else
497*e4b17023SJohn Marino     name = ((struct int_tree_map *) *dslot)->to;
498*e4b17023SJohn Marino 
499*e4b17023SJohn Marino   /* Express the address in terms of the canonical SSA name.  */
500*e4b17023SJohn Marino   TREE_OPERAND (*var_p, 0) = name;
501*e4b17023SJohn Marino   if (gsi == NULL)
502*e4b17023SJohn Marino     return build_fold_addr_expr_with_type (obj, type);
503*e4b17023SJohn Marino 
504*e4b17023SJohn Marino   name = force_gimple_operand (build_addr (obj, current_function_decl),
505*e4b17023SJohn Marino 			       &stmts, true, NULL_TREE);
506*e4b17023SJohn Marino   if (!gimple_seq_empty_p (stmts))
507*e4b17023SJohn Marino     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
508*e4b17023SJohn Marino 
509*e4b17023SJohn Marino   if (!useless_type_conversion_p (type, TREE_TYPE (name)))
510*e4b17023SJohn Marino     {
511*e4b17023SJohn Marino       name = force_gimple_operand (fold_convert (type, name), &stmts, true,
512*e4b17023SJohn Marino 				   NULL_TREE);
513*e4b17023SJohn Marino       if (!gimple_seq_empty_p (stmts))
514*e4b17023SJohn Marino 	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
515*e4b17023SJohn Marino     }
516*e4b17023SJohn Marino 
517*e4b17023SJohn Marino   return name;
518*e4b17023SJohn Marino }
519*e4b17023SJohn Marino 
520*e4b17023SJohn Marino /* Callback for htab_traverse.  Create the initialization statement
521*e4b17023SJohn Marino    for reduction described in SLOT, and place it at the preheader of
522*e4b17023SJohn Marino    the loop described in DATA.  */
523*e4b17023SJohn Marino 
524*e4b17023SJohn Marino static int
initialize_reductions(void ** slot,void * data)525*e4b17023SJohn Marino initialize_reductions (void **slot, void *data)
526*e4b17023SJohn Marino {
527*e4b17023SJohn Marino   tree init, c;
528*e4b17023SJohn Marino   tree bvar, type, arg;
529*e4b17023SJohn Marino   edge e;
530*e4b17023SJohn Marino 
531*e4b17023SJohn Marino   struct reduction_info *const reduc = (struct reduction_info *) *slot;
532*e4b17023SJohn Marino   struct loop *loop = (struct loop *) data;
533*e4b17023SJohn Marino 
534*e4b17023SJohn Marino   /* Create initialization in preheader:
535*e4b17023SJohn Marino      reduction_variable = initialization value of reduction.  */
536*e4b17023SJohn Marino 
537*e4b17023SJohn Marino   /* In the phi node at the header, replace the argument coming
538*e4b17023SJohn Marino      from the preheader with the reduction initialization value.  */
539*e4b17023SJohn Marino 
540*e4b17023SJohn Marino   /* Create a new variable to initialize the reduction.  */
541*e4b17023SJohn Marino   type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
542*e4b17023SJohn Marino   bvar = create_tmp_var (type, "reduction");
543*e4b17023SJohn Marino   add_referenced_var (bvar);
544*e4b17023SJohn Marino 
545*e4b17023SJohn Marino   c = build_omp_clause (gimple_location (reduc->reduc_stmt),
546*e4b17023SJohn Marino 			OMP_CLAUSE_REDUCTION);
547*e4b17023SJohn Marino   OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
548*e4b17023SJohn Marino   OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
549*e4b17023SJohn Marino 
550*e4b17023SJohn Marino   init = omp_reduction_init (c, TREE_TYPE (bvar));
551*e4b17023SJohn Marino   reduc->init = init;
552*e4b17023SJohn Marino 
553*e4b17023SJohn Marino   /* Replace the argument representing the initialization value
554*e4b17023SJohn Marino      with the initialization value for the reduction (neutral
555*e4b17023SJohn Marino      element for the particular operation, e.g. 0 for PLUS_EXPR,
556*e4b17023SJohn Marino      1 for MULT_EXPR, etc).
557*e4b17023SJohn Marino      Keep the old value in a new variable "reduction_initial",
558*e4b17023SJohn Marino      that will be taken in consideration after the parallel
559*e4b17023SJohn Marino      computing is done.  */
560*e4b17023SJohn Marino 
561*e4b17023SJohn Marino   e = loop_preheader_edge (loop);
562*e4b17023SJohn Marino   arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
563*e4b17023SJohn Marino   /* Create new variable to hold the initial value.  */
564*e4b17023SJohn Marino 
565*e4b17023SJohn Marino   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
566*e4b17023SJohn Marino 	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
567*e4b17023SJohn Marino   reduc->initial_value = arg;
568*e4b17023SJohn Marino   return 1;
569*e4b17023SJohn Marino }
570*e4b17023SJohn Marino 
571*e4b17023SJohn Marino struct elv_data
572*e4b17023SJohn Marino {
573*e4b17023SJohn Marino   struct walk_stmt_info info;
574*e4b17023SJohn Marino   edge entry;
575*e4b17023SJohn Marino   htab_t decl_address;
576*e4b17023SJohn Marino   gimple_stmt_iterator *gsi;
577*e4b17023SJohn Marino   bool changed;
578*e4b17023SJohn Marino   bool reset;
579*e4b17023SJohn Marino };
580*e4b17023SJohn Marino 
581*e4b17023SJohn Marino /* Eliminates references to local variables in *TP out of the single
582*e4b17023SJohn Marino    entry single exit region starting at DTA->ENTRY.
583*e4b17023SJohn Marino    DECL_ADDRESS contains addresses of the references that had their
584*e4b17023SJohn Marino    address taken already.  If the expression is changed, CHANGED is
585*e4b17023SJohn Marino    set to true.  Callback for walk_tree.  */
586*e4b17023SJohn Marino 
587*e4b17023SJohn Marino static tree
eliminate_local_variables_1(tree * tp,int * walk_subtrees,void * data)588*e4b17023SJohn Marino eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
589*e4b17023SJohn Marino {
590*e4b17023SJohn Marino   struct elv_data *const dta = (struct elv_data *) data;
591*e4b17023SJohn Marino   tree t = *tp, var, addr, addr_type, type, obj;
592*e4b17023SJohn Marino 
593*e4b17023SJohn Marino   if (DECL_P (t))
594*e4b17023SJohn Marino     {
595*e4b17023SJohn Marino       *walk_subtrees = 0;
596*e4b17023SJohn Marino 
597*e4b17023SJohn Marino       if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
598*e4b17023SJohn Marino 	return NULL_TREE;
599*e4b17023SJohn Marino 
600*e4b17023SJohn Marino       type = TREE_TYPE (t);
601*e4b17023SJohn Marino       addr_type = build_pointer_type (type);
602*e4b17023SJohn Marino       addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
603*e4b17023SJohn Marino 			      dta->gsi);
604*e4b17023SJohn Marino       if (dta->gsi == NULL && addr == NULL_TREE)
605*e4b17023SJohn Marino 	{
606*e4b17023SJohn Marino 	  dta->reset = true;
607*e4b17023SJohn Marino 	  return NULL_TREE;
608*e4b17023SJohn Marino 	}
609*e4b17023SJohn Marino 
610*e4b17023SJohn Marino       *tp = build_simple_mem_ref (addr);
611*e4b17023SJohn Marino 
612*e4b17023SJohn Marino       dta->changed = true;
613*e4b17023SJohn Marino       return NULL_TREE;
614*e4b17023SJohn Marino     }
615*e4b17023SJohn Marino 
616*e4b17023SJohn Marino   if (TREE_CODE (t) == ADDR_EXPR)
617*e4b17023SJohn Marino     {
618*e4b17023SJohn Marino       /* ADDR_EXPR may appear in two contexts:
619*e4b17023SJohn Marino 	 -- as a gimple operand, when the address taken is a function invariant
620*e4b17023SJohn Marino 	 -- as gimple rhs, when the resulting address in not a function
621*e4b17023SJohn Marino 	    invariant
622*e4b17023SJohn Marino 	 We do not need to do anything special in the latter case (the base of
623*e4b17023SJohn Marino 	 the memory reference whose address is taken may be replaced in the
624*e4b17023SJohn Marino 	 DECL_P case).  The former case is more complicated, as we need to
625*e4b17023SJohn Marino 	 ensure that the new address is still a gimple operand.  Thus, it
626*e4b17023SJohn Marino 	 is not sufficient to replace just the base of the memory reference --
627*e4b17023SJohn Marino 	 we need to move the whole computation of the address out of the
628*e4b17023SJohn Marino 	 loop.  */
629*e4b17023SJohn Marino       if (!is_gimple_val (t))
630*e4b17023SJohn Marino 	return NULL_TREE;
631*e4b17023SJohn Marino 
632*e4b17023SJohn Marino       *walk_subtrees = 0;
633*e4b17023SJohn Marino       obj = TREE_OPERAND (t, 0);
634*e4b17023SJohn Marino       var = get_base_address (obj);
635*e4b17023SJohn Marino       if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
636*e4b17023SJohn Marino 	return NULL_TREE;
637*e4b17023SJohn Marino 
638*e4b17023SJohn Marino       addr_type = TREE_TYPE (t);
639*e4b17023SJohn Marino       addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
640*e4b17023SJohn Marino 			      dta->gsi);
641*e4b17023SJohn Marino       if (dta->gsi == NULL && addr == NULL_TREE)
642*e4b17023SJohn Marino 	{
643*e4b17023SJohn Marino 	  dta->reset = true;
644*e4b17023SJohn Marino 	  return NULL_TREE;
645*e4b17023SJohn Marino 	}
646*e4b17023SJohn Marino       *tp = addr;
647*e4b17023SJohn Marino 
648*e4b17023SJohn Marino       dta->changed = true;
649*e4b17023SJohn Marino       return NULL_TREE;
650*e4b17023SJohn Marino     }
651*e4b17023SJohn Marino 
652*e4b17023SJohn Marino   if (!EXPR_P (t))
653*e4b17023SJohn Marino     *walk_subtrees = 0;
654*e4b17023SJohn Marino 
655*e4b17023SJohn Marino   return NULL_TREE;
656*e4b17023SJohn Marino }
657*e4b17023SJohn Marino 
658*e4b17023SJohn Marino /* Moves the references to local variables in STMT at *GSI out of the single
659*e4b17023SJohn Marino    entry single exit region starting at ENTRY.  DECL_ADDRESS contains
660*e4b17023SJohn Marino    addresses of the references that had their address taken
661*e4b17023SJohn Marino    already.  */
662*e4b17023SJohn Marino 
663*e4b17023SJohn Marino static void
eliminate_local_variables_stmt(edge entry,gimple_stmt_iterator * gsi,htab_t decl_address)664*e4b17023SJohn Marino eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
665*e4b17023SJohn Marino 				htab_t decl_address)
666*e4b17023SJohn Marino {
667*e4b17023SJohn Marino   struct elv_data dta;
668*e4b17023SJohn Marino   gimple stmt = gsi_stmt (*gsi);
669*e4b17023SJohn Marino 
670*e4b17023SJohn Marino   memset (&dta.info, '\0', sizeof (dta.info));
671*e4b17023SJohn Marino   dta.entry = entry;
672*e4b17023SJohn Marino   dta.decl_address = decl_address;
673*e4b17023SJohn Marino   dta.changed = false;
674*e4b17023SJohn Marino   dta.reset = false;
675*e4b17023SJohn Marino 
676*e4b17023SJohn Marino   if (gimple_debug_bind_p (stmt))
677*e4b17023SJohn Marino     {
678*e4b17023SJohn Marino       dta.gsi = NULL;
679*e4b17023SJohn Marino       walk_tree (gimple_debug_bind_get_value_ptr (stmt),
680*e4b17023SJohn Marino 		 eliminate_local_variables_1, &dta.info, NULL);
681*e4b17023SJohn Marino       if (dta.reset)
682*e4b17023SJohn Marino 	{
683*e4b17023SJohn Marino 	  gimple_debug_bind_reset_value (stmt);
684*e4b17023SJohn Marino 	  dta.changed = true;
685*e4b17023SJohn Marino 	}
686*e4b17023SJohn Marino     }
687*e4b17023SJohn Marino   else
688*e4b17023SJohn Marino     {
689*e4b17023SJohn Marino       dta.gsi = gsi;
690*e4b17023SJohn Marino       walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
691*e4b17023SJohn Marino     }
692*e4b17023SJohn Marino 
693*e4b17023SJohn Marino   if (dta.changed)
694*e4b17023SJohn Marino     update_stmt (stmt);
695*e4b17023SJohn Marino }
696*e4b17023SJohn Marino 
697*e4b17023SJohn Marino /* Eliminates the references to local variables from the single entry
698*e4b17023SJohn Marino    single exit region between the ENTRY and EXIT edges.
699*e4b17023SJohn Marino 
700*e4b17023SJohn Marino    This includes:
701*e4b17023SJohn Marino    1) Taking address of a local variable -- these are moved out of the
702*e4b17023SJohn Marino    region (and temporary variable is created to hold the address if
703*e4b17023SJohn Marino    necessary).
704*e4b17023SJohn Marino 
705*e4b17023SJohn Marino    2) Dereferencing a local variable -- these are replaced with indirect
706*e4b17023SJohn Marino    references.  */
707*e4b17023SJohn Marino 
708*e4b17023SJohn Marino static void
eliminate_local_variables(edge entry,edge exit)709*e4b17023SJohn Marino eliminate_local_variables (edge entry, edge exit)
710*e4b17023SJohn Marino {
711*e4b17023SJohn Marino   basic_block bb;
712*e4b17023SJohn Marino   VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
713*e4b17023SJohn Marino   unsigned i;
714*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
715*e4b17023SJohn Marino   bool has_debug_stmt = false;
716*e4b17023SJohn Marino   htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
717*e4b17023SJohn Marino 				     free);
718*e4b17023SJohn Marino   basic_block entry_bb = entry->src;
719*e4b17023SJohn Marino   basic_block exit_bb = exit->dest;
720*e4b17023SJohn Marino 
721*e4b17023SJohn Marino   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
722*e4b17023SJohn Marino 
723*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (basic_block, body, i, bb)
724*e4b17023SJohn Marino     if (bb != entry_bb && bb != exit_bb)
725*e4b17023SJohn Marino       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
726*e4b17023SJohn Marino 	if (is_gimple_debug (gsi_stmt (gsi)))
727*e4b17023SJohn Marino 	  {
728*e4b17023SJohn Marino 	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
729*e4b17023SJohn Marino 	      has_debug_stmt = true;
730*e4b17023SJohn Marino 	  }
731*e4b17023SJohn Marino 	else
732*e4b17023SJohn Marino 	  eliminate_local_variables_stmt (entry, &gsi, decl_address);
733*e4b17023SJohn Marino 
734*e4b17023SJohn Marino   if (has_debug_stmt)
735*e4b17023SJohn Marino     FOR_EACH_VEC_ELT (basic_block, body, i, bb)
736*e4b17023SJohn Marino       if (bb != entry_bb && bb != exit_bb)
737*e4b17023SJohn Marino 	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
738*e4b17023SJohn Marino 	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
739*e4b17023SJohn Marino 	    eliminate_local_variables_stmt (entry, &gsi, decl_address);
740*e4b17023SJohn Marino 
741*e4b17023SJohn Marino   htab_delete (decl_address);
742*e4b17023SJohn Marino   VEC_free (basic_block, heap, body);
743*e4b17023SJohn Marino }
744*e4b17023SJohn Marino 
745*e4b17023SJohn Marino /* Returns true if expression EXPR is not defined between ENTRY and
746*e4b17023SJohn Marino    EXIT, i.e. if all its operands are defined outside of the region.  */
747*e4b17023SJohn Marino 
748*e4b17023SJohn Marino static bool
expr_invariant_in_region_p(edge entry,edge exit,tree expr)749*e4b17023SJohn Marino expr_invariant_in_region_p (edge entry, edge exit, tree expr)
750*e4b17023SJohn Marino {
751*e4b17023SJohn Marino   basic_block entry_bb = entry->src;
752*e4b17023SJohn Marino   basic_block exit_bb = exit->dest;
753*e4b17023SJohn Marino   basic_block def_bb;
754*e4b17023SJohn Marino 
755*e4b17023SJohn Marino   if (is_gimple_min_invariant (expr))
756*e4b17023SJohn Marino     return true;
757*e4b17023SJohn Marino 
758*e4b17023SJohn Marino   if (TREE_CODE (expr) == SSA_NAME)
759*e4b17023SJohn Marino     {
760*e4b17023SJohn Marino       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
761*e4b17023SJohn Marino       if (def_bb
762*e4b17023SJohn Marino 	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
763*e4b17023SJohn Marino 	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
764*e4b17023SJohn Marino 	return false;
765*e4b17023SJohn Marino 
766*e4b17023SJohn Marino       return true;
767*e4b17023SJohn Marino     }
768*e4b17023SJohn Marino 
769*e4b17023SJohn Marino   return false;
770*e4b17023SJohn Marino }
771*e4b17023SJohn Marino 
772*e4b17023SJohn Marino /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
773*e4b17023SJohn Marino    The copies are stored to NAME_COPIES, if NAME was already duplicated,
774*e4b17023SJohn Marino    its duplicate stored in NAME_COPIES is returned.
775*e4b17023SJohn Marino 
776*e4b17023SJohn Marino    Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
777*e4b17023SJohn Marino    duplicated, storing the copies in DECL_COPIES.  */
778*e4b17023SJohn Marino 
779*e4b17023SJohn Marino static tree
separate_decls_in_region_name(tree name,htab_t name_copies,htab_t decl_copies,bool copy_name_p)780*e4b17023SJohn Marino separate_decls_in_region_name (tree name,
781*e4b17023SJohn Marino 			       htab_t name_copies, htab_t decl_copies,
782*e4b17023SJohn Marino 			       bool copy_name_p)
783*e4b17023SJohn Marino {
784*e4b17023SJohn Marino   tree copy, var, var_copy;
785*e4b17023SJohn Marino   unsigned idx, uid, nuid;
786*e4b17023SJohn Marino   struct int_tree_map ielt, *nielt;
787*e4b17023SJohn Marino   struct name_to_copy_elt elt, *nelt;
788*e4b17023SJohn Marino   void **slot, **dslot;
789*e4b17023SJohn Marino 
790*e4b17023SJohn Marino   if (TREE_CODE (name) != SSA_NAME)
791*e4b17023SJohn Marino     return name;
792*e4b17023SJohn Marino 
793*e4b17023SJohn Marino   idx = SSA_NAME_VERSION (name);
794*e4b17023SJohn Marino   elt.version = idx;
795*e4b17023SJohn Marino   slot = htab_find_slot_with_hash (name_copies, &elt, idx,
796*e4b17023SJohn Marino 				   copy_name_p ? INSERT : NO_INSERT);
797*e4b17023SJohn Marino   if (slot && *slot)
798*e4b17023SJohn Marino     return ((struct name_to_copy_elt *) *slot)->new_name;
799*e4b17023SJohn Marino 
800*e4b17023SJohn Marino   var = SSA_NAME_VAR (name);
801*e4b17023SJohn Marino   uid = DECL_UID (var);
802*e4b17023SJohn Marino   ielt.uid = uid;
803*e4b17023SJohn Marino   dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
804*e4b17023SJohn Marino   if (!*dslot)
805*e4b17023SJohn Marino     {
806*e4b17023SJohn Marino       var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
807*e4b17023SJohn Marino       DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
808*e4b17023SJohn Marino       add_referenced_var (var_copy);
809*e4b17023SJohn Marino       nielt = XNEW (struct int_tree_map);
810*e4b17023SJohn Marino       nielt->uid = uid;
811*e4b17023SJohn Marino       nielt->to = var_copy;
812*e4b17023SJohn Marino       *dslot = nielt;
813*e4b17023SJohn Marino 
814*e4b17023SJohn Marino       /* Ensure that when we meet this decl next time, we won't duplicate
815*e4b17023SJohn Marino          it again.  */
816*e4b17023SJohn Marino       nuid = DECL_UID (var_copy);
817*e4b17023SJohn Marino       ielt.uid = nuid;
818*e4b17023SJohn Marino       dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
819*e4b17023SJohn Marino       gcc_assert (!*dslot);
820*e4b17023SJohn Marino       nielt = XNEW (struct int_tree_map);
821*e4b17023SJohn Marino       nielt->uid = nuid;
822*e4b17023SJohn Marino       nielt->to = var_copy;
823*e4b17023SJohn Marino       *dslot = nielt;
824*e4b17023SJohn Marino     }
825*e4b17023SJohn Marino   else
826*e4b17023SJohn Marino     var_copy = ((struct int_tree_map *) *dslot)->to;
827*e4b17023SJohn Marino 
828*e4b17023SJohn Marino   if (copy_name_p)
829*e4b17023SJohn Marino     {
830*e4b17023SJohn Marino       copy = duplicate_ssa_name (name, NULL);
831*e4b17023SJohn Marino       nelt = XNEW (struct name_to_copy_elt);
832*e4b17023SJohn Marino       nelt->version = idx;
833*e4b17023SJohn Marino       nelt->new_name = copy;
834*e4b17023SJohn Marino       nelt->field = NULL_TREE;
835*e4b17023SJohn Marino       *slot = nelt;
836*e4b17023SJohn Marino     }
837*e4b17023SJohn Marino   else
838*e4b17023SJohn Marino     {
839*e4b17023SJohn Marino       gcc_assert (!slot);
840*e4b17023SJohn Marino       copy = name;
841*e4b17023SJohn Marino     }
842*e4b17023SJohn Marino 
843*e4b17023SJohn Marino   SSA_NAME_VAR (copy) = var_copy;
844*e4b17023SJohn Marino   return copy;
845*e4b17023SJohn Marino }
846*e4b17023SJohn Marino 
847*e4b17023SJohn Marino /* Finds the ssa names used in STMT that are defined outside the
848*e4b17023SJohn Marino    region between ENTRY and EXIT and replaces such ssa names with
849*e4b17023SJohn Marino    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
850*e4b17023SJohn Marino    decls of all ssa names used in STMT (including those defined in
851*e4b17023SJohn Marino    LOOP) are replaced with the new temporary variables; the
852*e4b17023SJohn Marino    replacement decls are stored in DECL_COPIES.  */
853*e4b17023SJohn Marino 
854*e4b17023SJohn Marino static void
separate_decls_in_region_stmt(edge entry,edge exit,gimple stmt,htab_t name_copies,htab_t decl_copies)855*e4b17023SJohn Marino separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
856*e4b17023SJohn Marino 			       htab_t name_copies, htab_t decl_copies)
857*e4b17023SJohn Marino {
858*e4b17023SJohn Marino   use_operand_p use;
859*e4b17023SJohn Marino   def_operand_p def;
860*e4b17023SJohn Marino   ssa_op_iter oi;
861*e4b17023SJohn Marino   tree name, copy;
862*e4b17023SJohn Marino   bool copy_name_p;
863*e4b17023SJohn Marino 
864*e4b17023SJohn Marino   mark_virtual_ops_for_renaming (stmt);
865*e4b17023SJohn Marino 
866*e4b17023SJohn Marino   FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
867*e4b17023SJohn Marino   {
868*e4b17023SJohn Marino     name = DEF_FROM_PTR (def);
869*e4b17023SJohn Marino     gcc_assert (TREE_CODE (name) == SSA_NAME);
870*e4b17023SJohn Marino     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
871*e4b17023SJohn Marino 					  false);
872*e4b17023SJohn Marino     gcc_assert (copy == name);
873*e4b17023SJohn Marino   }
874*e4b17023SJohn Marino 
875*e4b17023SJohn Marino   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
876*e4b17023SJohn Marino   {
877*e4b17023SJohn Marino     name = USE_FROM_PTR (use);
878*e4b17023SJohn Marino     if (TREE_CODE (name) != SSA_NAME)
879*e4b17023SJohn Marino       continue;
880*e4b17023SJohn Marino 
881*e4b17023SJohn Marino     copy_name_p = expr_invariant_in_region_p (entry, exit, name);
882*e4b17023SJohn Marino     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
883*e4b17023SJohn Marino 					  copy_name_p);
884*e4b17023SJohn Marino     SET_USE (use, copy);
885*e4b17023SJohn Marino   }
886*e4b17023SJohn Marino }
887*e4b17023SJohn Marino 
888*e4b17023SJohn Marino /* Finds the ssa names used in STMT that are defined outside the
889*e4b17023SJohn Marino    region between ENTRY and EXIT and replaces such ssa names with
890*e4b17023SJohn Marino    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
891*e4b17023SJohn Marino    decls of all ssa names used in STMT (including those defined in
892*e4b17023SJohn Marino    LOOP) are replaced with the new temporary variables; the
893*e4b17023SJohn Marino    replacement decls are stored in DECL_COPIES.  */
894*e4b17023SJohn Marino 
895*e4b17023SJohn Marino static bool
separate_decls_in_region_debug(gimple stmt,htab_t name_copies,htab_t decl_copies)896*e4b17023SJohn Marino separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
897*e4b17023SJohn Marino 				htab_t decl_copies)
898*e4b17023SJohn Marino {
899*e4b17023SJohn Marino   use_operand_p use;
900*e4b17023SJohn Marino   ssa_op_iter oi;
901*e4b17023SJohn Marino   tree var, name;
902*e4b17023SJohn Marino   struct int_tree_map ielt;
903*e4b17023SJohn Marino   struct name_to_copy_elt elt;
904*e4b17023SJohn Marino   void **slot, **dslot;
905*e4b17023SJohn Marino 
906*e4b17023SJohn Marino   if (gimple_debug_bind_p (stmt))
907*e4b17023SJohn Marino     var = gimple_debug_bind_get_var (stmt);
908*e4b17023SJohn Marino   else if (gimple_debug_source_bind_p (stmt))
909*e4b17023SJohn Marino     var = gimple_debug_source_bind_get_var (stmt);
910*e4b17023SJohn Marino   else
911*e4b17023SJohn Marino     return true;
912*e4b17023SJohn Marino   if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
913*e4b17023SJohn Marino     return true;
914*e4b17023SJohn Marino   gcc_assert (DECL_P (var) && SSA_VAR_P (var));
915*e4b17023SJohn Marino   ielt.uid = DECL_UID (var);
916*e4b17023SJohn Marino   dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
917*e4b17023SJohn Marino   if (!dslot)
918*e4b17023SJohn Marino     return true;
919*e4b17023SJohn Marino   if (gimple_debug_bind_p (stmt))
920*e4b17023SJohn Marino     gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
921*e4b17023SJohn Marino   else if (gimple_debug_source_bind_p (stmt))
922*e4b17023SJohn Marino     gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
923*e4b17023SJohn Marino 
924*e4b17023SJohn Marino   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
925*e4b17023SJohn Marino   {
926*e4b17023SJohn Marino     name = USE_FROM_PTR (use);
927*e4b17023SJohn Marino     if (TREE_CODE (name) != SSA_NAME)
928*e4b17023SJohn Marino       continue;
929*e4b17023SJohn Marino 
930*e4b17023SJohn Marino     elt.version = SSA_NAME_VERSION (name);
931*e4b17023SJohn Marino     slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
932*e4b17023SJohn Marino     if (!slot)
933*e4b17023SJohn Marino       {
934*e4b17023SJohn Marino 	gimple_debug_bind_reset_value (stmt);
935*e4b17023SJohn Marino 	update_stmt (stmt);
936*e4b17023SJohn Marino 	break;
937*e4b17023SJohn Marino       }
938*e4b17023SJohn Marino 
939*e4b17023SJohn Marino     SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
940*e4b17023SJohn Marino   }
941*e4b17023SJohn Marino 
942*e4b17023SJohn Marino   return false;
943*e4b17023SJohn Marino }
944*e4b17023SJohn Marino 
945*e4b17023SJohn Marino /* Callback for htab_traverse.  Adds a field corresponding to the reduction
946*e4b17023SJohn Marino    specified in SLOT. The type is passed in DATA.  */
947*e4b17023SJohn Marino 
948*e4b17023SJohn Marino static int
add_field_for_reduction(void ** slot,void * data)949*e4b17023SJohn Marino add_field_for_reduction (void **slot, void *data)
950*e4b17023SJohn Marino {
951*e4b17023SJohn Marino 
952*e4b17023SJohn Marino   struct reduction_info *const red = (struct reduction_info *) *slot;
953*e4b17023SJohn Marino   tree const type = (tree) data;
954*e4b17023SJohn Marino   tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
955*e4b17023SJohn Marino   tree field = build_decl (gimple_location (red->reduc_stmt),
956*e4b17023SJohn Marino 			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
957*e4b17023SJohn Marino 
958*e4b17023SJohn Marino   insert_field_into_struct (type, field);
959*e4b17023SJohn Marino 
960*e4b17023SJohn Marino   red->field = field;
961*e4b17023SJohn Marino 
962*e4b17023SJohn Marino   return 1;
963*e4b17023SJohn Marino }
964*e4b17023SJohn Marino 
965*e4b17023SJohn Marino /* Callback for htab_traverse.  Adds a field corresponding to a ssa name
966*e4b17023SJohn Marino    described in SLOT. The type is passed in DATA.  */
967*e4b17023SJohn Marino 
968*e4b17023SJohn Marino static int
add_field_for_name(void ** slot,void * data)969*e4b17023SJohn Marino add_field_for_name (void **slot, void *data)
970*e4b17023SJohn Marino {
971*e4b17023SJohn Marino   struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
972*e4b17023SJohn Marino   tree type = (tree) data;
973*e4b17023SJohn Marino   tree name = ssa_name (elt->version);
974*e4b17023SJohn Marino   tree var = SSA_NAME_VAR (name);
975*e4b17023SJohn Marino   tree field = build_decl (DECL_SOURCE_LOCATION (var),
976*e4b17023SJohn Marino 			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
977*e4b17023SJohn Marino 
978*e4b17023SJohn Marino   insert_field_into_struct (type, field);
979*e4b17023SJohn Marino   elt->field = field;
980*e4b17023SJohn Marino 
981*e4b17023SJohn Marino   return 1;
982*e4b17023SJohn Marino }
983*e4b17023SJohn Marino 
984*e4b17023SJohn Marino /* Callback for htab_traverse.  A local result is the intermediate result
985*e4b17023SJohn Marino    computed by a single
986*e4b17023SJohn Marino    thread, or the initial value in case no iteration was executed.
987*e4b17023SJohn Marino    This function creates a phi node reflecting these values.
988*e4b17023SJohn Marino    The phi's result will be stored in NEW_PHI field of the
989*e4b17023SJohn Marino    reduction's data structure.  */
990*e4b17023SJohn Marino 
991*e4b17023SJohn Marino static int
create_phi_for_local_result(void ** slot,void * data)992*e4b17023SJohn Marino create_phi_for_local_result (void **slot, void *data)
993*e4b17023SJohn Marino {
994*e4b17023SJohn Marino   struct reduction_info *const reduc = (struct reduction_info *) *slot;
995*e4b17023SJohn Marino   const struct loop *const loop = (const struct loop *) data;
996*e4b17023SJohn Marino   edge e;
997*e4b17023SJohn Marino   gimple new_phi;
998*e4b17023SJohn Marino   basic_block store_bb;
999*e4b17023SJohn Marino   tree local_res;
1000*e4b17023SJohn Marino   source_location locus;
1001*e4b17023SJohn Marino 
1002*e4b17023SJohn Marino   /* STORE_BB is the block where the phi
1003*e4b17023SJohn Marino      should be stored.  It is the destination of the loop exit.
1004*e4b17023SJohn Marino      (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
1005*e4b17023SJohn Marino   store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1006*e4b17023SJohn Marino 
1007*e4b17023SJohn Marino   /* STORE_BB has two predecessors.  One coming from  the loop
1008*e4b17023SJohn Marino      (the reduction's result is computed at the loop),
1009*e4b17023SJohn Marino      and another coming from a block preceding the loop,
1010*e4b17023SJohn Marino      when no iterations
1011*e4b17023SJohn Marino      are executed (the initial value should be taken).  */
1012*e4b17023SJohn Marino   if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1013*e4b17023SJohn Marino     e = EDGE_PRED (store_bb, 1);
1014*e4b17023SJohn Marino   else
1015*e4b17023SJohn Marino     e = EDGE_PRED (store_bb, 0);
1016*e4b17023SJohn Marino   local_res
1017*e4b17023SJohn Marino     = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
1018*e4b17023SJohn Marino 		     NULL);
1019*e4b17023SJohn Marino   locus = gimple_location (reduc->reduc_stmt);
1020*e4b17023SJohn Marino   new_phi = create_phi_node (local_res, store_bb);
1021*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (local_res) = new_phi;
1022*e4b17023SJohn Marino   add_phi_arg (new_phi, reduc->init, e, locus);
1023*e4b17023SJohn Marino   add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1024*e4b17023SJohn Marino 	       FALLTHRU_EDGE (loop->latch), locus);
1025*e4b17023SJohn Marino   reduc->new_phi = new_phi;
1026*e4b17023SJohn Marino 
1027*e4b17023SJohn Marino   return 1;
1028*e4b17023SJohn Marino }
1029*e4b17023SJohn Marino 
1030*e4b17023SJohn Marino struct clsn_data
1031*e4b17023SJohn Marino {
1032*e4b17023SJohn Marino   tree store;
1033*e4b17023SJohn Marino   tree load;
1034*e4b17023SJohn Marino 
1035*e4b17023SJohn Marino   basic_block store_bb;
1036*e4b17023SJohn Marino   basic_block load_bb;
1037*e4b17023SJohn Marino };
1038*e4b17023SJohn Marino 
1039*e4b17023SJohn Marino /* Callback for htab_traverse.  Create an atomic instruction for the
1040*e4b17023SJohn Marino    reduction described in SLOT.
1041*e4b17023SJohn Marino    DATA annotates the place in memory the atomic operation relates to,
1042*e4b17023SJohn Marino    and the basic block it needs to be generated in.  */
1043*e4b17023SJohn Marino 
1044*e4b17023SJohn Marino static int
create_call_for_reduction_1(void ** slot,void * data)1045*e4b17023SJohn Marino create_call_for_reduction_1 (void **slot, void *data)
1046*e4b17023SJohn Marino {
1047*e4b17023SJohn Marino   struct reduction_info *const reduc = (struct reduction_info *) *slot;
1048*e4b17023SJohn Marino   struct clsn_data *const clsn_data = (struct clsn_data *) data;
1049*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1050*e4b17023SJohn Marino   tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1051*e4b17023SJohn Marino   tree load_struct;
1052*e4b17023SJohn Marino   basic_block bb;
1053*e4b17023SJohn Marino   basic_block new_bb;
1054*e4b17023SJohn Marino   edge e;
1055*e4b17023SJohn Marino   tree t, addr, ref, x;
1056*e4b17023SJohn Marino   tree tmp_load, name;
1057*e4b17023SJohn Marino   gimple load;
1058*e4b17023SJohn Marino 
1059*e4b17023SJohn Marino   load_struct = build_simple_mem_ref (clsn_data->load);
1060*e4b17023SJohn Marino   t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1061*e4b17023SJohn Marino 
1062*e4b17023SJohn Marino   addr = build_addr (t, current_function_decl);
1063*e4b17023SJohn Marino 
1064*e4b17023SJohn Marino   /* Create phi node.  */
1065*e4b17023SJohn Marino   bb = clsn_data->load_bb;
1066*e4b17023SJohn Marino 
1067*e4b17023SJohn Marino   e = split_block (bb, t);
1068*e4b17023SJohn Marino   new_bb = e->dest;
1069*e4b17023SJohn Marino 
1070*e4b17023SJohn Marino   tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1071*e4b17023SJohn Marino   add_referenced_var (tmp_load);
1072*e4b17023SJohn Marino   tmp_load = make_ssa_name (tmp_load, NULL);
1073*e4b17023SJohn Marino   load = gimple_build_omp_atomic_load (tmp_load, addr);
1074*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (tmp_load) = load;
1075*e4b17023SJohn Marino   gsi = gsi_start_bb (new_bb);
1076*e4b17023SJohn Marino   gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1077*e4b17023SJohn Marino 
1078*e4b17023SJohn Marino   e = split_block (new_bb, load);
1079*e4b17023SJohn Marino   new_bb = e->dest;
1080*e4b17023SJohn Marino   gsi = gsi_start_bb (new_bb);
1081*e4b17023SJohn Marino   ref = tmp_load;
1082*e4b17023SJohn Marino   x = fold_build2 (reduc->reduction_code,
1083*e4b17023SJohn Marino 		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1084*e4b17023SJohn Marino 		   PHI_RESULT (reduc->new_phi));
1085*e4b17023SJohn Marino 
1086*e4b17023SJohn Marino   name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1087*e4b17023SJohn Marino 				   GSI_CONTINUE_LINKING);
1088*e4b17023SJohn Marino 
1089*e4b17023SJohn Marino   gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1090*e4b17023SJohn Marino   return 1;
1091*e4b17023SJohn Marino }
1092*e4b17023SJohn Marino 
1093*e4b17023SJohn Marino /* Create the atomic operation at the join point of the threads.
1094*e4b17023SJohn Marino    REDUCTION_LIST describes the reductions in the LOOP.
1095*e4b17023SJohn Marino    LD_ST_DATA describes the shared data structure where
1096*e4b17023SJohn Marino    shared data is stored in and loaded from.  */
1097*e4b17023SJohn Marino static void
create_call_for_reduction(struct loop * loop,htab_t reduction_list,struct clsn_data * ld_st_data)1098*e4b17023SJohn Marino create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1099*e4b17023SJohn Marino 			   struct clsn_data *ld_st_data)
1100*e4b17023SJohn Marino {
1101*e4b17023SJohn Marino   htab_traverse (reduction_list, create_phi_for_local_result, loop);
1102*e4b17023SJohn Marino   /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
1103*e4b17023SJohn Marino   ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1104*e4b17023SJohn Marino   htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1105*e4b17023SJohn Marino }
1106*e4b17023SJohn Marino 
1107*e4b17023SJohn Marino /* Callback for htab_traverse.  Loads the final reduction value at the
1108*e4b17023SJohn Marino    join point of all threads, and inserts it in the right place.  */
1109*e4b17023SJohn Marino 
1110*e4b17023SJohn Marino static int
create_loads_for_reductions(void ** slot,void * data)1111*e4b17023SJohn Marino create_loads_for_reductions (void **slot, void *data)
1112*e4b17023SJohn Marino {
1113*e4b17023SJohn Marino   struct reduction_info *const red = (struct reduction_info *) *slot;
1114*e4b17023SJohn Marino   struct clsn_data *const clsn_data = (struct clsn_data *) data;
1115*e4b17023SJohn Marino   gimple stmt;
1116*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1117*e4b17023SJohn Marino   tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1118*e4b17023SJohn Marino   tree load_struct;
1119*e4b17023SJohn Marino   tree name;
1120*e4b17023SJohn Marino   tree x;
1121*e4b17023SJohn Marino 
1122*e4b17023SJohn Marino   gsi = gsi_after_labels (clsn_data->load_bb);
1123*e4b17023SJohn Marino   load_struct = build_simple_mem_ref (clsn_data->load);
1124*e4b17023SJohn Marino   load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1125*e4b17023SJohn Marino 			NULL_TREE);
1126*e4b17023SJohn Marino 
1127*e4b17023SJohn Marino   x = load_struct;
1128*e4b17023SJohn Marino   name = PHI_RESULT (red->keep_res);
1129*e4b17023SJohn Marino   stmt = gimple_build_assign (name, x);
1130*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (name) = stmt;
1131*e4b17023SJohn Marino 
1132*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1133*e4b17023SJohn Marino 
1134*e4b17023SJohn Marino   for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1135*e4b17023SJohn Marino        !gsi_end_p (gsi); gsi_next (&gsi))
1136*e4b17023SJohn Marino     if (gsi_stmt (gsi) == red->keep_res)
1137*e4b17023SJohn Marino       {
1138*e4b17023SJohn Marino 	remove_phi_node (&gsi, false);
1139*e4b17023SJohn Marino 	return 1;
1140*e4b17023SJohn Marino       }
1141*e4b17023SJohn Marino   gcc_unreachable ();
1142*e4b17023SJohn Marino }
1143*e4b17023SJohn Marino 
1144*e4b17023SJohn Marino /* Load the reduction result that was stored in LD_ST_DATA.
1145*e4b17023SJohn Marino    REDUCTION_LIST describes the list of reductions that the
1146*e4b17023SJohn Marino    loads should be generated for.  */
1147*e4b17023SJohn Marino static void
create_final_loads_for_reduction(htab_t reduction_list,struct clsn_data * ld_st_data)1148*e4b17023SJohn Marino create_final_loads_for_reduction (htab_t reduction_list,
1149*e4b17023SJohn Marino 				  struct clsn_data *ld_st_data)
1150*e4b17023SJohn Marino {
1151*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1152*e4b17023SJohn Marino   tree t;
1153*e4b17023SJohn Marino   gimple stmt;
1154*e4b17023SJohn Marino 
1155*e4b17023SJohn Marino   gsi = gsi_after_labels (ld_st_data->load_bb);
1156*e4b17023SJohn Marino   t = build_fold_addr_expr (ld_st_data->store);
1157*e4b17023SJohn Marino   stmt = gimple_build_assign (ld_st_data->load, t);
1158*e4b17023SJohn Marino 
1159*e4b17023SJohn Marino   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1160*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1161*e4b17023SJohn Marino 
1162*e4b17023SJohn Marino   htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1163*e4b17023SJohn Marino 
1164*e4b17023SJohn Marino }
1165*e4b17023SJohn Marino 
1166*e4b17023SJohn Marino /* Callback for htab_traverse.  Store the neutral value for the
1167*e4b17023SJohn Marino   particular reduction's operation, e.g. 0 for PLUS_EXPR,
1168*e4b17023SJohn Marino   1 for MULT_EXPR, etc. into the reduction field.
1169*e4b17023SJohn Marino   The reduction is specified in SLOT. The store information is
1170*e4b17023SJohn Marino   passed in DATA.  */
1171*e4b17023SJohn Marino 
1172*e4b17023SJohn Marino static int
create_stores_for_reduction(void ** slot,void * data)1173*e4b17023SJohn Marino create_stores_for_reduction (void **slot, void *data)
1174*e4b17023SJohn Marino {
1175*e4b17023SJohn Marino   struct reduction_info *const red = (struct reduction_info *) *slot;
1176*e4b17023SJohn Marino   struct clsn_data *const clsn_data = (struct clsn_data *) data;
1177*e4b17023SJohn Marino   tree t;
1178*e4b17023SJohn Marino   gimple stmt;
1179*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1180*e4b17023SJohn Marino   tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1181*e4b17023SJohn Marino 
1182*e4b17023SJohn Marino   gsi = gsi_last_bb (clsn_data->store_bb);
1183*e4b17023SJohn Marino   t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1184*e4b17023SJohn Marino   stmt = gimple_build_assign (t, red->initial_value);
1185*e4b17023SJohn Marino   mark_virtual_ops_for_renaming (stmt);
1186*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1187*e4b17023SJohn Marino 
1188*e4b17023SJohn Marino   return 1;
1189*e4b17023SJohn Marino }
1190*e4b17023SJohn Marino 
1191*e4b17023SJohn Marino /* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1192*e4b17023SJohn Marino    store to a field of STORE in STORE_BB for the ssa name and its duplicate
1193*e4b17023SJohn Marino    specified in SLOT.  */
1194*e4b17023SJohn Marino 
1195*e4b17023SJohn Marino static int
create_loads_and_stores_for_name(void ** slot,void * data)1196*e4b17023SJohn Marino create_loads_and_stores_for_name (void **slot, void *data)
1197*e4b17023SJohn Marino {
1198*e4b17023SJohn Marino   struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1199*e4b17023SJohn Marino   struct clsn_data *const clsn_data = (struct clsn_data *) data;
1200*e4b17023SJohn Marino   tree t;
1201*e4b17023SJohn Marino   gimple stmt;
1202*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1203*e4b17023SJohn Marino   tree type = TREE_TYPE (elt->new_name);
1204*e4b17023SJohn Marino   tree load_struct;
1205*e4b17023SJohn Marino 
1206*e4b17023SJohn Marino   gsi = gsi_last_bb (clsn_data->store_bb);
1207*e4b17023SJohn Marino   t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1208*e4b17023SJohn Marino   stmt = gimple_build_assign (t, ssa_name (elt->version));
1209*e4b17023SJohn Marino   mark_virtual_ops_for_renaming (stmt);
1210*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1211*e4b17023SJohn Marino 
1212*e4b17023SJohn Marino   gsi = gsi_last_bb (clsn_data->load_bb);
1213*e4b17023SJohn Marino   load_struct = build_simple_mem_ref (clsn_data->load);
1214*e4b17023SJohn Marino   t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1215*e4b17023SJohn Marino   stmt = gimple_build_assign (elt->new_name, t);
1216*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1217*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1218*e4b17023SJohn Marino 
1219*e4b17023SJohn Marino   return 1;
1220*e4b17023SJohn Marino }
1221*e4b17023SJohn Marino 
1222*e4b17023SJohn Marino /* Moves all the variables used in LOOP and defined outside of it (including
1223*e4b17023SJohn Marino    the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1224*e4b17023SJohn Marino    name) to a structure created for this purpose.  The code
1225*e4b17023SJohn Marino 
1226*e4b17023SJohn Marino    while (1)
1227*e4b17023SJohn Marino      {
1228*e4b17023SJohn Marino        use (a);
1229*e4b17023SJohn Marino        use (b);
1230*e4b17023SJohn Marino      }
1231*e4b17023SJohn Marino 
1232*e4b17023SJohn Marino    is transformed this way:
1233*e4b17023SJohn Marino 
1234*e4b17023SJohn Marino    bb0:
1235*e4b17023SJohn Marino    old.a = a;
1236*e4b17023SJohn Marino    old.b = b;
1237*e4b17023SJohn Marino 
1238*e4b17023SJohn Marino    bb1:
1239*e4b17023SJohn Marino    a' = new->a;
1240*e4b17023SJohn Marino    b' = new->b;
1241*e4b17023SJohn Marino    while (1)
1242*e4b17023SJohn Marino      {
1243*e4b17023SJohn Marino        use (a');
1244*e4b17023SJohn Marino        use (b');
1245*e4b17023SJohn Marino      }
1246*e4b17023SJohn Marino 
1247*e4b17023SJohn Marino    `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
1248*e4b17023SJohn Marino    pointer `new' is intentionally not initialized (the loop will be split to a
1249*e4b17023SJohn Marino    separate function later, and `new' will be initialized from its arguments).
1250*e4b17023SJohn Marino    LD_ST_DATA holds information about the shared data structure used to pass
1251*e4b17023SJohn Marino    information among the threads.  It is initialized here, and
1252*e4b17023SJohn Marino    gen_parallel_loop will pass it to create_call_for_reduction that
1253*e4b17023SJohn Marino    needs this information.  REDUCTION_LIST describes the reductions
1254*e4b17023SJohn Marino    in LOOP.  */
1255*e4b17023SJohn Marino 
1256*e4b17023SJohn Marino static void
separate_decls_in_region(edge entry,edge exit,htab_t reduction_list,tree * arg_struct,tree * new_arg_struct,struct clsn_data * ld_st_data)1257*e4b17023SJohn Marino separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1258*e4b17023SJohn Marino 			  tree *arg_struct, tree *new_arg_struct,
1259*e4b17023SJohn Marino 			  struct clsn_data *ld_st_data)
1260*e4b17023SJohn Marino 
1261*e4b17023SJohn Marino {
1262*e4b17023SJohn Marino   basic_block bb1 = split_edge (entry);
1263*e4b17023SJohn Marino   basic_block bb0 = single_pred (bb1);
1264*e4b17023SJohn Marino   htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1265*e4b17023SJohn Marino 				    name_to_copy_elt_eq, free);
1266*e4b17023SJohn Marino   htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1267*e4b17023SJohn Marino 				    free);
1268*e4b17023SJohn Marino   unsigned i;
1269*e4b17023SJohn Marino   tree type, type_name, nvar;
1270*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1271*e4b17023SJohn Marino   struct clsn_data clsn_data;
1272*e4b17023SJohn Marino   VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1273*e4b17023SJohn Marino   basic_block bb;
1274*e4b17023SJohn Marino   basic_block entry_bb = bb1;
1275*e4b17023SJohn Marino   basic_block exit_bb = exit->dest;
1276*e4b17023SJohn Marino   bool has_debug_stmt = false;
1277*e4b17023SJohn Marino 
1278*e4b17023SJohn Marino   entry = single_succ_edge (entry_bb);
1279*e4b17023SJohn Marino   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1280*e4b17023SJohn Marino 
1281*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1282*e4b17023SJohn Marino     {
1283*e4b17023SJohn Marino       if (bb != entry_bb && bb != exit_bb)
1284*e4b17023SJohn Marino 	{
1285*e4b17023SJohn Marino 	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1286*e4b17023SJohn Marino 	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1287*e4b17023SJohn Marino 					   name_copies, decl_copies);
1288*e4b17023SJohn Marino 
1289*e4b17023SJohn Marino 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1290*e4b17023SJohn Marino 	    {
1291*e4b17023SJohn Marino 	      gimple stmt = gsi_stmt (gsi);
1292*e4b17023SJohn Marino 
1293*e4b17023SJohn Marino 	      if (is_gimple_debug (stmt))
1294*e4b17023SJohn Marino 		has_debug_stmt = true;
1295*e4b17023SJohn Marino 	      else
1296*e4b17023SJohn Marino 		separate_decls_in_region_stmt (entry, exit, stmt,
1297*e4b17023SJohn Marino 					       name_copies, decl_copies);
1298*e4b17023SJohn Marino 	    }
1299*e4b17023SJohn Marino 	}
1300*e4b17023SJohn Marino     }
1301*e4b17023SJohn Marino 
1302*e4b17023SJohn Marino   /* Now process debug bind stmts.  We must not create decls while
1303*e4b17023SJohn Marino      processing debug stmts, so we defer their processing so as to
1304*e4b17023SJohn Marino      make sure we will have debug info for as many variables as
1305*e4b17023SJohn Marino      possible (all of those that were dealt with in the loop above),
1306*e4b17023SJohn Marino      and discard those for which we know there's nothing we can
1307*e4b17023SJohn Marino      do.  */
1308*e4b17023SJohn Marino   if (has_debug_stmt)
1309*e4b17023SJohn Marino     FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1310*e4b17023SJohn Marino       if (bb != entry_bb && bb != exit_bb)
1311*e4b17023SJohn Marino 	{
1312*e4b17023SJohn Marino 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1313*e4b17023SJohn Marino 	    {
1314*e4b17023SJohn Marino 	      gimple stmt = gsi_stmt (gsi);
1315*e4b17023SJohn Marino 
1316*e4b17023SJohn Marino 	      if (is_gimple_debug (stmt))
1317*e4b17023SJohn Marino 		{
1318*e4b17023SJohn Marino 		  if (separate_decls_in_region_debug (stmt, name_copies,
1319*e4b17023SJohn Marino 						      decl_copies))
1320*e4b17023SJohn Marino 		    {
1321*e4b17023SJohn Marino 		      gsi_remove (&gsi, true);
1322*e4b17023SJohn Marino 		      continue;
1323*e4b17023SJohn Marino 		    }
1324*e4b17023SJohn Marino 		}
1325*e4b17023SJohn Marino 
1326*e4b17023SJohn Marino 	      gsi_next (&gsi);
1327*e4b17023SJohn Marino 	    }
1328*e4b17023SJohn Marino 	}
1329*e4b17023SJohn Marino 
1330*e4b17023SJohn Marino   VEC_free (basic_block, heap, body);
1331*e4b17023SJohn Marino 
1332*e4b17023SJohn Marino   if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1333*e4b17023SJohn Marino     {
1334*e4b17023SJohn Marino       /* It may happen that there is nothing to copy (if there are only
1335*e4b17023SJohn Marino          loop carried and external variables in the loop).  */
1336*e4b17023SJohn Marino       *arg_struct = NULL;
1337*e4b17023SJohn Marino       *new_arg_struct = NULL;
1338*e4b17023SJohn Marino     }
1339*e4b17023SJohn Marino   else
1340*e4b17023SJohn Marino     {
1341*e4b17023SJohn Marino       /* Create the type for the structure to store the ssa names to.  */
1342*e4b17023SJohn Marino       type = lang_hooks.types.make_type (RECORD_TYPE);
1343*e4b17023SJohn Marino       type_name = build_decl (UNKNOWN_LOCATION,
1344*e4b17023SJohn Marino 			      TYPE_DECL, create_tmp_var_name (".paral_data"),
1345*e4b17023SJohn Marino 			      type);
1346*e4b17023SJohn Marino       TYPE_NAME (type) = type_name;
1347*e4b17023SJohn Marino 
1348*e4b17023SJohn Marino       htab_traverse (name_copies, add_field_for_name, type);
1349*e4b17023SJohn Marino       if (reduction_list && htab_elements (reduction_list) > 0)
1350*e4b17023SJohn Marino 	{
1351*e4b17023SJohn Marino 	  /* Create the fields for reductions.  */
1352*e4b17023SJohn Marino 	  htab_traverse (reduction_list, add_field_for_reduction,
1353*e4b17023SJohn Marino                          type);
1354*e4b17023SJohn Marino 	}
1355*e4b17023SJohn Marino       layout_type (type);
1356*e4b17023SJohn Marino 
1357*e4b17023SJohn Marino       /* Create the loads and stores.  */
1358*e4b17023SJohn Marino       *arg_struct = create_tmp_var (type, ".paral_data_store");
1359*e4b17023SJohn Marino       add_referenced_var (*arg_struct);
1360*e4b17023SJohn Marino       nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1361*e4b17023SJohn Marino       add_referenced_var (nvar);
1362*e4b17023SJohn Marino       *new_arg_struct = make_ssa_name (nvar, NULL);
1363*e4b17023SJohn Marino 
1364*e4b17023SJohn Marino       ld_st_data->store = *arg_struct;
1365*e4b17023SJohn Marino       ld_st_data->load = *new_arg_struct;
1366*e4b17023SJohn Marino       ld_st_data->store_bb = bb0;
1367*e4b17023SJohn Marino       ld_st_data->load_bb = bb1;
1368*e4b17023SJohn Marino 
1369*e4b17023SJohn Marino       htab_traverse (name_copies, create_loads_and_stores_for_name,
1370*e4b17023SJohn Marino 		     ld_st_data);
1371*e4b17023SJohn Marino 
1372*e4b17023SJohn Marino       /* Load the calculation from memory (after the join of the threads).  */
1373*e4b17023SJohn Marino 
1374*e4b17023SJohn Marino       if (reduction_list && htab_elements (reduction_list) > 0)
1375*e4b17023SJohn Marino 	{
1376*e4b17023SJohn Marino 	  htab_traverse (reduction_list, create_stores_for_reduction,
1377*e4b17023SJohn Marino                         ld_st_data);
1378*e4b17023SJohn Marino 	  clsn_data.load = make_ssa_name (nvar, NULL);
1379*e4b17023SJohn Marino 	  clsn_data.load_bb = exit->dest;
1380*e4b17023SJohn Marino 	  clsn_data.store = ld_st_data->store;
1381*e4b17023SJohn Marino 	  create_final_loads_for_reduction (reduction_list, &clsn_data);
1382*e4b17023SJohn Marino 	}
1383*e4b17023SJohn Marino     }
1384*e4b17023SJohn Marino 
1385*e4b17023SJohn Marino   htab_delete (decl_copies);
1386*e4b17023SJohn Marino   htab_delete (name_copies);
1387*e4b17023SJohn Marino }
1388*e4b17023SJohn Marino 
1389*e4b17023SJohn Marino /* Bitmap containing uids of functions created by parallelization.  We cannot
1390*e4b17023SJohn Marino    allocate it from the default obstack, as it must live across compilation
1391*e4b17023SJohn Marino    of several functions; we make it gc allocated instead.  */
1392*e4b17023SJohn Marino 
1393*e4b17023SJohn Marino static GTY(()) bitmap parallelized_functions;
1394*e4b17023SJohn Marino 
1395*e4b17023SJohn Marino /* Returns true if FN was created by create_loop_fn.  */
1396*e4b17023SJohn Marino 
1397*e4b17023SJohn Marino static bool
parallelized_function_p(tree fn)1398*e4b17023SJohn Marino parallelized_function_p (tree fn)
1399*e4b17023SJohn Marino {
1400*e4b17023SJohn Marino   if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1401*e4b17023SJohn Marino     return false;
1402*e4b17023SJohn Marino 
1403*e4b17023SJohn Marino   return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1404*e4b17023SJohn Marino }
1405*e4b17023SJohn Marino 
1406*e4b17023SJohn Marino /* Creates and returns an empty function that will receive the body of
1407*e4b17023SJohn Marino    a parallelized loop.  */
1408*e4b17023SJohn Marino 
1409*e4b17023SJohn Marino static tree
create_loop_fn(location_t loc)1410*e4b17023SJohn Marino create_loop_fn (location_t loc)
1411*e4b17023SJohn Marino {
1412*e4b17023SJohn Marino   char buf[100];
1413*e4b17023SJohn Marino   char *tname;
1414*e4b17023SJohn Marino   tree decl, type, name, t;
1415*e4b17023SJohn Marino   struct function *act_cfun = cfun;
1416*e4b17023SJohn Marino   static unsigned loopfn_num;
1417*e4b17023SJohn Marino 
1418*e4b17023SJohn Marino   snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1419*e4b17023SJohn Marino   ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1420*e4b17023SJohn Marino   clean_symbol_name (tname);
1421*e4b17023SJohn Marino   name = get_identifier (tname);
1422*e4b17023SJohn Marino   type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1423*e4b17023SJohn Marino 
1424*e4b17023SJohn Marino   decl = build_decl (loc, FUNCTION_DECL, name, type);
1425*e4b17023SJohn Marino   if (!parallelized_functions)
1426*e4b17023SJohn Marino     parallelized_functions = BITMAP_GGC_ALLOC ();
1427*e4b17023SJohn Marino   bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1428*e4b17023SJohn Marino 
1429*e4b17023SJohn Marino   TREE_STATIC (decl) = 1;
1430*e4b17023SJohn Marino   TREE_USED (decl) = 1;
1431*e4b17023SJohn Marino   DECL_ARTIFICIAL (decl) = 1;
1432*e4b17023SJohn Marino   DECL_IGNORED_P (decl) = 0;
1433*e4b17023SJohn Marino   TREE_PUBLIC (decl) = 0;
1434*e4b17023SJohn Marino   DECL_UNINLINABLE (decl) = 1;
1435*e4b17023SJohn Marino   DECL_EXTERNAL (decl) = 0;
1436*e4b17023SJohn Marino   DECL_CONTEXT (decl) = NULL_TREE;
1437*e4b17023SJohn Marino   DECL_INITIAL (decl) = make_node (BLOCK);
1438*e4b17023SJohn Marino 
1439*e4b17023SJohn Marino   t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1440*e4b17023SJohn Marino   DECL_ARTIFICIAL (t) = 1;
1441*e4b17023SJohn Marino   DECL_IGNORED_P (t) = 1;
1442*e4b17023SJohn Marino   DECL_RESULT (decl) = t;
1443*e4b17023SJohn Marino 
1444*e4b17023SJohn Marino   t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1445*e4b17023SJohn Marino 		  ptr_type_node);
1446*e4b17023SJohn Marino   DECL_ARTIFICIAL (t) = 1;
1447*e4b17023SJohn Marino   DECL_ARG_TYPE (t) = ptr_type_node;
1448*e4b17023SJohn Marino   DECL_CONTEXT (t) = decl;
1449*e4b17023SJohn Marino   TREE_USED (t) = 1;
1450*e4b17023SJohn Marino   DECL_ARGUMENTS (decl) = t;
1451*e4b17023SJohn Marino 
1452*e4b17023SJohn Marino   allocate_struct_function (decl, false);
1453*e4b17023SJohn Marino 
1454*e4b17023SJohn Marino   /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1455*e4b17023SJohn Marino      it.  */
1456*e4b17023SJohn Marino   set_cfun (act_cfun);
1457*e4b17023SJohn Marino 
1458*e4b17023SJohn Marino   return decl;
1459*e4b17023SJohn Marino }
1460*e4b17023SJohn Marino 
1461*e4b17023SJohn Marino /* Moves the exit condition of LOOP to the beginning of its header, and
1462*e4b17023SJohn Marino    duplicates the part of the last iteration that gets disabled to the
1463*e4b17023SJohn Marino    exit of the loop.  NIT is the number of iterations of the loop
1464*e4b17023SJohn Marino    (used to initialize the variables in the duplicated part).
1465*e4b17023SJohn Marino 
1466*e4b17023SJohn Marino    TODO: the common case is that latch of the loop is empty and immediately
1467*e4b17023SJohn Marino    follows the loop exit.  In this case, it would be better not to copy the
1468*e4b17023SJohn Marino    body of the loop, but only move the entry of the loop directly before the
1469*e4b17023SJohn Marino    exit check and increase the number of iterations of the loop by one.
1470*e4b17023SJohn Marino    This may need some additional preconditioning in case NIT = ~0.
1471*e4b17023SJohn Marino    REDUCTION_LIST describes the reductions in LOOP.  */
1472*e4b17023SJohn Marino 
1473*e4b17023SJohn Marino static void
transform_to_exit_first_loop(struct loop * loop,htab_t reduction_list,tree nit)1474*e4b17023SJohn Marino transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1475*e4b17023SJohn Marino {
1476*e4b17023SJohn Marino   basic_block *bbs, *nbbs, ex_bb, orig_header;
1477*e4b17023SJohn Marino   unsigned n;
1478*e4b17023SJohn Marino   bool ok;
1479*e4b17023SJohn Marino   edge exit = single_dom_exit (loop), hpred;
1480*e4b17023SJohn Marino   tree control, control_name, res, t;
1481*e4b17023SJohn Marino   gimple phi, nphi, cond_stmt, stmt, cond_nit;
1482*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1483*e4b17023SJohn Marino   tree nit_1;
1484*e4b17023SJohn Marino   edge exit_1;
1485*e4b17023SJohn Marino   tree new_rhs;
1486*e4b17023SJohn Marino 
1487*e4b17023SJohn Marino   split_block_after_labels (loop->header);
1488*e4b17023SJohn Marino   orig_header = single_succ (loop->header);
1489*e4b17023SJohn Marino   hpred = single_succ_edge (loop->header);
1490*e4b17023SJohn Marino 
1491*e4b17023SJohn Marino   cond_stmt = last_stmt (exit->src);
1492*e4b17023SJohn Marino   control = gimple_cond_lhs (cond_stmt);
1493*e4b17023SJohn Marino   gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1494*e4b17023SJohn Marino 
1495*e4b17023SJohn Marino   /* Make sure that we have phi nodes on exit for all loop header phis
1496*e4b17023SJohn Marino      (create_parallel_loop requires that).  */
1497*e4b17023SJohn Marino   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1498*e4b17023SJohn Marino     {
1499*e4b17023SJohn Marino       phi = gsi_stmt (gsi);
1500*e4b17023SJohn Marino       res = PHI_RESULT (phi);
1501*e4b17023SJohn Marino       t = make_ssa_name (SSA_NAME_VAR (res), phi);
1502*e4b17023SJohn Marino       SET_PHI_RESULT (phi, t);
1503*e4b17023SJohn Marino       nphi = create_phi_node (res, orig_header);
1504*e4b17023SJohn Marino       SSA_NAME_DEF_STMT (res) = nphi;
1505*e4b17023SJohn Marino       add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1506*e4b17023SJohn Marino 
1507*e4b17023SJohn Marino       if (res == control)
1508*e4b17023SJohn Marino 	{
1509*e4b17023SJohn Marino 	  gimple_cond_set_lhs (cond_stmt, t);
1510*e4b17023SJohn Marino 	  update_stmt (cond_stmt);
1511*e4b17023SJohn Marino 	  control = t;
1512*e4b17023SJohn Marino 	}
1513*e4b17023SJohn Marino     }
1514*e4b17023SJohn Marino 
1515*e4b17023SJohn Marino  /* Setting the condition towards peeling the last iteration:
1516*e4b17023SJohn Marino     If the block consisting of the exit condition has the latch as
1517*e4b17023SJohn Marino     successor, then the body of the loop is executed before
1518*e4b17023SJohn Marino     the exit condition is tested.  In such case, moving the
1519*e4b17023SJohn Marino     condition to the entry, causes that the loop will iterate
1520*e4b17023SJohn Marino     one less iteration (which is the wanted outcome, since we
1521*e4b17023SJohn Marino     peel out the last iteration).  If the body is executed after
1522*e4b17023SJohn Marino     the condition, moving the condition to the entry requires
1523*e4b17023SJohn Marino     decrementing one iteration.  */
1524*e4b17023SJohn Marino   exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
1525*e4b17023SJohn Marino   if (exit_1->dest == loop->latch)
1526*e4b17023SJohn Marino     new_rhs = gimple_cond_rhs (cond_stmt);
1527*e4b17023SJohn Marino   else
1528*e4b17023SJohn Marino   {
1529*e4b17023SJohn Marino     new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
1530*e4b17023SJohn Marino 			   gimple_cond_rhs (cond_stmt),
1531*e4b17023SJohn Marino 			   build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
1532*e4b17023SJohn Marino     if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
1533*e4b17023SJohn Marino       {
1534*e4b17023SJohn Marino  	basic_block preheader;
1535*e4b17023SJohn Marino   	gimple_stmt_iterator gsi1;
1536*e4b17023SJohn Marino 
1537*e4b17023SJohn Marino   	preheader = loop_preheader_edge(loop)->src;
1538*e4b17023SJohn Marino     	gsi1 = gsi_after_labels (preheader);
1539*e4b17023SJohn Marino 	new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
1540*e4b17023SJohn Marino 					    NULL_TREE,false,GSI_CONTINUE_LINKING);
1541*e4b17023SJohn Marino       }
1542*e4b17023SJohn Marino   }
1543*e4b17023SJohn Marino   gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
1544*e4b17023SJohn Marino   gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
1545*e4b17023SJohn Marino 
1546*e4b17023SJohn Marino   bbs = get_loop_body_in_dom_order (loop);
1547*e4b17023SJohn Marino 
1548*e4b17023SJohn Marino   for (n = 0; bbs[n] != loop->latch; n++)
1549*e4b17023SJohn Marino     continue;
1550*e4b17023SJohn Marino   nbbs = XNEWVEC (basic_block, n);
1551*e4b17023SJohn Marino   ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1552*e4b17023SJohn Marino 				   bbs + 1, n, nbbs);
1553*e4b17023SJohn Marino   gcc_assert (ok);
1554*e4b17023SJohn Marino   free (bbs);
1555*e4b17023SJohn Marino   ex_bb = nbbs[0];
1556*e4b17023SJohn Marino   free (nbbs);
1557*e4b17023SJohn Marino 
1558*e4b17023SJohn Marino   /* Other than reductions, the only gimple reg that should be copied
1559*e4b17023SJohn Marino      out of the loop is the control variable.  */
1560*e4b17023SJohn Marino 
1561*e4b17023SJohn Marino   control_name = NULL_TREE;
1562*e4b17023SJohn Marino   for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1563*e4b17023SJohn Marino     {
1564*e4b17023SJohn Marino       phi = gsi_stmt (gsi);
1565*e4b17023SJohn Marino       res = PHI_RESULT (phi);
1566*e4b17023SJohn Marino       if (!is_gimple_reg (res))
1567*e4b17023SJohn Marino 	{
1568*e4b17023SJohn Marino 	  gsi_next (&gsi);
1569*e4b17023SJohn Marino 	  continue;
1570*e4b17023SJohn Marino 	}
1571*e4b17023SJohn Marino 
1572*e4b17023SJohn Marino       /* Check if it is a part of reduction.  If it is,
1573*e4b17023SJohn Marino          keep the phi at the reduction's keep_res field.  The
1574*e4b17023SJohn Marino          PHI_RESULT of this phi is the resulting value of the reduction
1575*e4b17023SJohn Marino          variable when exiting the loop.  */
1576*e4b17023SJohn Marino 
1577*e4b17023SJohn Marino       exit = single_dom_exit (loop);
1578*e4b17023SJohn Marino 
1579*e4b17023SJohn Marino       if (htab_elements (reduction_list) > 0)
1580*e4b17023SJohn Marino 	{
1581*e4b17023SJohn Marino 	  struct reduction_info *red;
1582*e4b17023SJohn Marino 
1583*e4b17023SJohn Marino 	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1584*e4b17023SJohn Marino 	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1585*e4b17023SJohn Marino 	  if (red)
1586*e4b17023SJohn Marino 	    {
1587*e4b17023SJohn Marino 	      red->keep_res = phi;
1588*e4b17023SJohn Marino 	      gsi_next (&gsi);
1589*e4b17023SJohn Marino 	      continue;
1590*e4b17023SJohn Marino 	    }
1591*e4b17023SJohn Marino 	}
1592*e4b17023SJohn Marino       gcc_assert (control_name == NULL_TREE
1593*e4b17023SJohn Marino 		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1594*e4b17023SJohn Marino       control_name = res;
1595*e4b17023SJohn Marino       remove_phi_node (&gsi, false);
1596*e4b17023SJohn Marino     }
1597*e4b17023SJohn Marino   gcc_assert (control_name != NULL_TREE);
1598*e4b17023SJohn Marino 
1599*e4b17023SJohn Marino   /* Initialize the control variable to number of iterations
1600*e4b17023SJohn Marino      according to the rhs of the exit condition.  */
1601*e4b17023SJohn Marino   gsi = gsi_after_labels (ex_bb);
1602*e4b17023SJohn Marino   cond_nit = last_stmt (exit->src);
1603*e4b17023SJohn Marino   nit_1 =  gimple_cond_rhs (cond_nit);
1604*e4b17023SJohn Marino   nit_1 = force_gimple_operand_gsi (&gsi,
1605*e4b17023SJohn Marino 				  fold_convert (TREE_TYPE (control_name), nit_1),
1606*e4b17023SJohn Marino 				  false, NULL_TREE, false, GSI_SAME_STMT);
1607*e4b17023SJohn Marino   stmt = gimple_build_assign (control_name, nit_1);
1608*e4b17023SJohn Marino   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1609*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (control_name) = stmt;
1610*e4b17023SJohn Marino }
1611*e4b17023SJohn Marino 
1612*e4b17023SJohn Marino /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1613*e4b17023SJohn Marino    LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1614*e4b17023SJohn Marino    NEW_DATA is the variable that should be initialized from the argument
1615*e4b17023SJohn Marino    of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
1616*e4b17023SJohn Marino    basic block containing GIMPLE_OMP_PARALLEL tree.  */
1617*e4b17023SJohn Marino 
1618*e4b17023SJohn Marino static basic_block
create_parallel_loop(struct loop * loop,tree loop_fn,tree data,tree new_data,unsigned n_threads,location_t loc)1619*e4b17023SJohn Marino create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1620*e4b17023SJohn Marino 		      tree new_data, unsigned n_threads, location_t loc)
1621*e4b17023SJohn Marino {
1622*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1623*e4b17023SJohn Marino   basic_block bb, paral_bb, for_bb, ex_bb;
1624*e4b17023SJohn Marino   tree t, param;
1625*e4b17023SJohn Marino   gimple stmt, for_stmt, phi, cond_stmt;
1626*e4b17023SJohn Marino   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1627*e4b17023SJohn Marino   edge exit, nexit, guard, end, e;
1628*e4b17023SJohn Marino 
1629*e4b17023SJohn Marino   /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
1630*e4b17023SJohn Marino   bb = loop_preheader_edge (loop)->src;
1631*e4b17023SJohn Marino   paral_bb = single_pred (bb);
1632*e4b17023SJohn Marino   gsi = gsi_last_bb (paral_bb);
1633*e4b17023SJohn Marino 
1634*e4b17023SJohn Marino   t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1635*e4b17023SJohn Marino   OMP_CLAUSE_NUM_THREADS_EXPR (t)
1636*e4b17023SJohn Marino     = build_int_cst (integer_type_node, n_threads);
1637*e4b17023SJohn Marino   stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1638*e4b17023SJohn Marino   gimple_set_location (stmt, loc);
1639*e4b17023SJohn Marino 
1640*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1641*e4b17023SJohn Marino 
1642*e4b17023SJohn Marino   /* Initialize NEW_DATA.  */
1643*e4b17023SJohn Marino   if (data)
1644*e4b17023SJohn Marino     {
1645*e4b17023SJohn Marino       gsi = gsi_after_labels (bb);
1646*e4b17023SJohn Marino 
1647*e4b17023SJohn Marino       param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1648*e4b17023SJohn Marino       stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1649*e4b17023SJohn Marino       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1650*e4b17023SJohn Marino       SSA_NAME_DEF_STMT (param) = stmt;
1651*e4b17023SJohn Marino 
1652*e4b17023SJohn Marino       stmt = gimple_build_assign (new_data,
1653*e4b17023SJohn Marino 				  fold_convert (TREE_TYPE (new_data), param));
1654*e4b17023SJohn Marino       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1655*e4b17023SJohn Marino       SSA_NAME_DEF_STMT (new_data) = stmt;
1656*e4b17023SJohn Marino     }
1657*e4b17023SJohn Marino 
1658*e4b17023SJohn Marino   /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
1659*e4b17023SJohn Marino   bb = split_loop_exit_edge (single_dom_exit (loop));
1660*e4b17023SJohn Marino   gsi = gsi_last_bb (bb);
1661*e4b17023SJohn Marino   stmt = gimple_build_omp_return (false);
1662*e4b17023SJohn Marino   gimple_set_location (stmt, loc);
1663*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1664*e4b17023SJohn Marino 
1665*e4b17023SJohn Marino   /* Extract data for GIMPLE_OMP_FOR.  */
1666*e4b17023SJohn Marino   gcc_assert (loop->header == single_dom_exit (loop)->src);
1667*e4b17023SJohn Marino   cond_stmt = last_stmt (loop->header);
1668*e4b17023SJohn Marino 
1669*e4b17023SJohn Marino   cvar = gimple_cond_lhs (cond_stmt);
1670*e4b17023SJohn Marino   cvar_base = SSA_NAME_VAR (cvar);
1671*e4b17023SJohn Marino   phi = SSA_NAME_DEF_STMT (cvar);
1672*e4b17023SJohn Marino   cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1673*e4b17023SJohn Marino   initvar = make_ssa_name (cvar_base, NULL);
1674*e4b17023SJohn Marino   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1675*e4b17023SJohn Marino 	   initvar);
1676*e4b17023SJohn Marino   cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1677*e4b17023SJohn Marino 
1678*e4b17023SJohn Marino   gsi = gsi_last_nondebug_bb (loop->latch);
1679*e4b17023SJohn Marino   gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1680*e4b17023SJohn Marino   gsi_remove (&gsi, true);
1681*e4b17023SJohn Marino 
1682*e4b17023SJohn Marino   /* Prepare cfg.  */
1683*e4b17023SJohn Marino   for_bb = split_edge (loop_preheader_edge (loop));
1684*e4b17023SJohn Marino   ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1685*e4b17023SJohn Marino   extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1686*e4b17023SJohn Marino   gcc_assert (exit == single_dom_exit (loop));
1687*e4b17023SJohn Marino 
1688*e4b17023SJohn Marino   guard = make_edge (for_bb, ex_bb, 0);
1689*e4b17023SJohn Marino   single_succ_edge (loop->latch)->flags = 0;
1690*e4b17023SJohn Marino   end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1691*e4b17023SJohn Marino   for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1692*e4b17023SJohn Marino     {
1693*e4b17023SJohn Marino       source_location locus;
1694*e4b17023SJohn Marino       tree def;
1695*e4b17023SJohn Marino       phi = gsi_stmt (gsi);
1696*e4b17023SJohn Marino       stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1697*e4b17023SJohn Marino 
1698*e4b17023SJohn Marino       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1699*e4b17023SJohn Marino       locus = gimple_phi_arg_location_from_edge (stmt,
1700*e4b17023SJohn Marino 						 loop_preheader_edge (loop));
1701*e4b17023SJohn Marino       add_phi_arg (phi, def, guard, locus);
1702*e4b17023SJohn Marino 
1703*e4b17023SJohn Marino       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1704*e4b17023SJohn Marino       locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1705*e4b17023SJohn Marino       add_phi_arg (phi, def, end, locus);
1706*e4b17023SJohn Marino     }
1707*e4b17023SJohn Marino   e = redirect_edge_and_branch (exit, nexit->dest);
1708*e4b17023SJohn Marino   PENDING_STMT (e) = NULL;
1709*e4b17023SJohn Marino 
1710*e4b17023SJohn Marino   /* Emit GIMPLE_OMP_FOR.  */
1711*e4b17023SJohn Marino   gimple_cond_set_lhs (cond_stmt, cvar_base);
1712*e4b17023SJohn Marino   type = TREE_TYPE (cvar);
1713*e4b17023SJohn Marino   t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1714*e4b17023SJohn Marino   OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1715*e4b17023SJohn Marino 
1716*e4b17023SJohn Marino   for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1717*e4b17023SJohn Marino   gimple_set_location (for_stmt, loc);
1718*e4b17023SJohn Marino   gimple_omp_for_set_index (for_stmt, 0, initvar);
1719*e4b17023SJohn Marino   gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1720*e4b17023SJohn Marino   gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1721*e4b17023SJohn Marino   gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1722*e4b17023SJohn Marino   gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1723*e4b17023SJohn Marino 						cvar_base,
1724*e4b17023SJohn Marino 						build_int_cst (type, 1)));
1725*e4b17023SJohn Marino 
1726*e4b17023SJohn Marino   gsi = gsi_last_bb (for_bb);
1727*e4b17023SJohn Marino   gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1728*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (initvar) = for_stmt;
1729*e4b17023SJohn Marino 
1730*e4b17023SJohn Marino   /* Emit GIMPLE_OMP_CONTINUE.  */
1731*e4b17023SJohn Marino   gsi = gsi_last_bb (loop->latch);
1732*e4b17023SJohn Marino   stmt = gimple_build_omp_continue (cvar_next, cvar);
1733*e4b17023SJohn Marino   gimple_set_location (stmt, loc);
1734*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1735*e4b17023SJohn Marino   SSA_NAME_DEF_STMT (cvar_next) = stmt;
1736*e4b17023SJohn Marino 
1737*e4b17023SJohn Marino   /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
1738*e4b17023SJohn Marino   gsi = gsi_last_bb (ex_bb);
1739*e4b17023SJohn Marino   stmt = gimple_build_omp_return (true);
1740*e4b17023SJohn Marino   gimple_set_location (stmt, loc);
1741*e4b17023SJohn Marino   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1742*e4b17023SJohn Marino 
1743*e4b17023SJohn Marino   return paral_bb;
1744*e4b17023SJohn Marino }
1745*e4b17023SJohn Marino 
1746*e4b17023SJohn Marino /* Generates code to execute the iterations of LOOP in N_THREADS
1747*e4b17023SJohn Marino    threads in parallel.
1748*e4b17023SJohn Marino 
1749*e4b17023SJohn Marino    NITER describes number of iterations of LOOP.
1750*e4b17023SJohn Marino    REDUCTION_LIST describes the reductions existent in the LOOP.  */
1751*e4b17023SJohn Marino 
1752*e4b17023SJohn Marino static void
gen_parallel_loop(struct loop * loop,htab_t reduction_list,unsigned n_threads,struct tree_niter_desc * niter)1753*e4b17023SJohn Marino gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1754*e4b17023SJohn Marino 		   unsigned n_threads, struct tree_niter_desc *niter)
1755*e4b17023SJohn Marino {
1756*e4b17023SJohn Marino   loop_iterator li;
1757*e4b17023SJohn Marino   tree many_iterations_cond, type, nit;
1758*e4b17023SJohn Marino   tree arg_struct, new_arg_struct;
1759*e4b17023SJohn Marino   gimple_seq stmts;
1760*e4b17023SJohn Marino   basic_block parallel_head;
1761*e4b17023SJohn Marino   edge entry, exit;
1762*e4b17023SJohn Marino   struct clsn_data clsn_data;
1763*e4b17023SJohn Marino   unsigned prob;
1764*e4b17023SJohn Marino   location_t loc;
1765*e4b17023SJohn Marino   gimple cond_stmt;
1766*e4b17023SJohn Marino 
1767*e4b17023SJohn Marino   /* From
1768*e4b17023SJohn Marino 
1769*e4b17023SJohn Marino      ---------------------------------------------------------------------
1770*e4b17023SJohn Marino      loop
1771*e4b17023SJohn Marino        {
1772*e4b17023SJohn Marino 	 IV = phi (INIT, IV + STEP)
1773*e4b17023SJohn Marino 	 BODY1;
1774*e4b17023SJohn Marino 	 if (COND)
1775*e4b17023SJohn Marino 	   break;
1776*e4b17023SJohn Marino 	 BODY2;
1777*e4b17023SJohn Marino        }
1778*e4b17023SJohn Marino      ---------------------------------------------------------------------
1779*e4b17023SJohn Marino 
1780*e4b17023SJohn Marino      with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1781*e4b17023SJohn Marino      we generate the following code:
1782*e4b17023SJohn Marino 
1783*e4b17023SJohn Marino      ---------------------------------------------------------------------
1784*e4b17023SJohn Marino 
1785*e4b17023SJohn Marino      if (MAY_BE_ZERO
1786*e4b17023SJohn Marino      || NITER < MIN_PER_THREAD * N_THREADS)
1787*e4b17023SJohn Marino      goto original;
1788*e4b17023SJohn Marino 
1789*e4b17023SJohn Marino      BODY1;
1790*e4b17023SJohn Marino      store all local loop-invariant variables used in body of the loop to DATA.
1791*e4b17023SJohn Marino      GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1792*e4b17023SJohn Marino      load the variables from DATA.
1793*e4b17023SJohn Marino      GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1794*e4b17023SJohn Marino      BODY2;
1795*e4b17023SJohn Marino      BODY1;
1796*e4b17023SJohn Marino      GIMPLE_OMP_CONTINUE;
1797*e4b17023SJohn Marino      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
1798*e4b17023SJohn Marino      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
1799*e4b17023SJohn Marino      goto end;
1800*e4b17023SJohn Marino 
1801*e4b17023SJohn Marino      original:
1802*e4b17023SJohn Marino      loop
1803*e4b17023SJohn Marino        {
1804*e4b17023SJohn Marino 	 IV = phi (INIT, IV + STEP)
1805*e4b17023SJohn Marino 	 BODY1;
1806*e4b17023SJohn Marino 	 if (COND)
1807*e4b17023SJohn Marino 	   break;
1808*e4b17023SJohn Marino 	 BODY2;
1809*e4b17023SJohn Marino        }
1810*e4b17023SJohn Marino 
1811*e4b17023SJohn Marino      end:
1812*e4b17023SJohn Marino 
1813*e4b17023SJohn Marino    */
1814*e4b17023SJohn Marino 
1815*e4b17023SJohn Marino   /* Create two versions of the loop -- in the old one, we know that the
1816*e4b17023SJohn Marino      number of iterations is large enough, and we will transform it into the
1817*e4b17023SJohn Marino      loop that will be split to loop_fn, the new one will be used for the
1818*e4b17023SJohn Marino      remaining iterations.  */
1819*e4b17023SJohn Marino 
1820*e4b17023SJohn Marino   type = TREE_TYPE (niter->niter);
1821*e4b17023SJohn Marino   nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1822*e4b17023SJohn Marino 			      NULL_TREE);
1823*e4b17023SJohn Marino   if (stmts)
1824*e4b17023SJohn Marino     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1825*e4b17023SJohn Marino 
1826*e4b17023SJohn Marino   many_iterations_cond =
1827*e4b17023SJohn Marino     fold_build2 (GE_EXPR, boolean_type_node,
1828*e4b17023SJohn Marino 		 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1829*e4b17023SJohn Marino   many_iterations_cond
1830*e4b17023SJohn Marino     = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1831*e4b17023SJohn Marino 		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
1832*e4b17023SJohn Marino 		   many_iterations_cond);
1833*e4b17023SJohn Marino   many_iterations_cond
1834*e4b17023SJohn Marino     = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1835*e4b17023SJohn Marino   if (stmts)
1836*e4b17023SJohn Marino     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1837*e4b17023SJohn Marino   if (!is_gimple_condexpr (many_iterations_cond))
1838*e4b17023SJohn Marino     {
1839*e4b17023SJohn Marino       many_iterations_cond
1840*e4b17023SJohn Marino 	= force_gimple_operand (many_iterations_cond, &stmts,
1841*e4b17023SJohn Marino 				true, NULL_TREE);
1842*e4b17023SJohn Marino       if (stmts)
1843*e4b17023SJohn Marino 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1844*e4b17023SJohn Marino     }
1845*e4b17023SJohn Marino 
1846*e4b17023SJohn Marino   initialize_original_copy_tables ();
1847*e4b17023SJohn Marino 
1848*e4b17023SJohn Marino   /* We assume that the loop usually iterates a lot.  */
1849*e4b17023SJohn Marino   prob = 4 * REG_BR_PROB_BASE / 5;
1850*e4b17023SJohn Marino   loop_version (loop, many_iterations_cond, NULL,
1851*e4b17023SJohn Marino 		prob, prob, REG_BR_PROB_BASE - prob, true);
1852*e4b17023SJohn Marino   update_ssa (TODO_update_ssa);
1853*e4b17023SJohn Marino   free_original_copy_tables ();
1854*e4b17023SJohn Marino 
1855*e4b17023SJohn Marino   /* Base all the induction variables in LOOP on a single control one.  */
1856*e4b17023SJohn Marino   canonicalize_loop_ivs (loop, &nit, true);
1857*e4b17023SJohn Marino 
1858*e4b17023SJohn Marino   /* Ensure that the exit condition is the first statement in the loop.  */
1859*e4b17023SJohn Marino   transform_to_exit_first_loop (loop, reduction_list, nit);
1860*e4b17023SJohn Marino 
1861*e4b17023SJohn Marino   /* Generate initializations for reductions.  */
1862*e4b17023SJohn Marino   if (htab_elements (reduction_list) > 0)
1863*e4b17023SJohn Marino     htab_traverse (reduction_list, initialize_reductions, loop);
1864*e4b17023SJohn Marino 
1865*e4b17023SJohn Marino   /* Eliminate the references to local variables from the loop.  */
1866*e4b17023SJohn Marino   gcc_assert (single_exit (loop));
1867*e4b17023SJohn Marino   entry = loop_preheader_edge (loop);
1868*e4b17023SJohn Marino   exit = single_dom_exit (loop);
1869*e4b17023SJohn Marino 
1870*e4b17023SJohn Marino   eliminate_local_variables (entry, exit);
1871*e4b17023SJohn Marino   /* In the old loop, move all variables non-local to the loop to a structure
1872*e4b17023SJohn Marino      and back, and create separate decls for the variables used in loop.  */
1873*e4b17023SJohn Marino   separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1874*e4b17023SJohn Marino 			    &new_arg_struct, &clsn_data);
1875*e4b17023SJohn Marino 
1876*e4b17023SJohn Marino   /* Create the parallel constructs.  */
1877*e4b17023SJohn Marino   loc = UNKNOWN_LOCATION;
1878*e4b17023SJohn Marino   cond_stmt = last_stmt (loop->header);
1879*e4b17023SJohn Marino   if (cond_stmt)
1880*e4b17023SJohn Marino     loc = gimple_location (cond_stmt);
1881*e4b17023SJohn Marino   parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1882*e4b17023SJohn Marino 					new_arg_struct, n_threads, loc);
1883*e4b17023SJohn Marino   if (htab_elements (reduction_list) > 0)
1884*e4b17023SJohn Marino     create_call_for_reduction (loop, reduction_list, &clsn_data);
1885*e4b17023SJohn Marino 
1886*e4b17023SJohn Marino   scev_reset ();
1887*e4b17023SJohn Marino 
1888*e4b17023SJohn Marino   /* Cancel the loop (it is simpler to do it here rather than to teach the
1889*e4b17023SJohn Marino      expander to do it).  */
1890*e4b17023SJohn Marino   cancel_loop_tree (loop);
1891*e4b17023SJohn Marino 
1892*e4b17023SJohn Marino   /* Free loop bound estimations that could contain references to
1893*e4b17023SJohn Marino      removed statements.  */
1894*e4b17023SJohn Marino   FOR_EACH_LOOP (li, loop, 0)
1895*e4b17023SJohn Marino     free_numbers_of_iterations_estimates_loop (loop);
1896*e4b17023SJohn Marino 
1897*e4b17023SJohn Marino   /* Expand the parallel constructs.  We do it directly here instead of running
1898*e4b17023SJohn Marino      a separate expand_omp pass, since it is more efficient, and less likely to
1899*e4b17023SJohn Marino      cause troubles with further analyses not being able to deal with the
1900*e4b17023SJohn Marino      OMP trees.  */
1901*e4b17023SJohn Marino 
1902*e4b17023SJohn Marino   omp_expand_local (parallel_head);
1903*e4b17023SJohn Marino }
1904*e4b17023SJohn Marino 
1905*e4b17023SJohn Marino /* Returns true when LOOP contains vector phi nodes.  */
1906*e4b17023SJohn Marino 
1907*e4b17023SJohn Marino static bool
loop_has_vector_phi_nodes(struct loop * loop ATTRIBUTE_UNUSED)1908*e4b17023SJohn Marino loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1909*e4b17023SJohn Marino {
1910*e4b17023SJohn Marino   unsigned i;
1911*e4b17023SJohn Marino   basic_block *bbs = get_loop_body_in_dom_order (loop);
1912*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1913*e4b17023SJohn Marino   bool res = true;
1914*e4b17023SJohn Marino 
1915*e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
1916*e4b17023SJohn Marino     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1917*e4b17023SJohn Marino       if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1918*e4b17023SJohn Marino 	goto end;
1919*e4b17023SJohn Marino 
1920*e4b17023SJohn Marino   res = false;
1921*e4b17023SJohn Marino  end:
1922*e4b17023SJohn Marino   free (bbs);
1923*e4b17023SJohn Marino   return res;
1924*e4b17023SJohn Marino }
1925*e4b17023SJohn Marino 
1926*e4b17023SJohn Marino /* Create a reduction_info struct, initialize it with REDUC_STMT
1927*e4b17023SJohn Marino    and PHI, insert it to the REDUCTION_LIST.  */
1928*e4b17023SJohn Marino 
1929*e4b17023SJohn Marino static void
build_new_reduction(htab_t reduction_list,gimple reduc_stmt,gimple phi)1930*e4b17023SJohn Marino build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1931*e4b17023SJohn Marino {
1932*e4b17023SJohn Marino   PTR *slot;
1933*e4b17023SJohn Marino   struct reduction_info *new_reduction;
1934*e4b17023SJohn Marino 
1935*e4b17023SJohn Marino   gcc_assert (reduc_stmt);
1936*e4b17023SJohn Marino 
1937*e4b17023SJohn Marino   if (dump_file && (dump_flags & TDF_DETAILS))
1938*e4b17023SJohn Marino     {
1939*e4b17023SJohn Marino       fprintf (dump_file,
1940*e4b17023SJohn Marino 	       "Detected reduction. reduction stmt is: \n");
1941*e4b17023SJohn Marino       print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1942*e4b17023SJohn Marino       fprintf (dump_file, "\n");
1943*e4b17023SJohn Marino     }
1944*e4b17023SJohn Marino 
1945*e4b17023SJohn Marino   new_reduction = XCNEW (struct reduction_info);
1946*e4b17023SJohn Marino 
1947*e4b17023SJohn Marino   new_reduction->reduc_stmt = reduc_stmt;
1948*e4b17023SJohn Marino   new_reduction->reduc_phi = phi;
1949*e4b17023SJohn Marino   new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1950*e4b17023SJohn Marino   new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1951*e4b17023SJohn Marino   slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1952*e4b17023SJohn Marino   *slot = new_reduction;
1953*e4b17023SJohn Marino }
1954*e4b17023SJohn Marino 
1955*e4b17023SJohn Marino /* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */
1956*e4b17023SJohn Marino 
1957*e4b17023SJohn Marino static int
set_reduc_phi_uids(void ** slot,void * data ATTRIBUTE_UNUSED)1958*e4b17023SJohn Marino set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1959*e4b17023SJohn Marino {
1960*e4b17023SJohn Marino   struct reduction_info *const red = (struct reduction_info *) *slot;
1961*e4b17023SJohn Marino   gimple_set_uid (red->reduc_phi, red->reduc_version);
1962*e4b17023SJohn Marino   return 1;
1963*e4b17023SJohn Marino }
1964*e4b17023SJohn Marino 
1965*e4b17023SJohn Marino /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
1966*e4b17023SJohn Marino 
1967*e4b17023SJohn Marino static void
gather_scalar_reductions(loop_p loop,htab_t reduction_list)1968*e4b17023SJohn Marino gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1969*e4b17023SJohn Marino {
1970*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1971*e4b17023SJohn Marino   loop_vec_info simple_loop_info;
1972*e4b17023SJohn Marino 
1973*e4b17023SJohn Marino   vect_dump = NULL;
1974*e4b17023SJohn Marino   simple_loop_info = vect_analyze_loop_form (loop);
1975*e4b17023SJohn Marino 
1976*e4b17023SJohn Marino   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1977*e4b17023SJohn Marino     {
1978*e4b17023SJohn Marino       gimple phi = gsi_stmt (gsi);
1979*e4b17023SJohn Marino       affine_iv iv;
1980*e4b17023SJohn Marino       tree res = PHI_RESULT (phi);
1981*e4b17023SJohn Marino       bool double_reduc;
1982*e4b17023SJohn Marino 
1983*e4b17023SJohn Marino       if (!is_gimple_reg (res))
1984*e4b17023SJohn Marino 	continue;
1985*e4b17023SJohn Marino 
1986*e4b17023SJohn Marino       if (!simple_iv (loop, loop, res, &iv, true)
1987*e4b17023SJohn Marino 	&& simple_loop_info)
1988*e4b17023SJohn Marino 	{
1989*e4b17023SJohn Marino            gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1990*e4b17023SJohn Marino 							    phi, true,
1991*e4b17023SJohn Marino 							    &double_reduc);
1992*e4b17023SJohn Marino 	   if (reduc_stmt && !double_reduc)
1993*e4b17023SJohn Marino               build_new_reduction (reduction_list, reduc_stmt, phi);
1994*e4b17023SJohn Marino         }
1995*e4b17023SJohn Marino     }
1996*e4b17023SJohn Marino   destroy_loop_vec_info (simple_loop_info, true);
1997*e4b17023SJohn Marino 
1998*e4b17023SJohn Marino   /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1999*e4b17023SJohn Marino      and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2000*e4b17023SJohn Marino      only now.  */
2001*e4b17023SJohn Marino   htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
2002*e4b17023SJohn Marino }
2003*e4b17023SJohn Marino 
2004*e4b17023SJohn Marino /* Try to initialize NITER for code generation part.  */
2005*e4b17023SJohn Marino 
2006*e4b17023SJohn Marino static bool
try_get_loop_niter(loop_p loop,struct tree_niter_desc * niter)2007*e4b17023SJohn Marino try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2008*e4b17023SJohn Marino {
2009*e4b17023SJohn Marino   edge exit = single_dom_exit (loop);
2010*e4b17023SJohn Marino 
2011*e4b17023SJohn Marino   gcc_assert (exit);
2012*e4b17023SJohn Marino 
2013*e4b17023SJohn Marino   /* We need to know # of iterations, and there should be no uses of values
2014*e4b17023SJohn Marino      defined inside loop outside of it, unless the values are invariants of
2015*e4b17023SJohn Marino      the loop.  */
2016*e4b17023SJohn Marino   if (!number_of_iterations_exit (loop, exit, niter, false))
2017*e4b17023SJohn Marino     {
2018*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
2019*e4b17023SJohn Marino 	fprintf (dump_file, "  FAILED: number of iterations not known\n");
2020*e4b17023SJohn Marino       return false;
2021*e4b17023SJohn Marino     }
2022*e4b17023SJohn Marino 
2023*e4b17023SJohn Marino   return true;
2024*e4b17023SJohn Marino }
2025*e4b17023SJohn Marino 
2026*e4b17023SJohn Marino /* Try to initialize REDUCTION_LIST for code generation part.
2027*e4b17023SJohn Marino    REDUCTION_LIST describes the reductions.  */
2028*e4b17023SJohn Marino 
2029*e4b17023SJohn Marino static bool
try_create_reduction_list(loop_p loop,htab_t reduction_list)2030*e4b17023SJohn Marino try_create_reduction_list (loop_p loop, htab_t reduction_list)
2031*e4b17023SJohn Marino {
2032*e4b17023SJohn Marino   edge exit = single_dom_exit (loop);
2033*e4b17023SJohn Marino   gimple_stmt_iterator gsi;
2034*e4b17023SJohn Marino 
2035*e4b17023SJohn Marino   gcc_assert (exit);
2036*e4b17023SJohn Marino 
2037*e4b17023SJohn Marino   gather_scalar_reductions (loop, reduction_list);
2038*e4b17023SJohn Marino 
2039*e4b17023SJohn Marino 
2040*e4b17023SJohn Marino   for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2041*e4b17023SJohn Marino     {
2042*e4b17023SJohn Marino       gimple phi = gsi_stmt (gsi);
2043*e4b17023SJohn Marino       struct reduction_info *red;
2044*e4b17023SJohn Marino       imm_use_iterator imm_iter;
2045*e4b17023SJohn Marino       use_operand_p use_p;
2046*e4b17023SJohn Marino       gimple reduc_phi;
2047*e4b17023SJohn Marino       tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2048*e4b17023SJohn Marino 
2049*e4b17023SJohn Marino       if (is_gimple_reg (val))
2050*e4b17023SJohn Marino 	{
2051*e4b17023SJohn Marino 	  if (dump_file && (dump_flags & TDF_DETAILS))
2052*e4b17023SJohn Marino 	    {
2053*e4b17023SJohn Marino 	      fprintf (dump_file, "phi is ");
2054*e4b17023SJohn Marino 	      print_gimple_stmt (dump_file, phi, 0, 0);
2055*e4b17023SJohn Marino 	      fprintf (dump_file, "arg of phi to exit:   value ");
2056*e4b17023SJohn Marino 	      print_generic_expr (dump_file, val, 0);
2057*e4b17023SJohn Marino 	      fprintf (dump_file, " used outside loop\n");
2058*e4b17023SJohn Marino 	      fprintf (dump_file,
2059*e4b17023SJohn Marino 		       "  checking if it a part of reduction pattern:  \n");
2060*e4b17023SJohn Marino 	    }
2061*e4b17023SJohn Marino 	  if (htab_elements (reduction_list) == 0)
2062*e4b17023SJohn Marino 	    {
2063*e4b17023SJohn Marino 	      if (dump_file && (dump_flags & TDF_DETAILS))
2064*e4b17023SJohn Marino 		fprintf (dump_file,
2065*e4b17023SJohn Marino 			 "  FAILED: it is not a part of reduction.\n");
2066*e4b17023SJohn Marino 	      return false;
2067*e4b17023SJohn Marino 	    }
2068*e4b17023SJohn Marino 	  reduc_phi = NULL;
2069*e4b17023SJohn Marino 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2070*e4b17023SJohn Marino 	    {
2071*e4b17023SJohn Marino 	      if (!gimple_debug_bind_p (USE_STMT (use_p))
2072*e4b17023SJohn Marino 		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2073*e4b17023SJohn Marino 		{
2074*e4b17023SJohn Marino 		  reduc_phi = USE_STMT (use_p);
2075*e4b17023SJohn Marino 		  break;
2076*e4b17023SJohn Marino 		}
2077*e4b17023SJohn Marino 	    }
2078*e4b17023SJohn Marino 	  red = reduction_phi (reduction_list, reduc_phi);
2079*e4b17023SJohn Marino 	  if (red == NULL)
2080*e4b17023SJohn Marino 	    {
2081*e4b17023SJohn Marino 	      if (dump_file && (dump_flags & TDF_DETAILS))
2082*e4b17023SJohn Marino 		fprintf (dump_file,
2083*e4b17023SJohn Marino 			 "  FAILED: it is not a part of reduction.\n");
2084*e4b17023SJohn Marino 	      return false;
2085*e4b17023SJohn Marino 	    }
2086*e4b17023SJohn Marino 	  if (dump_file && (dump_flags & TDF_DETAILS))
2087*e4b17023SJohn Marino 	    {
2088*e4b17023SJohn Marino 	      fprintf (dump_file, "reduction phi is  ");
2089*e4b17023SJohn Marino 	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2090*e4b17023SJohn Marino 	      fprintf (dump_file, "reduction stmt is  ");
2091*e4b17023SJohn Marino 	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2092*e4b17023SJohn Marino 	    }
2093*e4b17023SJohn Marino 	}
2094*e4b17023SJohn Marino     }
2095*e4b17023SJohn Marino 
2096*e4b17023SJohn Marino   /* The iterations of the loop may communicate only through bivs whose
2097*e4b17023SJohn Marino      iteration space can be distributed efficiently.  */
2098*e4b17023SJohn Marino   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2099*e4b17023SJohn Marino     {
2100*e4b17023SJohn Marino       gimple phi = gsi_stmt (gsi);
2101*e4b17023SJohn Marino       tree def = PHI_RESULT (phi);
2102*e4b17023SJohn Marino       affine_iv iv;
2103*e4b17023SJohn Marino 
2104*e4b17023SJohn Marino       if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
2105*e4b17023SJohn Marino 	{
2106*e4b17023SJohn Marino 	  struct reduction_info *red;
2107*e4b17023SJohn Marino 
2108*e4b17023SJohn Marino 	  red = reduction_phi (reduction_list, phi);
2109*e4b17023SJohn Marino 	  if (red == NULL)
2110*e4b17023SJohn Marino 	    {
2111*e4b17023SJohn Marino 	      if (dump_file && (dump_flags & TDF_DETAILS))
2112*e4b17023SJohn Marino 		fprintf (dump_file,
2113*e4b17023SJohn Marino 			 "  FAILED: scalar dependency between iterations\n");
2114*e4b17023SJohn Marino 	      return false;
2115*e4b17023SJohn Marino 	    }
2116*e4b17023SJohn Marino 	}
2117*e4b17023SJohn Marino     }
2118*e4b17023SJohn Marino 
2119*e4b17023SJohn Marino 
2120*e4b17023SJohn Marino   return true;
2121*e4b17023SJohn Marino }
2122*e4b17023SJohn Marino 
2123*e4b17023SJohn Marino /* Detect parallel loops and generate parallel code using libgomp
2124*e4b17023SJohn Marino    primitives.  Returns true if some loop was parallelized, false
2125*e4b17023SJohn Marino    otherwise.  */
2126*e4b17023SJohn Marino 
2127*e4b17023SJohn Marino bool
parallelize_loops(void)2128*e4b17023SJohn Marino parallelize_loops (void)
2129*e4b17023SJohn Marino {
2130*e4b17023SJohn Marino   unsigned n_threads = flag_tree_parallelize_loops;
2131*e4b17023SJohn Marino   bool changed = false;
2132*e4b17023SJohn Marino   struct loop *loop;
2133*e4b17023SJohn Marino   struct tree_niter_desc niter_desc;
2134*e4b17023SJohn Marino   loop_iterator li;
2135*e4b17023SJohn Marino   htab_t reduction_list;
2136*e4b17023SJohn Marino   struct obstack parloop_obstack;
2137*e4b17023SJohn Marino   HOST_WIDE_INT estimated;
2138*e4b17023SJohn Marino   LOC loop_loc;
2139*e4b17023SJohn Marino 
2140*e4b17023SJohn Marino   /* Do not parallelize loops in the functions created by parallelization.  */
2141*e4b17023SJohn Marino   if (parallelized_function_p (cfun->decl))
2142*e4b17023SJohn Marino     return false;
2143*e4b17023SJohn Marino   if (cfun->has_nonlocal_label)
2144*e4b17023SJohn Marino     return false;
2145*e4b17023SJohn Marino 
2146*e4b17023SJohn Marino   gcc_obstack_init (&parloop_obstack);
2147*e4b17023SJohn Marino   reduction_list = htab_create (10, reduction_info_hash,
2148*e4b17023SJohn Marino 				     reduction_info_eq, free);
2149*e4b17023SJohn Marino   init_stmt_vec_info_vec ();
2150*e4b17023SJohn Marino 
2151*e4b17023SJohn Marino   FOR_EACH_LOOP (li, loop, 0)
2152*e4b17023SJohn Marino     {
2153*e4b17023SJohn Marino       htab_empty (reduction_list);
2154*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
2155*e4b17023SJohn Marino       {
2156*e4b17023SJohn Marino         fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2157*e4b17023SJohn Marino 	if (loop->inner)
2158*e4b17023SJohn Marino 	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2159*e4b17023SJohn Marino 	else
2160*e4b17023SJohn Marino 	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
2161*e4b17023SJohn Marino       }
2162*e4b17023SJohn Marino 
2163*e4b17023SJohn Marino       /* If we use autopar in graphite pass, we use its marked dependency
2164*e4b17023SJohn Marino       checking results.  */
2165*e4b17023SJohn Marino       if (flag_loop_parallelize_all && !loop->can_be_parallel)
2166*e4b17023SJohn Marino       {
2167*e4b17023SJohn Marino         if (dump_file && (dump_flags & TDF_DETAILS))
2168*e4b17023SJohn Marino 	   fprintf (dump_file, "loop is not parallel according to graphite\n");
2169*e4b17023SJohn Marino 	continue;
2170*e4b17023SJohn Marino       }
2171*e4b17023SJohn Marino 
2172*e4b17023SJohn Marino       if (!single_dom_exit (loop))
2173*e4b17023SJohn Marino       {
2174*e4b17023SJohn Marino 
2175*e4b17023SJohn Marino         if (dump_file && (dump_flags & TDF_DETAILS))
2176*e4b17023SJohn Marino 	  fprintf (dump_file, "loop is !single_dom_exit\n");
2177*e4b17023SJohn Marino 
2178*e4b17023SJohn Marino 	continue;
2179*e4b17023SJohn Marino       }
2180*e4b17023SJohn Marino 
2181*e4b17023SJohn Marino       if (/* And of course, the loop must be parallelizable.  */
2182*e4b17023SJohn Marino 	  !can_duplicate_loop_p (loop)
2183*e4b17023SJohn Marino 	  || loop_has_blocks_with_irreducible_flag (loop)
2184*e4b17023SJohn Marino 	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2185*e4b17023SJohn Marino 	  /* FIXME: the check for vector phi nodes could be removed.  */
2186*e4b17023SJohn Marino 	  || loop_has_vector_phi_nodes (loop)
2187*e4b17023SJohn Marino 	  /* FIXME: transform_to_exit_first_loop does not handle not
2188*e4b17023SJohn Marino 	     header-copied loops correctly - see PR46886.  */
2189*e4b17023SJohn Marino 	  || !do_while_loop_p (loop))
2190*e4b17023SJohn Marino 	continue;
2191*e4b17023SJohn Marino       estimated = max_stmt_executions_int (loop, false);
2192*e4b17023SJohn Marino       /* FIXME: Bypass this check as graphite doesn't update the
2193*e4b17023SJohn Marino       count and frequency correctly now.  */
2194*e4b17023SJohn Marino       if (!flag_loop_parallelize_all
2195*e4b17023SJohn Marino 	  && ((estimated !=-1
2196*e4b17023SJohn Marino 	     && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2197*e4b17023SJohn Marino 	      /* Do not bother with loops in cold areas.  */
2198*e4b17023SJohn Marino 	      || optimize_loop_nest_for_size_p (loop)))
2199*e4b17023SJohn Marino 	continue;
2200*e4b17023SJohn Marino 
2201*e4b17023SJohn Marino       if (!try_get_loop_niter (loop, &niter_desc))
2202*e4b17023SJohn Marino 	continue;
2203*e4b17023SJohn Marino 
2204*e4b17023SJohn Marino       if (!try_create_reduction_list (loop, reduction_list))
2205*e4b17023SJohn Marino 	continue;
2206*e4b17023SJohn Marino 
2207*e4b17023SJohn Marino       if (!flag_loop_parallelize_all
2208*e4b17023SJohn Marino 	  && !loop_parallel_p (loop, &parloop_obstack))
2209*e4b17023SJohn Marino 	continue;
2210*e4b17023SJohn Marino 
2211*e4b17023SJohn Marino       changed = true;
2212*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
2213*e4b17023SJohn Marino       {
2214*e4b17023SJohn Marino 	if (loop->inner)
2215*e4b17023SJohn Marino 	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2216*e4b17023SJohn Marino 	else
2217*e4b17023SJohn Marino 	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2218*e4b17023SJohn Marino 	loop_loc = find_loop_location (loop);
2219*e4b17023SJohn Marino 	if (loop_loc != UNKNOWN_LOC)
2220*e4b17023SJohn Marino 	  fprintf (dump_file, "\nloop at %s:%d: ",
2221*e4b17023SJohn Marino 		   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2222*e4b17023SJohn Marino       }
2223*e4b17023SJohn Marino       gen_parallel_loop (loop, reduction_list,
2224*e4b17023SJohn Marino 			 n_threads, &niter_desc);
2225*e4b17023SJohn Marino       verify_flow_info ();
2226*e4b17023SJohn Marino       verify_dominators (CDI_DOMINATORS);
2227*e4b17023SJohn Marino       verify_loop_structure ();
2228*e4b17023SJohn Marino       verify_loop_closed_ssa (true);
2229*e4b17023SJohn Marino     }
2230*e4b17023SJohn Marino 
2231*e4b17023SJohn Marino   free_stmt_vec_info_vec ();
2232*e4b17023SJohn Marino   htab_delete (reduction_list);
2233*e4b17023SJohn Marino   obstack_free (&parloop_obstack, NULL);
2234*e4b17023SJohn Marino 
2235*e4b17023SJohn Marino   /* Parallelization will cause new function calls to be inserted through
2236*e4b17023SJohn Marino      which local variables will escape.  Reset the points-to solution
2237*e4b17023SJohn Marino      for ESCAPED.  */
2238*e4b17023SJohn Marino   if (changed)
2239*e4b17023SJohn Marino     pt_solution_reset (&cfun->gimple_df->escaped);
2240*e4b17023SJohn Marino 
2241*e4b17023SJohn Marino   return changed;
2242*e4b17023SJohn Marino }
2243*e4b17023SJohn Marino 
2244*e4b17023SJohn Marino #include "gt-tree-parloops.h"
2245