xref: /dflybsd-src/contrib/gcc-8.0/gcc/tree-parloops.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Loop autoparallelization.
2*38fd1498Szrj    Copyright (C) 2006-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4*38fd1498Szrj    Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5*38fd1498Szrj 
6*38fd1498Szrj This file is part of GCC.
7*38fd1498Szrj 
8*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
9*38fd1498Szrj the terms of the GNU General Public License as published by the Free
10*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
11*38fd1498Szrj version.
12*38fd1498Szrj 
13*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
15*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16*38fd1498Szrj for more details.
17*38fd1498Szrj 
18*38fd1498Szrj You should have received a copy of the GNU General Public License
19*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
20*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
21*38fd1498Szrj 
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "coretypes.h"
25*38fd1498Szrj #include "backend.h"
26*38fd1498Szrj #include "tree.h"
27*38fd1498Szrj #include "gimple.h"
28*38fd1498Szrj #include "cfghooks.h"
29*38fd1498Szrj #include "tree-pass.h"
30*38fd1498Szrj #include "ssa.h"
31*38fd1498Szrj #include "cgraph.h"
32*38fd1498Szrj #include "gimple-pretty-print.h"
33*38fd1498Szrj #include "fold-const.h"
34*38fd1498Szrj #include "gimplify.h"
35*38fd1498Szrj #include "gimple-iterator.h"
36*38fd1498Szrj #include "gimplify-me.h"
37*38fd1498Szrj #include "gimple-walk.h"
38*38fd1498Szrj #include "stor-layout.h"
39*38fd1498Szrj #include "tree-nested.h"
40*38fd1498Szrj #include "tree-cfg.h"
41*38fd1498Szrj #include "tree-ssa-loop-ivopts.h"
42*38fd1498Szrj #include "tree-ssa-loop-manip.h"
43*38fd1498Szrj #include "tree-ssa-loop-niter.h"
44*38fd1498Szrj #include "tree-ssa-loop.h"
45*38fd1498Szrj #include "tree-into-ssa.h"
46*38fd1498Szrj #include "cfgloop.h"
47*38fd1498Szrj #include "tree-scalar-evolution.h"
48*38fd1498Szrj #include "langhooks.h"
49*38fd1498Szrj #include "tree-vectorizer.h"
50*38fd1498Szrj #include "tree-hasher.h"
51*38fd1498Szrj #include "tree-parloops.h"
52*38fd1498Szrj #include "omp-general.h"
53*38fd1498Szrj #include "omp-low.h"
54*38fd1498Szrj #include "tree-ssa.h"
55*38fd1498Szrj #include "params.h"
56*38fd1498Szrj #include "params-enum.h"
57*38fd1498Szrj #include "tree-ssa-alias.h"
58*38fd1498Szrj #include "tree-eh.h"
59*38fd1498Szrj #include "gomp-constants.h"
60*38fd1498Szrj #include "tree-dfa.h"
61*38fd1498Szrj #include "stringpool.h"
62*38fd1498Szrj #include "attribs.h"
63*38fd1498Szrj 
64*38fd1498Szrj /* This pass tries to distribute iterations of loops into several threads.
65*38fd1498Szrj    The implementation is straightforward -- for each loop we test whether its
66*38fd1498Szrj    iterations are independent, and if it is the case (and some additional
67*38fd1498Szrj    conditions regarding profitability and correctness are satisfied), we
68*38fd1498Szrj    add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
69*38fd1498Szrj    machinery do its job.
70*38fd1498Szrj 
71*38fd1498Szrj    The most of the complexity is in bringing the code into shape expected
72*38fd1498Szrj    by the omp expanders:
73*38fd1498Szrj    -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
74*38fd1498Szrj       variable and that the exit test is at the start of the loop body
75*38fd1498Szrj    -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
76*38fd1498Szrj       variables by accesses through pointers, and breaking up ssa chains
77*38fd1498Szrj       by storing the values incoming to the parallelized loop to a structure
78*38fd1498Szrj       passed to the new function as an argument (something similar is done
79*38fd1498Szrj       in omp gimplification, unfortunately only a small part of the code
80*38fd1498Szrj       can be shared).
81*38fd1498Szrj 
82*38fd1498Szrj    TODO:
83*38fd1498Szrj    -- if there are several parallelizable loops in a function, it may be
84*38fd1498Szrj       possible to generate the threads just once (using synchronization to
85*38fd1498Szrj       ensure that cross-loop dependences are obeyed).
86*38fd1498Szrj    -- handling of common reduction patterns for outer loops.
87*38fd1498Szrj 
88*38fd1498Szrj    More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
89*38fd1498Szrj /*
90*38fd1498Szrj   Reduction handling:
91*38fd1498Szrj   currently we use vect_force_simple_reduction() to detect reduction patterns.
92*38fd1498Szrj   The code transformation will be introduced by an example.
93*38fd1498Szrj 
94*38fd1498Szrj 
95*38fd1498Szrj parloop
96*38fd1498Szrj {
97*38fd1498Szrj   int sum=1;
98*38fd1498Szrj 
99*38fd1498Szrj   for (i = 0; i < N; i++)
100*38fd1498Szrj    {
101*38fd1498Szrj     x[i] = i + 3;
102*38fd1498Szrj     sum+=x[i];
103*38fd1498Szrj    }
104*38fd1498Szrj }
105*38fd1498Szrj 
106*38fd1498Szrj gimple-like code:
107*38fd1498Szrj header_bb:
108*38fd1498Szrj 
109*38fd1498Szrj   # sum_29 = PHI <sum_11(5), 1(3)>
110*38fd1498Szrj   # i_28 = PHI <i_12(5), 0(3)>
111*38fd1498Szrj   D.1795_8 = i_28 + 3;
112*38fd1498Szrj   x[i_28] = D.1795_8;
113*38fd1498Szrj   sum_11 = D.1795_8 + sum_29;
114*38fd1498Szrj   i_12 = i_28 + 1;
115*38fd1498Szrj   if (N_6(D) > i_12)
116*38fd1498Szrj     goto header_bb;
117*38fd1498Szrj 
118*38fd1498Szrj 
119*38fd1498Szrj exit_bb:
120*38fd1498Szrj 
121*38fd1498Szrj   # sum_21 = PHI <sum_11(4)>
122*38fd1498Szrj   printf (&"%d"[0], sum_21);
123*38fd1498Szrj 
124*38fd1498Szrj 
125*38fd1498Szrj after reduction transformation (only relevant parts):
126*38fd1498Szrj 
127*38fd1498Szrj parloop
128*38fd1498Szrj {
129*38fd1498Szrj 
130*38fd1498Szrj ....
131*38fd1498Szrj 
132*38fd1498Szrj 
133*38fd1498Szrj   # Storing the initial value given by the user.  #
134*38fd1498Szrj 
135*38fd1498Szrj   .paral_data_store.32.sum.27 = 1;
136*38fd1498Szrj 
137*38fd1498Szrj   #pragma omp parallel num_threads(4)
138*38fd1498Szrj 
139*38fd1498Szrj   #pragma omp for schedule(static)
140*38fd1498Szrj 
141*38fd1498Szrj   # The neutral element corresponding to the particular
142*38fd1498Szrj   reduction's operation, e.g. 0 for PLUS_EXPR,
143*38fd1498Szrj   1 for MULT_EXPR, etc. replaces the user's initial value.  #
144*38fd1498Szrj 
145*38fd1498Szrj   # sum.27_29 = PHI <sum.27_11, 0>
146*38fd1498Szrj 
147*38fd1498Szrj   sum.27_11 = D.1827_8 + sum.27_29;
148*38fd1498Szrj 
149*38fd1498Szrj   GIMPLE_OMP_CONTINUE
150*38fd1498Szrj 
151*38fd1498Szrj   # Adding this reduction phi is done at create_phi_for_local_result() #
152*38fd1498Szrj   # sum.27_56 = PHI <sum.27_11, 0>
153*38fd1498Szrj   GIMPLE_OMP_RETURN
154*38fd1498Szrj 
155*38fd1498Szrj   # Creating the atomic operation is done at
156*38fd1498Szrj   create_call_for_reduction_1()  #
157*38fd1498Szrj 
158*38fd1498Szrj   #pragma omp atomic_load
159*38fd1498Szrj   D.1839_59 = *&.paral_data_load.33_51->reduction.23;
160*38fd1498Szrj   D.1840_60 = sum.27_56 + D.1839_59;
161*38fd1498Szrj   #pragma omp atomic_store (D.1840_60);
162*38fd1498Szrj 
163*38fd1498Szrj   GIMPLE_OMP_RETURN
164*38fd1498Szrj 
165*38fd1498Szrj  # collecting the result after the join of the threads is done at
166*38fd1498Szrj   create_loads_for_reductions().
167*38fd1498Szrj   The value computed by the threads is loaded from the
168*38fd1498Szrj   shared struct.  #
169*38fd1498Szrj 
170*38fd1498Szrj 
171*38fd1498Szrj   .paral_data_load.33_52 = &.paral_data_store.32;
172*38fd1498Szrj   sum_37 =  .paral_data_load.33_52->sum.27;
173*38fd1498Szrj   sum_43 = D.1795_41 + sum_37;
174*38fd1498Szrj 
175*38fd1498Szrj   exit bb:
176*38fd1498Szrj   # sum_21 = PHI <sum_43, sum_26>
177*38fd1498Szrj   printf (&"%d"[0], sum_21);
178*38fd1498Szrj 
179*38fd1498Szrj ...
180*38fd1498Szrj 
181*38fd1498Szrj }
182*38fd1498Szrj 
183*38fd1498Szrj */
184*38fd1498Szrj 
185*38fd1498Szrj /* Minimal number of iterations of a loop that should be executed in each
186*38fd1498Szrj    thread.  */
187*38fd1498Szrj #define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)
188*38fd1498Szrj 
189*38fd1498Szrj /* Element of the hashtable, representing a
190*38fd1498Szrj    reduction in the current loop.  */
191*38fd1498Szrj struct reduction_info
192*38fd1498Szrj {
193*38fd1498Szrj   gimple *reduc_stmt;		/* reduction statement.  */
194*38fd1498Szrj   gimple *reduc_phi;		/* The phi node defining the reduction.  */
195*38fd1498Szrj   enum tree_code reduction_code;/* code for the reduction operation.  */
196*38fd1498Szrj   unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
197*38fd1498Szrj 				   result.  */
198*38fd1498Szrj   gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
199*38fd1498Szrj 				   of the reduction variable when existing the loop. */
200*38fd1498Szrj   tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
201*38fd1498Szrj   tree field;			/*  the name of the field in the parloop data structure intended for reduction.  */
202*38fd1498Szrj   tree reduc_addr;		/* The address of the reduction variable for
203*38fd1498Szrj 				   openacc reductions.  */
204*38fd1498Szrj   tree init;			/* reduction initialization value.  */
205*38fd1498Szrj   gphi *new_phi;		/* (helper field) Newly created phi node whose result
206*38fd1498Szrj 				   will be passed to the atomic operation.  Represents
207*38fd1498Szrj 				   the local result each thread computed for the reduction
208*38fd1498Szrj 				   operation.  */
209*38fd1498Szrj };
210*38fd1498Szrj 
211*38fd1498Szrj /* Reduction info hashtable helpers.  */
212*38fd1498Szrj 
213*38fd1498Szrj struct reduction_hasher : free_ptr_hash <reduction_info>
214*38fd1498Szrj {
215*38fd1498Szrj   static inline hashval_t hash (const reduction_info *);
216*38fd1498Szrj   static inline bool equal (const reduction_info *, const reduction_info *);
217*38fd1498Szrj };
218*38fd1498Szrj 
219*38fd1498Szrj /* Equality and hash functions for hashtab code.  */
220*38fd1498Szrj 
221*38fd1498Szrj inline bool
equal(const reduction_info * a,const reduction_info * b)222*38fd1498Szrj reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
223*38fd1498Szrj {
224*38fd1498Szrj   return (a->reduc_phi == b->reduc_phi);
225*38fd1498Szrj }
226*38fd1498Szrj 
227*38fd1498Szrj inline hashval_t
hash(const reduction_info * a)228*38fd1498Szrj reduction_hasher::hash (const reduction_info *a)
229*38fd1498Szrj {
230*38fd1498Szrj   return a->reduc_version;
231*38fd1498Szrj }
232*38fd1498Szrj 
233*38fd1498Szrj typedef hash_table<reduction_hasher> reduction_info_table_type;
234*38fd1498Szrj 
235*38fd1498Szrj 
236*38fd1498Szrj static struct reduction_info *
reduction_phi(reduction_info_table_type * reduction_list,gimple * phi)237*38fd1498Szrj reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
238*38fd1498Szrj {
239*38fd1498Szrj   struct reduction_info tmpred, *red;
240*38fd1498Szrj 
241*38fd1498Szrj   if (reduction_list->elements () == 0 || phi == NULL)
242*38fd1498Szrj     return NULL;
243*38fd1498Szrj 
244*38fd1498Szrj   if (gimple_uid (phi) == (unsigned int)-1
245*38fd1498Szrj       || gimple_uid (phi) == 0)
246*38fd1498Szrj     return NULL;
247*38fd1498Szrj 
248*38fd1498Szrj   tmpred.reduc_phi = phi;
249*38fd1498Szrj   tmpred.reduc_version = gimple_uid (phi);
250*38fd1498Szrj   red = reduction_list->find (&tmpred);
251*38fd1498Szrj   gcc_assert (red == NULL || red->reduc_phi == phi);
252*38fd1498Szrj 
253*38fd1498Szrj   return red;
254*38fd1498Szrj }
255*38fd1498Szrj 
256*38fd1498Szrj /* Element of hashtable of names to copy.  */
257*38fd1498Szrj 
258*38fd1498Szrj struct name_to_copy_elt
259*38fd1498Szrj {
260*38fd1498Szrj   unsigned version;	/* The version of the name to copy.  */
261*38fd1498Szrj   tree new_name;	/* The new name used in the copy.  */
262*38fd1498Szrj   tree field;		/* The field of the structure used to pass the
263*38fd1498Szrj 			   value.  */
264*38fd1498Szrj };
265*38fd1498Szrj 
266*38fd1498Szrj /* Name copies hashtable helpers.  */
267*38fd1498Szrj 
268*38fd1498Szrj struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
269*38fd1498Szrj {
270*38fd1498Szrj   static inline hashval_t hash (const name_to_copy_elt *);
271*38fd1498Szrj   static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
272*38fd1498Szrj };
273*38fd1498Szrj 
274*38fd1498Szrj /* Equality and hash functions for hashtab code.  */
275*38fd1498Szrj 
276*38fd1498Szrj inline bool
equal(const name_to_copy_elt * a,const name_to_copy_elt * b)277*38fd1498Szrj name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
278*38fd1498Szrj {
279*38fd1498Szrj   return a->version == b->version;
280*38fd1498Szrj }
281*38fd1498Szrj 
282*38fd1498Szrj inline hashval_t
hash(const name_to_copy_elt * a)283*38fd1498Szrj name_to_copy_hasher::hash (const name_to_copy_elt *a)
284*38fd1498Szrj {
285*38fd1498Szrj   return (hashval_t) a->version;
286*38fd1498Szrj }
287*38fd1498Szrj 
288*38fd1498Szrj typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
289*38fd1498Szrj 
290*38fd1498Szrj /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
291*38fd1498Szrj    matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
292*38fd1498Szrj    represents the denominator for every element in the matrix.  */
293*38fd1498Szrj typedef struct lambda_trans_matrix_s
294*38fd1498Szrj {
295*38fd1498Szrj   lambda_matrix matrix;
296*38fd1498Szrj   int rowsize;
297*38fd1498Szrj   int colsize;
298*38fd1498Szrj   int denominator;
299*38fd1498Szrj } *lambda_trans_matrix;
300*38fd1498Szrj #define LTM_MATRIX(T) ((T)->matrix)
301*38fd1498Szrj #define LTM_ROWSIZE(T) ((T)->rowsize)
302*38fd1498Szrj #define LTM_COLSIZE(T) ((T)->colsize)
303*38fd1498Szrj #define LTM_DENOMINATOR(T) ((T)->denominator)
304*38fd1498Szrj 
305*38fd1498Szrj /* Allocate a new transformation matrix.  */
306*38fd1498Szrj 
307*38fd1498Szrj static lambda_trans_matrix
lambda_trans_matrix_new(int colsize,int rowsize,struct obstack * lambda_obstack)308*38fd1498Szrj lambda_trans_matrix_new (int colsize, int rowsize,
309*38fd1498Szrj 			 struct obstack * lambda_obstack)
310*38fd1498Szrj {
311*38fd1498Szrj   lambda_trans_matrix ret;
312*38fd1498Szrj 
313*38fd1498Szrj   ret = (lambda_trans_matrix)
314*38fd1498Szrj     obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
315*38fd1498Szrj   LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
316*38fd1498Szrj   LTM_ROWSIZE (ret) = rowsize;
317*38fd1498Szrj   LTM_COLSIZE (ret) = colsize;
318*38fd1498Szrj   LTM_DENOMINATOR (ret) = 1;
319*38fd1498Szrj   return ret;
320*38fd1498Szrj }
321*38fd1498Szrj 
322*38fd1498Szrj /* Multiply a vector VEC by a matrix MAT.
323*38fd1498Szrj    MAT is an M*N matrix, and VEC is a vector with length N.  The result
324*38fd1498Szrj    is stored in DEST which must be a vector of length M.  */
325*38fd1498Szrj 
326*38fd1498Szrj static void
lambda_matrix_vector_mult(lambda_matrix matrix,int m,int n,lambda_vector vec,lambda_vector dest)327*38fd1498Szrj lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
328*38fd1498Szrj 			   lambda_vector vec, lambda_vector dest)
329*38fd1498Szrj {
330*38fd1498Szrj   int i, j;
331*38fd1498Szrj 
332*38fd1498Szrj   lambda_vector_clear (dest, m);
333*38fd1498Szrj   for (i = 0; i < m; i++)
334*38fd1498Szrj     for (j = 0; j < n; j++)
335*38fd1498Szrj       dest[i] += matrix[i][j] * vec[j];
336*38fd1498Szrj }
337*38fd1498Szrj 
338*38fd1498Szrj /* Return true if TRANS is a legal transformation matrix that respects
339*38fd1498Szrj    the dependence vectors in DISTS and DIRS.  The conservative answer
340*38fd1498Szrj    is false.
341*38fd1498Szrj 
342*38fd1498Szrj    "Wolfe proves that a unimodular transformation represented by the
343*38fd1498Szrj    matrix T is legal when applied to a loop nest with a set of
344*38fd1498Szrj    lexicographically non-negative distance vectors RDG if and only if
345*38fd1498Szrj    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
346*38fd1498Szrj    i.e.: if and only if it transforms the lexicographically positive
347*38fd1498Szrj    distance vectors to lexicographically positive vectors.  Note that
348*38fd1498Szrj    a unimodular matrix must transform the zero vector (and only it) to
349*38fd1498Szrj    the zero vector." S.Muchnick.  */
350*38fd1498Szrj 
351*38fd1498Szrj static bool
lambda_transform_legal_p(lambda_trans_matrix trans,int nb_loops,vec<ddr_p> dependence_relations)352*38fd1498Szrj lambda_transform_legal_p (lambda_trans_matrix trans,
353*38fd1498Szrj 			  int nb_loops,
354*38fd1498Szrj 			  vec<ddr_p> dependence_relations)
355*38fd1498Szrj {
356*38fd1498Szrj   unsigned int i, j;
357*38fd1498Szrj   lambda_vector distres;
358*38fd1498Szrj   struct data_dependence_relation *ddr;
359*38fd1498Szrj 
360*38fd1498Szrj   gcc_assert (LTM_COLSIZE (trans) == nb_loops
361*38fd1498Szrj 	      && LTM_ROWSIZE (trans) == nb_loops);
362*38fd1498Szrj 
363*38fd1498Szrj   /* When there are no dependences, the transformation is correct.  */
364*38fd1498Szrj   if (dependence_relations.length () == 0)
365*38fd1498Szrj     return true;
366*38fd1498Szrj 
367*38fd1498Szrj   ddr = dependence_relations[0];
368*38fd1498Szrj   if (ddr == NULL)
369*38fd1498Szrj     return true;
370*38fd1498Szrj 
371*38fd1498Szrj   /* When there is an unknown relation in the dependence_relations, we
372*38fd1498Szrj      know that it is no worth looking at this loop nest: give up.  */
373*38fd1498Szrj   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
374*38fd1498Szrj     return false;
375*38fd1498Szrj 
376*38fd1498Szrj   distres = lambda_vector_new (nb_loops);
377*38fd1498Szrj 
378*38fd1498Szrj   /* For each distance vector in the dependence graph.  */
379*38fd1498Szrj   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
380*38fd1498Szrj     {
381*38fd1498Szrj       /* Don't care about relations for which we know that there is no
382*38fd1498Szrj 	 dependence, nor about read-read (aka. output-dependences):
383*38fd1498Szrj 	 these data accesses can happen in any order.  */
384*38fd1498Szrj       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
385*38fd1498Szrj 	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
386*38fd1498Szrj 	continue;
387*38fd1498Szrj 
388*38fd1498Szrj       /* Conservatively answer: "this transformation is not valid".  */
389*38fd1498Szrj       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
390*38fd1498Szrj 	return false;
391*38fd1498Szrj 
392*38fd1498Szrj       /* If the dependence could not be captured by a distance vector,
393*38fd1498Szrj 	 conservatively answer that the transform is not valid.  */
394*38fd1498Szrj       if (DDR_NUM_DIST_VECTS (ddr) == 0)
395*38fd1498Szrj 	return false;
396*38fd1498Szrj 
397*38fd1498Szrj       /* Compute trans.dist_vect */
398*38fd1498Szrj       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
399*38fd1498Szrj 	{
400*38fd1498Szrj 	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
401*38fd1498Szrj 				     DDR_DIST_VECT (ddr, j), distres);
402*38fd1498Szrj 
403*38fd1498Szrj 	  if (!lambda_vector_lexico_pos (distres, nb_loops))
404*38fd1498Szrj 	    return false;
405*38fd1498Szrj 	}
406*38fd1498Szrj     }
407*38fd1498Szrj   return true;
408*38fd1498Szrj }
409*38fd1498Szrj 
410*38fd1498Szrj /* Data dependency analysis. Returns true if the iterations of LOOP
411*38fd1498Szrj    are independent on each other (that is, if we can execute them
412*38fd1498Szrj    in parallel).  */
413*38fd1498Szrj 
414*38fd1498Szrj static bool
loop_parallel_p(struct loop * loop,struct obstack * parloop_obstack)415*38fd1498Szrj loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
416*38fd1498Szrj {
417*38fd1498Szrj   vec<ddr_p> dependence_relations;
418*38fd1498Szrj   vec<data_reference_p> datarefs;
419*38fd1498Szrj   lambda_trans_matrix trans;
420*38fd1498Szrj   bool ret = false;
421*38fd1498Szrj 
422*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
423*38fd1498Szrj   {
424*38fd1498Szrj     fprintf (dump_file, "Considering loop %d\n", loop->num);
425*38fd1498Szrj     if (!loop->inner)
426*38fd1498Szrj       fprintf (dump_file, "loop is innermost\n");
427*38fd1498Szrj     else
428*38fd1498Szrj       fprintf (dump_file, "loop NOT innermost\n");
429*38fd1498Szrj    }
430*38fd1498Szrj 
431*38fd1498Szrj   /* Check for problems with dependences.  If the loop can be reversed,
432*38fd1498Szrj      the iterations are independent.  */
433*38fd1498Szrj   auto_vec<loop_p, 3> loop_nest;
434*38fd1498Szrj   datarefs.create (10);
435*38fd1498Szrj   dependence_relations.create (100);
436*38fd1498Szrj   if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
437*38fd1498Szrj 					   &dependence_relations))
438*38fd1498Szrj     {
439*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
440*38fd1498Szrj 	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
441*38fd1498Szrj       ret = false;
442*38fd1498Szrj       goto end;
443*38fd1498Szrj     }
444*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
445*38fd1498Szrj     dump_data_dependence_relations (dump_file, dependence_relations);
446*38fd1498Szrj 
447*38fd1498Szrj   trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
448*38fd1498Szrj   LTM_MATRIX (trans)[0][0] = -1;
449*38fd1498Szrj 
450*38fd1498Szrj   if (lambda_transform_legal_p (trans, 1, dependence_relations))
451*38fd1498Szrj     {
452*38fd1498Szrj       ret = true;
453*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
454*38fd1498Szrj 	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
455*38fd1498Szrj     }
456*38fd1498Szrj   else if (dump_file && (dump_flags & TDF_DETAILS))
457*38fd1498Szrj     fprintf (dump_file,
458*38fd1498Szrj 	     "  FAILED: data dependencies exist across iterations\n");
459*38fd1498Szrj 
460*38fd1498Szrj  end:
461*38fd1498Szrj   free_dependence_relations (dependence_relations);
462*38fd1498Szrj   free_data_refs (datarefs);
463*38fd1498Szrj 
464*38fd1498Szrj   return ret;
465*38fd1498Szrj }
466*38fd1498Szrj 
467*38fd1498Szrj /* Return true when LOOP contains basic blocks marked with the
468*38fd1498Szrj    BB_IRREDUCIBLE_LOOP flag.  */
469*38fd1498Szrj 
470*38fd1498Szrj static inline bool
loop_has_blocks_with_irreducible_flag(struct loop * loop)471*38fd1498Szrj loop_has_blocks_with_irreducible_flag (struct loop *loop)
472*38fd1498Szrj {
473*38fd1498Szrj   unsigned i;
474*38fd1498Szrj   basic_block *bbs = get_loop_body_in_dom_order (loop);
475*38fd1498Szrj   bool res = true;
476*38fd1498Szrj 
477*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
478*38fd1498Szrj     if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
479*38fd1498Szrj       goto end;
480*38fd1498Szrj 
481*38fd1498Szrj   res = false;
482*38fd1498Szrj  end:
483*38fd1498Szrj   free (bbs);
484*38fd1498Szrj   return res;
485*38fd1498Szrj }
486*38fd1498Szrj 
487*38fd1498Szrj /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
488*38fd1498Szrj    The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
489*38fd1498Szrj    to their addresses that can be reused.  The address of OBJ is known to
490*38fd1498Szrj    be invariant in the whole function.  Other needed statements are placed
491*38fd1498Szrj    right before GSI.  */
492*38fd1498Szrj 
493*38fd1498Szrj static tree
take_address_of(tree obj,tree type,edge entry,int_tree_htab_type * decl_address,gimple_stmt_iterator * gsi)494*38fd1498Szrj take_address_of (tree obj, tree type, edge entry,
495*38fd1498Szrj 		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
496*38fd1498Szrj {
497*38fd1498Szrj   int uid;
498*38fd1498Szrj   tree *var_p, name, addr;
499*38fd1498Szrj   gassign *stmt;
500*38fd1498Szrj   gimple_seq stmts;
501*38fd1498Szrj 
502*38fd1498Szrj   /* Since the address of OBJ is invariant, the trees may be shared.
503*38fd1498Szrj      Avoid rewriting unrelated parts of the code.  */
504*38fd1498Szrj   obj = unshare_expr (obj);
505*38fd1498Szrj   for (var_p = &obj;
506*38fd1498Szrj        handled_component_p (*var_p);
507*38fd1498Szrj        var_p = &TREE_OPERAND (*var_p, 0))
508*38fd1498Szrj     continue;
509*38fd1498Szrj 
510*38fd1498Szrj   /* Canonicalize the access to base on a MEM_REF.  */
511*38fd1498Szrj   if (DECL_P (*var_p))
512*38fd1498Szrj     *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
513*38fd1498Szrj 
514*38fd1498Szrj   /* Assign a canonical SSA name to the address of the base decl used
515*38fd1498Szrj      in the address and share it for all accesses and addresses based
516*38fd1498Szrj      on it.  */
517*38fd1498Szrj   uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
518*38fd1498Szrj   int_tree_map elt;
519*38fd1498Szrj   elt.uid = uid;
520*38fd1498Szrj   int_tree_map *slot = decl_address->find_slot (elt, INSERT);
521*38fd1498Szrj   if (!slot->to)
522*38fd1498Szrj     {
523*38fd1498Szrj       if (gsi == NULL)
524*38fd1498Szrj 	return NULL;
525*38fd1498Szrj       addr = TREE_OPERAND (*var_p, 0);
526*38fd1498Szrj       const char *obj_name
527*38fd1498Szrj 	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
528*38fd1498Szrj       if (obj_name)
529*38fd1498Szrj 	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
530*38fd1498Szrj       else
531*38fd1498Szrj 	name = make_ssa_name (TREE_TYPE (addr));
532*38fd1498Szrj       stmt = gimple_build_assign (name, addr);
533*38fd1498Szrj       gsi_insert_on_edge_immediate (entry, stmt);
534*38fd1498Szrj 
535*38fd1498Szrj       slot->uid = uid;
536*38fd1498Szrj       slot->to = name;
537*38fd1498Szrj     }
538*38fd1498Szrj   else
539*38fd1498Szrj     name = slot->to;
540*38fd1498Szrj 
541*38fd1498Szrj   /* Express the address in terms of the canonical SSA name.  */
542*38fd1498Szrj   TREE_OPERAND (*var_p, 0) = name;
543*38fd1498Szrj   if (gsi == NULL)
544*38fd1498Szrj     return build_fold_addr_expr_with_type (obj, type);
545*38fd1498Szrj 
546*38fd1498Szrj   name = force_gimple_operand (build_addr (obj),
547*38fd1498Szrj 			       &stmts, true, NULL_TREE);
548*38fd1498Szrj   if (!gimple_seq_empty_p (stmts))
549*38fd1498Szrj     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
550*38fd1498Szrj 
551*38fd1498Szrj   if (!useless_type_conversion_p (type, TREE_TYPE (name)))
552*38fd1498Szrj     {
553*38fd1498Szrj       name = force_gimple_operand (fold_convert (type, name), &stmts, true,
554*38fd1498Szrj 				   NULL_TREE);
555*38fd1498Szrj       if (!gimple_seq_empty_p (stmts))
556*38fd1498Szrj 	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
557*38fd1498Szrj     }
558*38fd1498Szrj 
559*38fd1498Szrj   return name;
560*38fd1498Szrj }
561*38fd1498Szrj 
562*38fd1498Szrj static tree
reduc_stmt_res(gimple * stmt)563*38fd1498Szrj reduc_stmt_res (gimple *stmt)
564*38fd1498Szrj {
565*38fd1498Szrj   return (gimple_code (stmt) == GIMPLE_PHI
566*38fd1498Szrj 	  ? gimple_phi_result (stmt)
567*38fd1498Szrj 	  : gimple_assign_lhs (stmt));
568*38fd1498Szrj }
569*38fd1498Szrj 
570*38fd1498Szrj /* Callback for htab_traverse.  Create the initialization statement
571*38fd1498Szrj    for reduction described in SLOT, and place it at the preheader of
572*38fd1498Szrj    the loop described in DATA.  */
573*38fd1498Szrj 
574*38fd1498Szrj int
initialize_reductions(reduction_info ** slot,struct loop * loop)575*38fd1498Szrj initialize_reductions (reduction_info **slot, struct loop *loop)
576*38fd1498Szrj {
577*38fd1498Szrj   tree init;
578*38fd1498Szrj   tree type, arg;
579*38fd1498Szrj   edge e;
580*38fd1498Szrj 
581*38fd1498Szrj   struct reduction_info *const reduc = *slot;
582*38fd1498Szrj 
583*38fd1498Szrj   /* Create initialization in preheader:
584*38fd1498Szrj      reduction_variable = initialization value of reduction.  */
585*38fd1498Szrj 
586*38fd1498Szrj   /* In the phi node at the header, replace the argument coming
587*38fd1498Szrj      from the preheader with the reduction initialization value.  */
588*38fd1498Szrj 
589*38fd1498Szrj   /* Initialize the reduction.  */
590*38fd1498Szrj   type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
591*38fd1498Szrj   init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
592*38fd1498Szrj 				reduc->reduction_code, type);
593*38fd1498Szrj   reduc->init = init;
594*38fd1498Szrj 
595*38fd1498Szrj   /* Replace the argument representing the initialization value
596*38fd1498Szrj      with the initialization value for the reduction (neutral
597*38fd1498Szrj      element for the particular operation, e.g. 0 for PLUS_EXPR,
598*38fd1498Szrj      1 for MULT_EXPR, etc).
599*38fd1498Szrj      Keep the old value in a new variable "reduction_initial",
600*38fd1498Szrj      that will be taken in consideration after the parallel
601*38fd1498Szrj      computing is done.  */
602*38fd1498Szrj 
603*38fd1498Szrj   e = loop_preheader_edge (loop);
604*38fd1498Szrj   arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
605*38fd1498Szrj   /* Create new variable to hold the initial value.  */
606*38fd1498Szrj 
607*38fd1498Szrj   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
608*38fd1498Szrj 	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
609*38fd1498Szrj   reduc->initial_value = arg;
610*38fd1498Szrj   return 1;
611*38fd1498Szrj }
612*38fd1498Szrj 
613*38fd1498Szrj struct elv_data
614*38fd1498Szrj {
615*38fd1498Szrj   struct walk_stmt_info info;
616*38fd1498Szrj   edge entry;
617*38fd1498Szrj   int_tree_htab_type *decl_address;
618*38fd1498Szrj   gimple_stmt_iterator *gsi;
619*38fd1498Szrj   bool changed;
620*38fd1498Szrj   bool reset;
621*38fd1498Szrj };
622*38fd1498Szrj 
623*38fd1498Szrj /* Eliminates references to local variables in *TP out of the single
624*38fd1498Szrj    entry single exit region starting at DTA->ENTRY.
625*38fd1498Szrj    DECL_ADDRESS contains addresses of the references that had their
626*38fd1498Szrj    address taken already.  If the expression is changed, CHANGED is
627*38fd1498Szrj    set to true.  Callback for walk_tree.  */
628*38fd1498Szrj 
629*38fd1498Szrj static tree
eliminate_local_variables_1(tree * tp,int * walk_subtrees,void * data)630*38fd1498Szrj eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
631*38fd1498Szrj {
632*38fd1498Szrj   struct elv_data *const dta = (struct elv_data *) data;
633*38fd1498Szrj   tree t = *tp, var, addr, addr_type, type, obj;
634*38fd1498Szrj 
635*38fd1498Szrj   if (DECL_P (t))
636*38fd1498Szrj     {
637*38fd1498Szrj       *walk_subtrees = 0;
638*38fd1498Szrj 
639*38fd1498Szrj       if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
640*38fd1498Szrj 	return NULL_TREE;
641*38fd1498Szrj 
642*38fd1498Szrj       type = TREE_TYPE (t);
643*38fd1498Szrj       addr_type = build_pointer_type (type);
644*38fd1498Szrj       addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
645*38fd1498Szrj 			      dta->gsi);
646*38fd1498Szrj       if (dta->gsi == NULL && addr == NULL_TREE)
647*38fd1498Szrj 	{
648*38fd1498Szrj 	  dta->reset = true;
649*38fd1498Szrj 	  return NULL_TREE;
650*38fd1498Szrj 	}
651*38fd1498Szrj 
652*38fd1498Szrj       *tp = build_simple_mem_ref (addr);
653*38fd1498Szrj 
654*38fd1498Szrj       dta->changed = true;
655*38fd1498Szrj       return NULL_TREE;
656*38fd1498Szrj     }
657*38fd1498Szrj 
658*38fd1498Szrj   if (TREE_CODE (t) == ADDR_EXPR)
659*38fd1498Szrj     {
660*38fd1498Szrj       /* ADDR_EXPR may appear in two contexts:
661*38fd1498Szrj 	 -- as a gimple operand, when the address taken is a function invariant
662*38fd1498Szrj 	 -- as gimple rhs, when the resulting address in not a function
663*38fd1498Szrj 	    invariant
664*38fd1498Szrj 	 We do not need to do anything special in the latter case (the base of
665*38fd1498Szrj 	 the memory reference whose address is taken may be replaced in the
666*38fd1498Szrj 	 DECL_P case).  The former case is more complicated, as we need to
667*38fd1498Szrj 	 ensure that the new address is still a gimple operand.  Thus, it
668*38fd1498Szrj 	 is not sufficient to replace just the base of the memory reference --
669*38fd1498Szrj 	 we need to move the whole computation of the address out of the
670*38fd1498Szrj 	 loop.  */
671*38fd1498Szrj       if (!is_gimple_val (t))
672*38fd1498Szrj 	return NULL_TREE;
673*38fd1498Szrj 
674*38fd1498Szrj       *walk_subtrees = 0;
675*38fd1498Szrj       obj = TREE_OPERAND (t, 0);
676*38fd1498Szrj       var = get_base_address (obj);
677*38fd1498Szrj       if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
678*38fd1498Szrj 	return NULL_TREE;
679*38fd1498Szrj 
680*38fd1498Szrj       addr_type = TREE_TYPE (t);
681*38fd1498Szrj       addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
682*38fd1498Szrj 			      dta->gsi);
683*38fd1498Szrj       if (dta->gsi == NULL && addr == NULL_TREE)
684*38fd1498Szrj 	{
685*38fd1498Szrj 	  dta->reset = true;
686*38fd1498Szrj 	  return NULL_TREE;
687*38fd1498Szrj 	}
688*38fd1498Szrj       *tp = addr;
689*38fd1498Szrj 
690*38fd1498Szrj       dta->changed = true;
691*38fd1498Szrj       return NULL_TREE;
692*38fd1498Szrj     }
693*38fd1498Szrj 
694*38fd1498Szrj   if (!EXPR_P (t))
695*38fd1498Szrj     *walk_subtrees = 0;
696*38fd1498Szrj 
697*38fd1498Szrj   return NULL_TREE;
698*38fd1498Szrj }
699*38fd1498Szrj 
700*38fd1498Szrj /* Moves the references to local variables in STMT at *GSI out of the single
701*38fd1498Szrj    entry single exit region starting at ENTRY.  DECL_ADDRESS contains
702*38fd1498Szrj    addresses of the references that had their address taken
703*38fd1498Szrj    already.  */
704*38fd1498Szrj 
705*38fd1498Szrj static void
eliminate_local_variables_stmt(edge entry,gimple_stmt_iterator * gsi,int_tree_htab_type * decl_address)706*38fd1498Szrj eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
707*38fd1498Szrj 				int_tree_htab_type *decl_address)
708*38fd1498Szrj {
709*38fd1498Szrj   struct elv_data dta;
710*38fd1498Szrj   gimple *stmt = gsi_stmt (*gsi);
711*38fd1498Szrj 
712*38fd1498Szrj   memset (&dta.info, '\0', sizeof (dta.info));
713*38fd1498Szrj   dta.entry = entry;
714*38fd1498Szrj   dta.decl_address = decl_address;
715*38fd1498Szrj   dta.changed = false;
716*38fd1498Szrj   dta.reset = false;
717*38fd1498Szrj 
718*38fd1498Szrj   if (gimple_debug_bind_p (stmt))
719*38fd1498Szrj     {
720*38fd1498Szrj       dta.gsi = NULL;
721*38fd1498Szrj       walk_tree (gimple_debug_bind_get_value_ptr (stmt),
722*38fd1498Szrj 		 eliminate_local_variables_1, &dta.info, NULL);
723*38fd1498Szrj       if (dta.reset)
724*38fd1498Szrj 	{
725*38fd1498Szrj 	  gimple_debug_bind_reset_value (stmt);
726*38fd1498Szrj 	  dta.changed = true;
727*38fd1498Szrj 	}
728*38fd1498Szrj     }
729*38fd1498Szrj   else if (gimple_clobber_p (stmt))
730*38fd1498Szrj     {
731*38fd1498Szrj       unlink_stmt_vdef (stmt);
732*38fd1498Szrj       stmt = gimple_build_nop ();
733*38fd1498Szrj       gsi_replace (gsi, stmt, false);
734*38fd1498Szrj       dta.changed = true;
735*38fd1498Szrj     }
736*38fd1498Szrj   else
737*38fd1498Szrj     {
738*38fd1498Szrj       dta.gsi = gsi;
739*38fd1498Szrj       walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
740*38fd1498Szrj     }
741*38fd1498Szrj 
742*38fd1498Szrj   if (dta.changed)
743*38fd1498Szrj     update_stmt (stmt);
744*38fd1498Szrj }
745*38fd1498Szrj 
746*38fd1498Szrj /* Eliminates the references to local variables from the single entry
747*38fd1498Szrj    single exit region between the ENTRY and EXIT edges.
748*38fd1498Szrj 
749*38fd1498Szrj    This includes:
750*38fd1498Szrj    1) Taking address of a local variable -- these are moved out of the
751*38fd1498Szrj    region (and temporary variable is created to hold the address if
752*38fd1498Szrj    necessary).
753*38fd1498Szrj 
754*38fd1498Szrj    2) Dereferencing a local variable -- these are replaced with indirect
755*38fd1498Szrj    references.  */
756*38fd1498Szrj 
757*38fd1498Szrj static void
eliminate_local_variables(edge entry,edge exit)758*38fd1498Szrj eliminate_local_variables (edge entry, edge exit)
759*38fd1498Szrj {
760*38fd1498Szrj   basic_block bb;
761*38fd1498Szrj   auto_vec<basic_block, 3> body;
762*38fd1498Szrj   unsigned i;
763*38fd1498Szrj   gimple_stmt_iterator gsi;
764*38fd1498Szrj   bool has_debug_stmt = false;
765*38fd1498Szrj   int_tree_htab_type decl_address (10);
766*38fd1498Szrj   basic_block entry_bb = entry->src;
767*38fd1498Szrj   basic_block exit_bb = exit->dest;
768*38fd1498Szrj 
769*38fd1498Szrj   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
770*38fd1498Szrj 
771*38fd1498Szrj   FOR_EACH_VEC_ELT (body, i, bb)
772*38fd1498Szrj     if (bb != entry_bb && bb != exit_bb)
773*38fd1498Szrj       {
774*38fd1498Szrj         for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
775*38fd1498Szrj 	  if (is_gimple_debug (gsi_stmt (gsi)))
776*38fd1498Szrj 	    {
777*38fd1498Szrj 	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
778*38fd1498Szrj 	        has_debug_stmt = true;
779*38fd1498Szrj 	    }
780*38fd1498Szrj 	  else
781*38fd1498Szrj 	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
782*38fd1498Szrj       }
783*38fd1498Szrj 
784*38fd1498Szrj   if (has_debug_stmt)
785*38fd1498Szrj     FOR_EACH_VEC_ELT (body, i, bb)
786*38fd1498Szrj       if (bb != entry_bb && bb != exit_bb)
787*38fd1498Szrj 	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
788*38fd1498Szrj 	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
789*38fd1498Szrj 	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
790*38fd1498Szrj }
791*38fd1498Szrj 
792*38fd1498Szrj /* Returns true if expression EXPR is not defined between ENTRY and
793*38fd1498Szrj    EXIT, i.e. if all its operands are defined outside of the region.  */
794*38fd1498Szrj 
795*38fd1498Szrj static bool
expr_invariant_in_region_p(edge entry,edge exit,tree expr)796*38fd1498Szrj expr_invariant_in_region_p (edge entry, edge exit, tree expr)
797*38fd1498Szrj {
798*38fd1498Szrj   basic_block entry_bb = entry->src;
799*38fd1498Szrj   basic_block exit_bb = exit->dest;
800*38fd1498Szrj   basic_block def_bb;
801*38fd1498Szrj 
802*38fd1498Szrj   if (is_gimple_min_invariant (expr))
803*38fd1498Szrj     return true;
804*38fd1498Szrj 
805*38fd1498Szrj   if (TREE_CODE (expr) == SSA_NAME)
806*38fd1498Szrj     {
807*38fd1498Szrj       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
808*38fd1498Szrj       if (def_bb
809*38fd1498Szrj 	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
810*38fd1498Szrj 	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
811*38fd1498Szrj 	return false;
812*38fd1498Szrj 
813*38fd1498Szrj       return true;
814*38fd1498Szrj     }
815*38fd1498Szrj 
816*38fd1498Szrj   return false;
817*38fd1498Szrj }
818*38fd1498Szrj 
819*38fd1498Szrj /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
820*38fd1498Szrj    The copies are stored to NAME_COPIES, if NAME was already duplicated,
821*38fd1498Szrj    its duplicate stored in NAME_COPIES is returned.
822*38fd1498Szrj 
823*38fd1498Szrj    Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
824*38fd1498Szrj    duplicated, storing the copies in DECL_COPIES.  */
825*38fd1498Szrj 
826*38fd1498Szrj static tree
separate_decls_in_region_name(tree name,name_to_copy_table_type * name_copies,int_tree_htab_type * decl_copies,bool copy_name_p)827*38fd1498Szrj separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
828*38fd1498Szrj 			       int_tree_htab_type *decl_copies,
829*38fd1498Szrj 			       bool copy_name_p)
830*38fd1498Szrj {
831*38fd1498Szrj   tree copy, var, var_copy;
832*38fd1498Szrj   unsigned idx, uid, nuid;
833*38fd1498Szrj   struct int_tree_map ielt;
834*38fd1498Szrj   struct name_to_copy_elt elt, *nelt;
835*38fd1498Szrj   name_to_copy_elt **slot;
836*38fd1498Szrj   int_tree_map *dslot;
837*38fd1498Szrj 
838*38fd1498Szrj   if (TREE_CODE (name) != SSA_NAME)
839*38fd1498Szrj     return name;
840*38fd1498Szrj 
841*38fd1498Szrj   idx = SSA_NAME_VERSION (name);
842*38fd1498Szrj   elt.version = idx;
843*38fd1498Szrj   slot = name_copies->find_slot_with_hash (&elt, idx,
844*38fd1498Szrj 					   copy_name_p ? INSERT : NO_INSERT);
845*38fd1498Szrj   if (slot && *slot)
846*38fd1498Szrj     return (*slot)->new_name;
847*38fd1498Szrj 
848*38fd1498Szrj   if (copy_name_p)
849*38fd1498Szrj     {
850*38fd1498Szrj       copy = duplicate_ssa_name (name, NULL);
851*38fd1498Szrj       nelt = XNEW (struct name_to_copy_elt);
852*38fd1498Szrj       nelt->version = idx;
853*38fd1498Szrj       nelt->new_name = copy;
854*38fd1498Szrj       nelt->field = NULL_TREE;
855*38fd1498Szrj       *slot = nelt;
856*38fd1498Szrj     }
857*38fd1498Szrj   else
858*38fd1498Szrj     {
859*38fd1498Szrj       gcc_assert (!slot);
860*38fd1498Szrj       copy = name;
861*38fd1498Szrj     }
862*38fd1498Szrj 
863*38fd1498Szrj   var = SSA_NAME_VAR (name);
864*38fd1498Szrj   if (!var)
865*38fd1498Szrj     return copy;
866*38fd1498Szrj 
867*38fd1498Szrj   uid = DECL_UID (var);
868*38fd1498Szrj   ielt.uid = uid;
869*38fd1498Szrj   dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
870*38fd1498Szrj   if (!dslot->to)
871*38fd1498Szrj     {
872*38fd1498Szrj       var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
873*38fd1498Szrj       DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
874*38fd1498Szrj       dslot->uid = uid;
875*38fd1498Szrj       dslot->to = var_copy;
876*38fd1498Szrj 
877*38fd1498Szrj       /* Ensure that when we meet this decl next time, we won't duplicate
878*38fd1498Szrj          it again.  */
879*38fd1498Szrj       nuid = DECL_UID (var_copy);
880*38fd1498Szrj       ielt.uid = nuid;
881*38fd1498Szrj       dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
882*38fd1498Szrj       gcc_assert (!dslot->to);
883*38fd1498Szrj       dslot->uid = nuid;
884*38fd1498Szrj       dslot->to = var_copy;
885*38fd1498Szrj     }
886*38fd1498Szrj   else
887*38fd1498Szrj     var_copy = dslot->to;
888*38fd1498Szrj 
889*38fd1498Szrj   replace_ssa_name_symbol (copy, var_copy);
890*38fd1498Szrj   return copy;
891*38fd1498Szrj }
892*38fd1498Szrj 
893*38fd1498Szrj /* Finds the ssa names used in STMT that are defined outside the
894*38fd1498Szrj    region between ENTRY and EXIT and replaces such ssa names with
895*38fd1498Szrj    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
896*38fd1498Szrj    decls of all ssa names used in STMT (including those defined in
897*38fd1498Szrj    LOOP) are replaced with the new temporary variables; the
898*38fd1498Szrj    replacement decls are stored in DECL_COPIES.  */
899*38fd1498Szrj 
900*38fd1498Szrj static void
separate_decls_in_region_stmt(edge entry,edge exit,gimple * stmt,name_to_copy_table_type * name_copies,int_tree_htab_type * decl_copies)901*38fd1498Szrj separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
902*38fd1498Szrj 			       name_to_copy_table_type *name_copies,
903*38fd1498Szrj 			       int_tree_htab_type *decl_copies)
904*38fd1498Szrj {
905*38fd1498Szrj   use_operand_p use;
906*38fd1498Szrj   def_operand_p def;
907*38fd1498Szrj   ssa_op_iter oi;
908*38fd1498Szrj   tree name, copy;
909*38fd1498Szrj   bool copy_name_p;
910*38fd1498Szrj 
911*38fd1498Szrj   FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
912*38fd1498Szrj   {
913*38fd1498Szrj     name = DEF_FROM_PTR (def);
914*38fd1498Szrj     gcc_assert (TREE_CODE (name) == SSA_NAME);
915*38fd1498Szrj     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
916*38fd1498Szrj 					  false);
917*38fd1498Szrj     gcc_assert (copy == name);
918*38fd1498Szrj   }
919*38fd1498Szrj 
920*38fd1498Szrj   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
921*38fd1498Szrj   {
922*38fd1498Szrj     name = USE_FROM_PTR (use);
923*38fd1498Szrj     if (TREE_CODE (name) != SSA_NAME)
924*38fd1498Szrj       continue;
925*38fd1498Szrj 
926*38fd1498Szrj     copy_name_p = expr_invariant_in_region_p (entry, exit, name);
927*38fd1498Szrj     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
928*38fd1498Szrj 					  copy_name_p);
929*38fd1498Szrj     SET_USE (use, copy);
930*38fd1498Szrj   }
931*38fd1498Szrj }
932*38fd1498Szrj 
933*38fd1498Szrj /* Finds the ssa names used in STMT that are defined outside the
934*38fd1498Szrj    region between ENTRY and EXIT and replaces such ssa names with
935*38fd1498Szrj    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
936*38fd1498Szrj    decls of all ssa names used in STMT (including those defined in
937*38fd1498Szrj    LOOP) are replaced with the new temporary variables; the
938*38fd1498Szrj    replacement decls are stored in DECL_COPIES.  */
939*38fd1498Szrj 
940*38fd1498Szrj static bool
separate_decls_in_region_debug(gimple * stmt,name_to_copy_table_type * name_copies,int_tree_htab_type * decl_copies)941*38fd1498Szrj separate_decls_in_region_debug (gimple *stmt,
942*38fd1498Szrj 				name_to_copy_table_type *name_copies,
943*38fd1498Szrj 				int_tree_htab_type *decl_copies)
944*38fd1498Szrj {
945*38fd1498Szrj   use_operand_p use;
946*38fd1498Szrj   ssa_op_iter oi;
947*38fd1498Szrj   tree var, name;
948*38fd1498Szrj   struct int_tree_map ielt;
949*38fd1498Szrj   struct name_to_copy_elt elt;
950*38fd1498Szrj   name_to_copy_elt **slot;
951*38fd1498Szrj   int_tree_map *dslot;
952*38fd1498Szrj 
953*38fd1498Szrj   if (gimple_debug_bind_p (stmt))
954*38fd1498Szrj     var = gimple_debug_bind_get_var (stmt);
955*38fd1498Szrj   else if (gimple_debug_source_bind_p (stmt))
956*38fd1498Szrj     var = gimple_debug_source_bind_get_var (stmt);
957*38fd1498Szrj   else
958*38fd1498Szrj     return true;
959*38fd1498Szrj   if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
960*38fd1498Szrj     return true;
961*38fd1498Szrj   gcc_assert (DECL_P (var) && SSA_VAR_P (var));
962*38fd1498Szrj   ielt.uid = DECL_UID (var);
963*38fd1498Szrj   dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
964*38fd1498Szrj   if (!dslot)
965*38fd1498Szrj     return true;
966*38fd1498Szrj   if (gimple_debug_bind_p (stmt))
967*38fd1498Szrj     gimple_debug_bind_set_var (stmt, dslot->to);
968*38fd1498Szrj   else if (gimple_debug_source_bind_p (stmt))
969*38fd1498Szrj     gimple_debug_source_bind_set_var (stmt, dslot->to);
970*38fd1498Szrj 
971*38fd1498Szrj   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
972*38fd1498Szrj   {
973*38fd1498Szrj     name = USE_FROM_PTR (use);
974*38fd1498Szrj     if (TREE_CODE (name) != SSA_NAME)
975*38fd1498Szrj       continue;
976*38fd1498Szrj 
977*38fd1498Szrj     elt.version = SSA_NAME_VERSION (name);
978*38fd1498Szrj     slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
979*38fd1498Szrj     if (!slot)
980*38fd1498Szrj       {
981*38fd1498Szrj 	gimple_debug_bind_reset_value (stmt);
982*38fd1498Szrj 	update_stmt (stmt);
983*38fd1498Szrj 	break;
984*38fd1498Szrj       }
985*38fd1498Szrj 
986*38fd1498Szrj     SET_USE (use, (*slot)->new_name);
987*38fd1498Szrj   }
988*38fd1498Szrj 
989*38fd1498Szrj   return false;
990*38fd1498Szrj }
991*38fd1498Szrj 
992*38fd1498Szrj /* Callback for htab_traverse.  Adds a field corresponding to the reduction
993*38fd1498Szrj    specified in SLOT. The type is passed in DATA.  */
994*38fd1498Szrj 
995*38fd1498Szrj int
add_field_for_reduction(reduction_info ** slot,tree type)996*38fd1498Szrj add_field_for_reduction (reduction_info **slot, tree type)
997*38fd1498Szrj {
998*38fd1498Szrj 
999*38fd1498Szrj   struct reduction_info *const red = *slot;
1000*38fd1498Szrj   tree var = reduc_stmt_res (red->reduc_stmt);
1001*38fd1498Szrj   tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1002*38fd1498Szrj 			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1003*38fd1498Szrj 
1004*38fd1498Szrj   insert_field_into_struct (type, field);
1005*38fd1498Szrj 
1006*38fd1498Szrj   red->field = field;
1007*38fd1498Szrj 
1008*38fd1498Szrj   return 1;
1009*38fd1498Szrj }
1010*38fd1498Szrj 
1011*38fd1498Szrj /* Callback for htab_traverse.  Adds a field corresponding to a ssa name
1012*38fd1498Szrj    described in SLOT. The type is passed in DATA.  */
1013*38fd1498Szrj 
1014*38fd1498Szrj int
add_field_for_name(name_to_copy_elt ** slot,tree type)1015*38fd1498Szrj add_field_for_name (name_to_copy_elt **slot, tree type)
1016*38fd1498Szrj {
1017*38fd1498Szrj   struct name_to_copy_elt *const elt = *slot;
1018*38fd1498Szrj   tree name = ssa_name (elt->version);
1019*38fd1498Szrj   tree field = build_decl (UNKNOWN_LOCATION,
1020*38fd1498Szrj 			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1021*38fd1498Szrj 			   TREE_TYPE (name));
1022*38fd1498Szrj 
1023*38fd1498Szrj   insert_field_into_struct (type, field);
1024*38fd1498Szrj   elt->field = field;
1025*38fd1498Szrj 
1026*38fd1498Szrj   return 1;
1027*38fd1498Szrj }
1028*38fd1498Szrj 
1029*38fd1498Szrj /* Callback for htab_traverse.  A local result is the intermediate result
1030*38fd1498Szrj    computed by a single
1031*38fd1498Szrj    thread, or the initial value in case no iteration was executed.
1032*38fd1498Szrj    This function creates a phi node reflecting these values.
1033*38fd1498Szrj    The phi's result will be stored in NEW_PHI field of the
1034*38fd1498Szrj    reduction's data structure.  */
1035*38fd1498Szrj 
1036*38fd1498Szrj int
create_phi_for_local_result(reduction_info ** slot,struct loop * loop)1037*38fd1498Szrj create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1038*38fd1498Szrj {
1039*38fd1498Szrj   struct reduction_info *const reduc = *slot;
1040*38fd1498Szrj   edge e;
1041*38fd1498Szrj   gphi *new_phi;
1042*38fd1498Szrj   basic_block store_bb, continue_bb;
1043*38fd1498Szrj   tree local_res;
1044*38fd1498Szrj   source_location locus;
1045*38fd1498Szrj 
1046*38fd1498Szrj   /* STORE_BB is the block where the phi
1047*38fd1498Szrj      should be stored.  It is the destination of the loop exit.
1048*38fd1498Szrj      (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
1049*38fd1498Szrj   continue_bb = single_pred (loop->latch);
1050*38fd1498Szrj   store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1051*38fd1498Szrj 
1052*38fd1498Szrj   /* STORE_BB has two predecessors.  One coming from  the loop
1053*38fd1498Szrj      (the reduction's result is computed at the loop),
1054*38fd1498Szrj      and another coming from a block preceding the loop,
1055*38fd1498Szrj      when no iterations
1056*38fd1498Szrj      are executed (the initial value should be taken).  */
1057*38fd1498Szrj   if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1058*38fd1498Szrj     e = EDGE_PRED (store_bb, 1);
1059*38fd1498Szrj   else
1060*38fd1498Szrj     e = EDGE_PRED (store_bb, 0);
1061*38fd1498Szrj   tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1062*38fd1498Szrj   local_res = copy_ssa_name (lhs);
1063*38fd1498Szrj   locus = gimple_location (reduc->reduc_stmt);
1064*38fd1498Szrj   new_phi = create_phi_node (local_res, store_bb);
1065*38fd1498Szrj   add_phi_arg (new_phi, reduc->init, e, locus);
1066*38fd1498Szrj   add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1067*38fd1498Szrj   reduc->new_phi = new_phi;
1068*38fd1498Szrj 
1069*38fd1498Szrj   return 1;
1070*38fd1498Szrj }
1071*38fd1498Szrj 
1072*38fd1498Szrj struct clsn_data
1073*38fd1498Szrj {
1074*38fd1498Szrj   tree store;
1075*38fd1498Szrj   tree load;
1076*38fd1498Szrj 
1077*38fd1498Szrj   basic_block store_bb;
1078*38fd1498Szrj   basic_block load_bb;
1079*38fd1498Szrj };
1080*38fd1498Szrj 
1081*38fd1498Szrj /* Callback for htab_traverse.  Create an atomic instruction for the
1082*38fd1498Szrj    reduction described in SLOT.
1083*38fd1498Szrj    DATA annotates the place in memory the atomic operation relates to,
1084*38fd1498Szrj    and the basic block it needs to be generated in.  */
1085*38fd1498Szrj 
1086*38fd1498Szrj int
create_call_for_reduction_1(reduction_info ** slot,struct clsn_data * clsn_data)1087*38fd1498Szrj create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1088*38fd1498Szrj {
1089*38fd1498Szrj   struct reduction_info *const reduc = *slot;
1090*38fd1498Szrj   gimple_stmt_iterator gsi;
1091*38fd1498Szrj   tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1092*38fd1498Szrj   tree load_struct;
1093*38fd1498Szrj   basic_block bb;
1094*38fd1498Szrj   basic_block new_bb;
1095*38fd1498Szrj   edge e;
1096*38fd1498Szrj   tree t, addr, ref, x;
1097*38fd1498Szrj   tree tmp_load, name;
1098*38fd1498Szrj   gimple *load;
1099*38fd1498Szrj 
1100*38fd1498Szrj   if (reduc->reduc_addr == NULL_TREE)
1101*38fd1498Szrj     {
1102*38fd1498Szrj       load_struct = build_simple_mem_ref (clsn_data->load);
1103*38fd1498Szrj       t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1104*38fd1498Szrj 
1105*38fd1498Szrj       addr = build_addr (t);
1106*38fd1498Szrj     }
1107*38fd1498Szrj   else
1108*38fd1498Szrj     {
1109*38fd1498Szrj       /* Set the address for the atomic store.  */
1110*38fd1498Szrj       addr = reduc->reduc_addr;
1111*38fd1498Szrj 
1112*38fd1498Szrj       /* Remove the non-atomic store '*addr = sum'.  */
1113*38fd1498Szrj       tree res = PHI_RESULT (reduc->keep_res);
1114*38fd1498Szrj       use_operand_p use_p;
1115*38fd1498Szrj       gimple *stmt;
1116*38fd1498Szrj       bool single_use_p = single_imm_use (res, &use_p, &stmt);
1117*38fd1498Szrj       gcc_assert (single_use_p);
1118*38fd1498Szrj       replace_uses_by (gimple_vdef (stmt),
1119*38fd1498Szrj 		       gimple_vuse (stmt));
1120*38fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1121*38fd1498Szrj       gsi_remove (&gsi, true);
1122*38fd1498Szrj     }
1123*38fd1498Szrj 
1124*38fd1498Szrj   /* Create phi node.  */
1125*38fd1498Szrj   bb = clsn_data->load_bb;
1126*38fd1498Szrj 
1127*38fd1498Szrj   gsi = gsi_last_bb (bb);
1128*38fd1498Szrj   e = split_block (bb, gsi_stmt (gsi));
1129*38fd1498Szrj   new_bb = e->dest;
1130*38fd1498Szrj 
1131*38fd1498Szrj   tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1132*38fd1498Szrj   tmp_load = make_ssa_name (tmp_load);
1133*38fd1498Szrj   load = gimple_build_omp_atomic_load (tmp_load, addr);
1134*38fd1498Szrj   SSA_NAME_DEF_STMT (tmp_load) = load;
1135*38fd1498Szrj   gsi = gsi_start_bb (new_bb);
1136*38fd1498Szrj   gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1137*38fd1498Szrj 
1138*38fd1498Szrj   e = split_block (new_bb, load);
1139*38fd1498Szrj   new_bb = e->dest;
1140*38fd1498Szrj   gsi = gsi_start_bb (new_bb);
1141*38fd1498Szrj   ref = tmp_load;
1142*38fd1498Szrj   x = fold_build2 (reduc->reduction_code,
1143*38fd1498Szrj 		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1144*38fd1498Szrj 		   PHI_RESULT (reduc->new_phi));
1145*38fd1498Szrj 
1146*38fd1498Szrj   name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1147*38fd1498Szrj 				   GSI_CONTINUE_LINKING);
1148*38fd1498Szrj 
1149*38fd1498Szrj   gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1150*38fd1498Szrj   return 1;
1151*38fd1498Szrj }
1152*38fd1498Szrj 
1153*38fd1498Szrj /* Create the atomic operation at the join point of the threads.
1154*38fd1498Szrj    REDUCTION_LIST describes the reductions in the LOOP.
1155*38fd1498Szrj    LD_ST_DATA describes the shared data structure where
1156*38fd1498Szrj    shared data is stored in and loaded from.  */
1157*38fd1498Szrj static void
create_call_for_reduction(struct loop * loop,reduction_info_table_type * reduction_list,struct clsn_data * ld_st_data)1158*38fd1498Szrj create_call_for_reduction (struct loop *loop,
1159*38fd1498Szrj 			   reduction_info_table_type *reduction_list,
1160*38fd1498Szrj 			   struct clsn_data *ld_st_data)
1161*38fd1498Szrj {
1162*38fd1498Szrj   reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1163*38fd1498Szrj   /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
1164*38fd1498Szrj   basic_block continue_bb = single_pred (loop->latch);
1165*38fd1498Szrj   ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1166*38fd1498Szrj   reduction_list
1167*38fd1498Szrj     ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1168*38fd1498Szrj }
1169*38fd1498Szrj 
1170*38fd1498Szrj /* Callback for htab_traverse.  Loads the final reduction value at the
1171*38fd1498Szrj    join point of all threads, and inserts it in the right place.  */
1172*38fd1498Szrj 
1173*38fd1498Szrj int
create_loads_for_reductions(reduction_info ** slot,struct clsn_data * clsn_data)1174*38fd1498Szrj create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1175*38fd1498Szrj {
1176*38fd1498Szrj   struct reduction_info *const red = *slot;
1177*38fd1498Szrj   gimple *stmt;
1178*38fd1498Szrj   gimple_stmt_iterator gsi;
1179*38fd1498Szrj   tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1180*38fd1498Szrj   tree load_struct;
1181*38fd1498Szrj   tree name;
1182*38fd1498Szrj   tree x;
1183*38fd1498Szrj 
1184*38fd1498Szrj   /* If there's no exit phi, the result of the reduction is unused.  */
1185*38fd1498Szrj   if (red->keep_res == NULL)
1186*38fd1498Szrj     return 1;
1187*38fd1498Szrj 
1188*38fd1498Szrj   gsi = gsi_after_labels (clsn_data->load_bb);
1189*38fd1498Szrj   load_struct = build_simple_mem_ref (clsn_data->load);
1190*38fd1498Szrj   load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1191*38fd1498Szrj 			NULL_TREE);
1192*38fd1498Szrj 
1193*38fd1498Szrj   x = load_struct;
1194*38fd1498Szrj   name = PHI_RESULT (red->keep_res);
1195*38fd1498Szrj   stmt = gimple_build_assign (name, x);
1196*38fd1498Szrj 
1197*38fd1498Szrj   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1198*38fd1498Szrj 
1199*38fd1498Szrj   for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1200*38fd1498Szrj        !gsi_end_p (gsi); gsi_next (&gsi))
1201*38fd1498Szrj     if (gsi_stmt (gsi) == red->keep_res)
1202*38fd1498Szrj       {
1203*38fd1498Szrj 	remove_phi_node (&gsi, false);
1204*38fd1498Szrj 	return 1;
1205*38fd1498Szrj       }
1206*38fd1498Szrj   gcc_unreachable ();
1207*38fd1498Szrj }
1208*38fd1498Szrj 
1209*38fd1498Szrj /* Load the reduction result that was stored in LD_ST_DATA.
1210*38fd1498Szrj    REDUCTION_LIST describes the list of reductions that the
1211*38fd1498Szrj    loads should be generated for.  */
1212*38fd1498Szrj static void
create_final_loads_for_reduction(reduction_info_table_type * reduction_list,struct clsn_data * ld_st_data)1213*38fd1498Szrj create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1214*38fd1498Szrj 				  struct clsn_data *ld_st_data)
1215*38fd1498Szrj {
1216*38fd1498Szrj   gimple_stmt_iterator gsi;
1217*38fd1498Szrj   tree t;
1218*38fd1498Szrj   gimple *stmt;
1219*38fd1498Szrj 
1220*38fd1498Szrj   gsi = gsi_after_labels (ld_st_data->load_bb);
1221*38fd1498Szrj   t = build_fold_addr_expr (ld_st_data->store);
1222*38fd1498Szrj   stmt = gimple_build_assign (ld_st_data->load, t);
1223*38fd1498Szrj 
1224*38fd1498Szrj   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1225*38fd1498Szrj 
1226*38fd1498Szrj   reduction_list
1227*38fd1498Szrj     ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1228*38fd1498Szrj 
1229*38fd1498Szrj }
1230*38fd1498Szrj 
1231*38fd1498Szrj /* Callback for htab_traverse.  Store the neutral value for the
1232*38fd1498Szrj   particular reduction's operation, e.g. 0 for PLUS_EXPR,
1233*38fd1498Szrj   1 for MULT_EXPR, etc. into the reduction field.
1234*38fd1498Szrj   The reduction is specified in SLOT. The store information is
1235*38fd1498Szrj   passed in DATA.  */
1236*38fd1498Szrj 
1237*38fd1498Szrj int
create_stores_for_reduction(reduction_info ** slot,struct clsn_data * clsn_data)1238*38fd1498Szrj create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1239*38fd1498Szrj {
1240*38fd1498Szrj   struct reduction_info *const red = *slot;
1241*38fd1498Szrj   tree t;
1242*38fd1498Szrj   gimple *stmt;
1243*38fd1498Szrj   gimple_stmt_iterator gsi;
1244*38fd1498Szrj   tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1245*38fd1498Szrj 
1246*38fd1498Szrj   gsi = gsi_last_bb (clsn_data->store_bb);
1247*38fd1498Szrj   t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1248*38fd1498Szrj   stmt = gimple_build_assign (t, red->initial_value);
1249*38fd1498Szrj   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1250*38fd1498Szrj 
1251*38fd1498Szrj   return 1;
1252*38fd1498Szrj }
1253*38fd1498Szrj 
1254*38fd1498Szrj /* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1255*38fd1498Szrj    store to a field of STORE in STORE_BB for the ssa name and its duplicate
1256*38fd1498Szrj    specified in SLOT.  */
1257*38fd1498Szrj 
1258*38fd1498Szrj int
create_loads_and_stores_for_name(name_to_copy_elt ** slot,struct clsn_data * clsn_data)1259*38fd1498Szrj create_loads_and_stores_for_name (name_to_copy_elt **slot,
1260*38fd1498Szrj 				  struct clsn_data *clsn_data)
1261*38fd1498Szrj {
1262*38fd1498Szrj   struct name_to_copy_elt *const elt = *slot;
1263*38fd1498Szrj   tree t;
1264*38fd1498Szrj   gimple *stmt;
1265*38fd1498Szrj   gimple_stmt_iterator gsi;
1266*38fd1498Szrj   tree type = TREE_TYPE (elt->new_name);
1267*38fd1498Szrj   tree load_struct;
1268*38fd1498Szrj 
1269*38fd1498Szrj   gsi = gsi_last_bb (clsn_data->store_bb);
1270*38fd1498Szrj   t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1271*38fd1498Szrj   stmt = gimple_build_assign (t, ssa_name (elt->version));
1272*38fd1498Szrj   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1273*38fd1498Szrj 
1274*38fd1498Szrj   gsi = gsi_last_bb (clsn_data->load_bb);
1275*38fd1498Szrj   load_struct = build_simple_mem_ref (clsn_data->load);
1276*38fd1498Szrj   t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1277*38fd1498Szrj   stmt = gimple_build_assign (elt->new_name, t);
1278*38fd1498Szrj   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1279*38fd1498Szrj 
1280*38fd1498Szrj   return 1;
1281*38fd1498Szrj }
1282*38fd1498Szrj 
1283*38fd1498Szrj /* Moves all the variables used in LOOP and defined outside of it (including
1284*38fd1498Szrj    the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1285*38fd1498Szrj    name) to a structure created for this purpose.  The code
1286*38fd1498Szrj 
1287*38fd1498Szrj    while (1)
1288*38fd1498Szrj      {
1289*38fd1498Szrj        use (a);
1290*38fd1498Szrj        use (b);
1291*38fd1498Szrj      }
1292*38fd1498Szrj 
1293*38fd1498Szrj    is transformed this way:
1294*38fd1498Szrj 
1295*38fd1498Szrj    bb0:
1296*38fd1498Szrj    old.a = a;
1297*38fd1498Szrj    old.b = b;
1298*38fd1498Szrj 
1299*38fd1498Szrj    bb1:
1300*38fd1498Szrj    a' = new->a;
1301*38fd1498Szrj    b' = new->b;
1302*38fd1498Szrj    while (1)
1303*38fd1498Szrj      {
1304*38fd1498Szrj        use (a');
1305*38fd1498Szrj        use (b');
1306*38fd1498Szrj      }
1307*38fd1498Szrj 
1308*38fd1498Szrj    `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
1309*38fd1498Szrj    pointer `new' is intentionally not initialized (the loop will be split to a
1310*38fd1498Szrj    separate function later, and `new' will be initialized from its arguments).
1311*38fd1498Szrj    LD_ST_DATA holds information about the shared data structure used to pass
1312*38fd1498Szrj    information among the threads.  It is initialized here, and
1313*38fd1498Szrj    gen_parallel_loop will pass it to create_call_for_reduction that
1314*38fd1498Szrj    needs this information.  REDUCTION_LIST describes the reductions
1315*38fd1498Szrj    in LOOP.  */
1316*38fd1498Szrj 
1317*38fd1498Szrj static void
separate_decls_in_region(edge entry,edge exit,reduction_info_table_type * reduction_list,tree * arg_struct,tree * new_arg_struct,struct clsn_data * ld_st_data)1318*38fd1498Szrj separate_decls_in_region (edge entry, edge exit,
1319*38fd1498Szrj 			  reduction_info_table_type *reduction_list,
1320*38fd1498Szrj 			  tree *arg_struct, tree *new_arg_struct,
1321*38fd1498Szrj 			  struct clsn_data *ld_st_data)
1322*38fd1498Szrj 
1323*38fd1498Szrj {
1324*38fd1498Szrj   basic_block bb1 = split_edge (entry);
1325*38fd1498Szrj   basic_block bb0 = single_pred (bb1);
1326*38fd1498Szrj   name_to_copy_table_type name_copies (10);
1327*38fd1498Szrj   int_tree_htab_type decl_copies (10);
1328*38fd1498Szrj   unsigned i;
1329*38fd1498Szrj   tree type, type_name, nvar;
1330*38fd1498Szrj   gimple_stmt_iterator gsi;
1331*38fd1498Szrj   struct clsn_data clsn_data;
1332*38fd1498Szrj   auto_vec<basic_block, 3> body;
1333*38fd1498Szrj   basic_block bb;
1334*38fd1498Szrj   basic_block entry_bb = bb1;
1335*38fd1498Szrj   basic_block exit_bb = exit->dest;
1336*38fd1498Szrj   bool has_debug_stmt = false;
1337*38fd1498Szrj 
1338*38fd1498Szrj   entry = single_succ_edge (entry_bb);
1339*38fd1498Szrj   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1340*38fd1498Szrj 
1341*38fd1498Szrj   FOR_EACH_VEC_ELT (body, i, bb)
1342*38fd1498Szrj     {
1343*38fd1498Szrj       if (bb != entry_bb && bb != exit_bb)
1344*38fd1498Szrj 	{
1345*38fd1498Szrj 	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1346*38fd1498Szrj 	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1347*38fd1498Szrj 					   &name_copies, &decl_copies);
1348*38fd1498Szrj 
1349*38fd1498Szrj 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1350*38fd1498Szrj 	    {
1351*38fd1498Szrj 	      gimple *stmt = gsi_stmt (gsi);
1352*38fd1498Szrj 
1353*38fd1498Szrj 	      if (is_gimple_debug (stmt))
1354*38fd1498Szrj 		has_debug_stmt = true;
1355*38fd1498Szrj 	      else
1356*38fd1498Szrj 		separate_decls_in_region_stmt (entry, exit, stmt,
1357*38fd1498Szrj 					       &name_copies, &decl_copies);
1358*38fd1498Szrj 	    }
1359*38fd1498Szrj 	}
1360*38fd1498Szrj     }
1361*38fd1498Szrj 
1362*38fd1498Szrj   /* Now process debug bind stmts.  We must not create decls while
1363*38fd1498Szrj      processing debug stmts, so we defer their processing so as to
1364*38fd1498Szrj      make sure we will have debug info for as many variables as
1365*38fd1498Szrj      possible (all of those that were dealt with in the loop above),
1366*38fd1498Szrj      and discard those for which we know there's nothing we can
1367*38fd1498Szrj      do.  */
1368*38fd1498Szrj   if (has_debug_stmt)
1369*38fd1498Szrj     FOR_EACH_VEC_ELT (body, i, bb)
1370*38fd1498Szrj       if (bb != entry_bb && bb != exit_bb)
1371*38fd1498Szrj 	{
1372*38fd1498Szrj 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1373*38fd1498Szrj 	    {
1374*38fd1498Szrj 	      gimple *stmt = gsi_stmt (gsi);
1375*38fd1498Szrj 
1376*38fd1498Szrj 	      if (is_gimple_debug (stmt))
1377*38fd1498Szrj 		{
1378*38fd1498Szrj 		  if (separate_decls_in_region_debug (stmt, &name_copies,
1379*38fd1498Szrj 						      &decl_copies))
1380*38fd1498Szrj 		    {
1381*38fd1498Szrj 		      gsi_remove (&gsi, true);
1382*38fd1498Szrj 		      continue;
1383*38fd1498Szrj 		    }
1384*38fd1498Szrj 		}
1385*38fd1498Szrj 
1386*38fd1498Szrj 	      gsi_next (&gsi);
1387*38fd1498Szrj 	    }
1388*38fd1498Szrj 	}
1389*38fd1498Szrj 
1390*38fd1498Szrj   if (name_copies.elements () == 0 && reduction_list->elements () == 0)
1391*38fd1498Szrj     {
1392*38fd1498Szrj       /* It may happen that there is nothing to copy (if there are only
1393*38fd1498Szrj          loop carried and external variables in the loop).  */
1394*38fd1498Szrj       *arg_struct = NULL;
1395*38fd1498Szrj       *new_arg_struct = NULL;
1396*38fd1498Szrj     }
1397*38fd1498Szrj   else
1398*38fd1498Szrj     {
1399*38fd1498Szrj       /* Create the type for the structure to store the ssa names to.  */
1400*38fd1498Szrj       type = lang_hooks.types.make_type (RECORD_TYPE);
1401*38fd1498Szrj       type_name = build_decl (UNKNOWN_LOCATION,
1402*38fd1498Szrj 			      TYPE_DECL, create_tmp_var_name (".paral_data"),
1403*38fd1498Szrj 			      type);
1404*38fd1498Szrj       TYPE_NAME (type) = type_name;
1405*38fd1498Szrj 
1406*38fd1498Szrj       name_copies.traverse <tree, add_field_for_name> (type);
1407*38fd1498Szrj       if (reduction_list && reduction_list->elements () > 0)
1408*38fd1498Szrj 	{
1409*38fd1498Szrj 	  /* Create the fields for reductions.  */
1410*38fd1498Szrj 	  reduction_list->traverse <tree, add_field_for_reduction> (type);
1411*38fd1498Szrj 	}
1412*38fd1498Szrj       layout_type (type);
1413*38fd1498Szrj 
1414*38fd1498Szrj       /* Create the loads and stores.  */
1415*38fd1498Szrj       *arg_struct = create_tmp_var (type, ".paral_data_store");
1416*38fd1498Szrj       nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1417*38fd1498Szrj       *new_arg_struct = make_ssa_name (nvar);
1418*38fd1498Szrj 
1419*38fd1498Szrj       ld_st_data->store = *arg_struct;
1420*38fd1498Szrj       ld_st_data->load = *new_arg_struct;
1421*38fd1498Szrj       ld_st_data->store_bb = bb0;
1422*38fd1498Szrj       ld_st_data->load_bb = bb1;
1423*38fd1498Szrj 
1424*38fd1498Szrj       name_copies
1425*38fd1498Szrj 	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
1426*38fd1498Szrj 		  (ld_st_data);
1427*38fd1498Szrj 
1428*38fd1498Szrj       /* Load the calculation from memory (after the join of the threads).  */
1429*38fd1498Szrj 
1430*38fd1498Szrj       if (reduction_list && reduction_list->elements () > 0)
1431*38fd1498Szrj 	{
1432*38fd1498Szrj 	  reduction_list
1433*38fd1498Szrj 	    ->traverse <struct clsn_data *, create_stores_for_reduction>
1434*38fd1498Szrj 	    (ld_st_data);
1435*38fd1498Szrj 	  clsn_data.load = make_ssa_name (nvar);
1436*38fd1498Szrj 	  clsn_data.load_bb = exit->dest;
1437*38fd1498Szrj 	  clsn_data.store = ld_st_data->store;
1438*38fd1498Szrj 	  create_final_loads_for_reduction (reduction_list, &clsn_data);
1439*38fd1498Szrj 	}
1440*38fd1498Szrj     }
1441*38fd1498Szrj }
1442*38fd1498Szrj 
1443*38fd1498Szrj /* Returns true if FN was created to run in parallel.  */
1444*38fd1498Szrj 
1445*38fd1498Szrj bool
parallelized_function_p(tree fndecl)1446*38fd1498Szrj parallelized_function_p (tree fndecl)
1447*38fd1498Szrj {
1448*38fd1498Szrj   cgraph_node *node = cgraph_node::get (fndecl);
1449*38fd1498Szrj   gcc_assert (node != NULL);
1450*38fd1498Szrj   return node->parallelized_function;
1451*38fd1498Szrj }
1452*38fd1498Szrj 
1453*38fd1498Szrj /* Creates and returns an empty function that will receive the body of
1454*38fd1498Szrj    a parallelized loop.  */
1455*38fd1498Szrj 
1456*38fd1498Szrj static tree
create_loop_fn(location_t loc)1457*38fd1498Szrj create_loop_fn (location_t loc)
1458*38fd1498Szrj {
1459*38fd1498Szrj   char buf[100];
1460*38fd1498Szrj   char *tname;
1461*38fd1498Szrj   tree decl, type, name, t;
1462*38fd1498Szrj   struct function *act_cfun = cfun;
1463*38fd1498Szrj   static unsigned loopfn_num;
1464*38fd1498Szrj 
1465*38fd1498Szrj   loc = LOCATION_LOCUS (loc);
1466*38fd1498Szrj   snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1467*38fd1498Szrj   ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1468*38fd1498Szrj   clean_symbol_name (tname);
1469*38fd1498Szrj   name = get_identifier (tname);
1470*38fd1498Szrj   type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1471*38fd1498Szrj 
1472*38fd1498Szrj   decl = build_decl (loc, FUNCTION_DECL, name, type);
1473*38fd1498Szrj   TREE_STATIC (decl) = 1;
1474*38fd1498Szrj   TREE_USED (decl) = 1;
1475*38fd1498Szrj   DECL_ARTIFICIAL (decl) = 1;
1476*38fd1498Szrj   DECL_IGNORED_P (decl) = 0;
1477*38fd1498Szrj   TREE_PUBLIC (decl) = 0;
1478*38fd1498Szrj   DECL_UNINLINABLE (decl) = 1;
1479*38fd1498Szrj   DECL_EXTERNAL (decl) = 0;
1480*38fd1498Szrj   DECL_CONTEXT (decl) = NULL_TREE;
1481*38fd1498Szrj   DECL_INITIAL (decl) = make_node (BLOCK);
1482*38fd1498Szrj   BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
1483*38fd1498Szrj 
1484*38fd1498Szrj   t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1485*38fd1498Szrj   DECL_ARTIFICIAL (t) = 1;
1486*38fd1498Szrj   DECL_IGNORED_P (t) = 1;
1487*38fd1498Szrj   DECL_RESULT (decl) = t;
1488*38fd1498Szrj 
1489*38fd1498Szrj   t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1490*38fd1498Szrj 		  ptr_type_node);
1491*38fd1498Szrj   DECL_ARTIFICIAL (t) = 1;
1492*38fd1498Szrj   DECL_ARG_TYPE (t) = ptr_type_node;
1493*38fd1498Szrj   DECL_CONTEXT (t) = decl;
1494*38fd1498Szrj   TREE_USED (t) = 1;
1495*38fd1498Szrj   DECL_ARGUMENTS (decl) = t;
1496*38fd1498Szrj 
1497*38fd1498Szrj   allocate_struct_function (decl, false);
1498*38fd1498Szrj 
1499*38fd1498Szrj   /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1500*38fd1498Szrj      it.  */
1501*38fd1498Szrj   set_cfun (act_cfun);
1502*38fd1498Szrj 
1503*38fd1498Szrj   return decl;
1504*38fd1498Szrj }
1505*38fd1498Szrj 
1506*38fd1498Szrj /* Replace uses of NAME by VAL in block BB.  */
1507*38fd1498Szrj 
1508*38fd1498Szrj static void
replace_uses_in_bb_by(tree name,tree val,basic_block bb)1509*38fd1498Szrj replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1510*38fd1498Szrj {
1511*38fd1498Szrj   gimple *use_stmt;
1512*38fd1498Szrj   imm_use_iterator imm_iter;
1513*38fd1498Szrj 
1514*38fd1498Szrj   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1515*38fd1498Szrj     {
1516*38fd1498Szrj       if (gimple_bb (use_stmt) != bb)
1517*38fd1498Szrj 	continue;
1518*38fd1498Szrj 
1519*38fd1498Szrj       use_operand_p use_p;
1520*38fd1498Szrj       FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1521*38fd1498Szrj 	SET_USE (use_p, val);
1522*38fd1498Szrj     }
1523*38fd1498Szrj }
1524*38fd1498Szrj 
1525*38fd1498Szrj /* Do transformation from:
1526*38fd1498Szrj 
1527*38fd1498Szrj      <bb preheader>:
1528*38fd1498Szrj      ...
1529*38fd1498Szrj      goto <bb header>
1530*38fd1498Szrj 
1531*38fd1498Szrj      <bb header>:
1532*38fd1498Szrj      ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1533*38fd1498Szrj      sum_a = PHI <sum_init (preheader), sum_b (latch)>
1534*38fd1498Szrj      ...
1535*38fd1498Szrj      use (ivtmp_a)
1536*38fd1498Szrj      ...
1537*38fd1498Szrj      sum_b = sum_a + sum_update
1538*38fd1498Szrj      ...
1539*38fd1498Szrj      if (ivtmp_a < n)
1540*38fd1498Szrj        goto <bb latch>;
1541*38fd1498Szrj      else
1542*38fd1498Szrj        goto <bb exit>;
1543*38fd1498Szrj 
1544*38fd1498Szrj      <bb latch>:
1545*38fd1498Szrj      ivtmp_b = ivtmp_a + 1;
1546*38fd1498Szrj      goto <bb header>
1547*38fd1498Szrj 
1548*38fd1498Szrj      <bb exit>:
1549*38fd1498Szrj      sum_z = PHI <sum_b (cond[1]), ...>
1550*38fd1498Szrj 
1551*38fd1498Szrj      [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1552*38fd1498Szrj 	 that's <bb header>.
1553*38fd1498Szrj 
1554*38fd1498Szrj    to:
1555*38fd1498Szrj 
1556*38fd1498Szrj      <bb preheader>:
1557*38fd1498Szrj      ...
1558*38fd1498Szrj      goto <bb newheader>
1559*38fd1498Szrj 
1560*38fd1498Szrj      <bb header>:
1561*38fd1498Szrj      ivtmp_a = PHI <ivtmp_c (latch)>
1562*38fd1498Szrj      sum_a = PHI <sum_c (latch)>
1563*38fd1498Szrj      ...
1564*38fd1498Szrj      use (ivtmp_a)
1565*38fd1498Szrj      ...
1566*38fd1498Szrj      sum_b = sum_a + sum_update
1567*38fd1498Szrj      ...
1568*38fd1498Szrj      goto <bb latch>;
1569*38fd1498Szrj 
1570*38fd1498Szrj      <bb newheader>:
1571*38fd1498Szrj      ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1572*38fd1498Szrj      sum_c = PHI <sum_init (preheader), sum_b (latch)>
1573*38fd1498Szrj      if (ivtmp_c < n + 1)
1574*38fd1498Szrj        goto <bb header>;
1575*38fd1498Szrj      else
1576*38fd1498Szrj        goto <bb newexit>;
1577*38fd1498Szrj 
1578*38fd1498Szrj      <bb latch>:
1579*38fd1498Szrj      ivtmp_b = ivtmp_a + 1;
1580*38fd1498Szrj      goto <bb newheader>
1581*38fd1498Szrj 
1582*38fd1498Szrj      <bb newexit>:
1583*38fd1498Szrj      sum_y = PHI <sum_c (newheader)>
1584*38fd1498Szrj 
1585*38fd1498Szrj      <bb exit>:
1586*38fd1498Szrj      sum_z = PHI <sum_y (newexit), ...>
1587*38fd1498Szrj 
1588*38fd1498Szrj 
1589*38fd1498Szrj    In unified diff format:
1590*38fd1498Szrj 
1591*38fd1498Szrj       <bb preheader>:
1592*38fd1498Szrj       ...
1593*38fd1498Szrj -     goto <bb header>
1594*38fd1498Szrj +     goto <bb newheader>
1595*38fd1498Szrj 
1596*38fd1498Szrj       <bb header>:
1597*38fd1498Szrj -     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1598*38fd1498Szrj -     sum_a = PHI <sum_init (preheader), sum_b (latch)>
1599*38fd1498Szrj +     ivtmp_a = PHI <ivtmp_c (latch)>
1600*38fd1498Szrj +     sum_a = PHI <sum_c (latch)>
1601*38fd1498Szrj       ...
1602*38fd1498Szrj       use (ivtmp_a)
1603*38fd1498Szrj       ...
1604*38fd1498Szrj       sum_b = sum_a + sum_update
1605*38fd1498Szrj       ...
1606*38fd1498Szrj -     if (ivtmp_a < n)
1607*38fd1498Szrj -       goto <bb latch>;
1608*38fd1498Szrj +     goto <bb latch>;
1609*38fd1498Szrj +
1610*38fd1498Szrj +     <bb newheader>:
1611*38fd1498Szrj +     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1612*38fd1498Szrj +     sum_c = PHI <sum_init (preheader), sum_b (latch)>
1613*38fd1498Szrj +     if (ivtmp_c < n + 1)
1614*38fd1498Szrj +       goto <bb header>;
1615*38fd1498Szrj       else
1616*38fd1498Szrj 	goto <bb exit>;
1617*38fd1498Szrj 
1618*38fd1498Szrj       <bb latch>:
1619*38fd1498Szrj       ivtmp_b = ivtmp_a + 1;
1620*38fd1498Szrj -     goto <bb header>
1621*38fd1498Szrj +     goto <bb newheader>
1622*38fd1498Szrj 
1623*38fd1498Szrj +    <bb newexit>:
1624*38fd1498Szrj +    sum_y = PHI <sum_c (newheader)>
1625*38fd1498Szrj 
1626*38fd1498Szrj       <bb exit>:
1627*38fd1498Szrj -     sum_z = PHI <sum_b (cond[1]), ...>
1628*38fd1498Szrj +     sum_z = PHI <sum_y (newexit), ...>
1629*38fd1498Szrj 
1630*38fd1498Szrj    Note: the example does not show any virtual phis, but these are handled more
1631*38fd1498Szrj    or less as reductions.
1632*38fd1498Szrj 
1633*38fd1498Szrj 
1634*38fd1498Szrj    Moves the exit condition of LOOP to the beginning of its header.
1635*38fd1498Szrj    REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
1636*38fd1498Szrj    bound.  */
1637*38fd1498Szrj 
1638*38fd1498Szrj static void
transform_to_exit_first_loop_alt(struct loop * loop,reduction_info_table_type * reduction_list,tree bound)1639*38fd1498Szrj transform_to_exit_first_loop_alt (struct loop *loop,
1640*38fd1498Szrj 				  reduction_info_table_type *reduction_list,
1641*38fd1498Szrj 				  tree bound)
1642*38fd1498Szrj {
1643*38fd1498Szrj   basic_block header = loop->header;
1644*38fd1498Szrj   basic_block latch = loop->latch;
1645*38fd1498Szrj   edge exit = single_dom_exit (loop);
1646*38fd1498Szrj   basic_block exit_block = exit->dest;
1647*38fd1498Szrj   gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1648*38fd1498Szrj   tree control = gimple_cond_lhs (cond_stmt);
1649*38fd1498Szrj   edge e;
1650*38fd1498Szrj 
1651*38fd1498Szrj   /* Rewriting virtuals into loop-closed ssa normal form makes this
1652*38fd1498Szrj      transformation simpler.  It also ensures that the virtuals are in
1653*38fd1498Szrj      loop-closed ssa normal from after the transformation, which is required by
1654*38fd1498Szrj      create_parallel_loop.  */
1655*38fd1498Szrj   rewrite_virtuals_into_loop_closed_ssa (loop);
1656*38fd1498Szrj 
1657*38fd1498Szrj   /* Create the new_header block.  */
1658*38fd1498Szrj   basic_block new_header = split_block_before_cond_jump (exit->src);
1659*38fd1498Szrj   edge edge_at_split = single_pred_edge (new_header);
1660*38fd1498Szrj 
1661*38fd1498Szrj   /* Redirect entry edge to new_header.  */
1662*38fd1498Szrj   edge entry = loop_preheader_edge (loop);
1663*38fd1498Szrj   e = redirect_edge_and_branch (entry, new_header);
1664*38fd1498Szrj   gcc_assert (e == entry);
1665*38fd1498Szrj 
1666*38fd1498Szrj   /* Redirect post_inc_edge to new_header.  */
1667*38fd1498Szrj   edge post_inc_edge = single_succ_edge (latch);
1668*38fd1498Szrj   e = redirect_edge_and_branch (post_inc_edge, new_header);
1669*38fd1498Szrj   gcc_assert (e == post_inc_edge);
1670*38fd1498Szrj 
1671*38fd1498Szrj   /* Redirect post_cond_edge to header.  */
1672*38fd1498Szrj   edge post_cond_edge = single_pred_edge (latch);
1673*38fd1498Szrj   e = redirect_edge_and_branch (post_cond_edge, header);
1674*38fd1498Szrj   gcc_assert (e == post_cond_edge);
1675*38fd1498Szrj 
1676*38fd1498Szrj   /* Redirect edge_at_split to latch.  */
1677*38fd1498Szrj   e = redirect_edge_and_branch (edge_at_split, latch);
1678*38fd1498Szrj   gcc_assert (e == edge_at_split);
1679*38fd1498Szrj 
1680*38fd1498Szrj   /* Set the new loop bound.  */
1681*38fd1498Szrj   gimple_cond_set_rhs (cond_stmt, bound);
1682*38fd1498Szrj   update_stmt (cond_stmt);
1683*38fd1498Szrj 
1684*38fd1498Szrj   /* Repair the ssa.  */
1685*38fd1498Szrj   vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1686*38fd1498Szrj   edge_var_map *vm;
1687*38fd1498Szrj   gphi_iterator gsi;
1688*38fd1498Szrj   int i;
1689*38fd1498Szrj   for (gsi = gsi_start_phis (header), i = 0;
1690*38fd1498Szrj        !gsi_end_p (gsi) && v->iterate (i, &vm);
1691*38fd1498Szrj        gsi_next (&gsi), i++)
1692*38fd1498Szrj     {
1693*38fd1498Szrj       gphi *phi = gsi.phi ();
1694*38fd1498Szrj       tree res_a = PHI_RESULT (phi);
1695*38fd1498Szrj 
1696*38fd1498Szrj       /* Create new phi.  */
1697*38fd1498Szrj       tree res_c = copy_ssa_name (res_a, phi);
1698*38fd1498Szrj       gphi *nphi = create_phi_node (res_c, new_header);
1699*38fd1498Szrj 
1700*38fd1498Szrj       /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
1701*38fd1498Szrj       replace_uses_in_bb_by (res_a, res_c, new_header);
1702*38fd1498Szrj 
1703*38fd1498Szrj       /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
1704*38fd1498Szrj       add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1705*38fd1498Szrj 
1706*38fd1498Szrj       /* Replace sum_b with sum_c in exit phi.  */
1707*38fd1498Szrj       tree res_b = redirect_edge_var_map_def (vm);
1708*38fd1498Szrj       replace_uses_in_bb_by (res_b, res_c, exit_block);
1709*38fd1498Szrj 
1710*38fd1498Szrj       struct reduction_info *red = reduction_phi (reduction_list, phi);
1711*38fd1498Szrj       gcc_assert (virtual_operand_p (res_a)
1712*38fd1498Szrj 		  || res_a == control
1713*38fd1498Szrj 		  || red != NULL);
1714*38fd1498Szrj 
1715*38fd1498Szrj       if (red)
1716*38fd1498Szrj 	{
1717*38fd1498Szrj 	  /* Register the new reduction phi.  */
1718*38fd1498Szrj 	  red->reduc_phi = nphi;
1719*38fd1498Szrj 	  gimple_set_uid (red->reduc_phi, red->reduc_version);
1720*38fd1498Szrj 	}
1721*38fd1498Szrj     }
1722*38fd1498Szrj   gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1723*38fd1498Szrj 
1724*38fd1498Szrj   /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
1725*38fd1498Szrj   flush_pending_stmts (entry);
1726*38fd1498Szrj 
1727*38fd1498Szrj   /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
1728*38fd1498Szrj   flush_pending_stmts (post_inc_edge);
1729*38fd1498Szrj 
1730*38fd1498Szrj 
1731*38fd1498Szrj   basic_block new_exit_block = NULL;
1732*38fd1498Szrj   if (!single_pred_p (exit->dest))
1733*38fd1498Szrj     {
1734*38fd1498Szrj       /* Create a new empty exit block, inbetween the new loop header and the
1735*38fd1498Szrj 	 old exit block.  The function separate_decls_in_region needs this block
1736*38fd1498Szrj 	 to insert code that is active on loop exit, but not any other path.  */
1737*38fd1498Szrj       new_exit_block = split_edge (exit);
1738*38fd1498Szrj     }
1739*38fd1498Szrj 
1740*38fd1498Szrj   /* Insert and register the reduction exit phis.  */
1741*38fd1498Szrj   for (gphi_iterator gsi = gsi_start_phis (exit_block);
1742*38fd1498Szrj        !gsi_end_p (gsi);
1743*38fd1498Szrj        gsi_next (&gsi))
1744*38fd1498Szrj     {
1745*38fd1498Szrj       gphi *phi = gsi.phi ();
1746*38fd1498Szrj       gphi *nphi = NULL;
1747*38fd1498Szrj       tree res_z = PHI_RESULT (phi);
1748*38fd1498Szrj       tree res_c;
1749*38fd1498Szrj 
1750*38fd1498Szrj       if (new_exit_block != NULL)
1751*38fd1498Szrj 	{
1752*38fd1498Szrj 	  /* Now that we have a new exit block, duplicate the phi of the old
1753*38fd1498Szrj 	     exit block in the new exit block to preserve loop-closed ssa.  */
1754*38fd1498Szrj 	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
1755*38fd1498Szrj 	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
1756*38fd1498Szrj 	  tree res_y = copy_ssa_name (res_z, phi);
1757*38fd1498Szrj 	  nphi = create_phi_node (res_y, new_exit_block);
1758*38fd1498Szrj 	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1759*38fd1498Szrj 	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1760*38fd1498Szrj 	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1761*38fd1498Szrj 	}
1762*38fd1498Szrj       else
1763*38fd1498Szrj 	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1764*38fd1498Szrj 
1765*38fd1498Szrj       if (virtual_operand_p (res_z))
1766*38fd1498Szrj 	continue;
1767*38fd1498Szrj 
1768*38fd1498Szrj       gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
1769*38fd1498Szrj       struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1770*38fd1498Szrj       if (red != NULL)
1771*38fd1498Szrj 	red->keep_res = (nphi != NULL
1772*38fd1498Szrj 			 ? nphi
1773*38fd1498Szrj 			 : phi);
1774*38fd1498Szrj     }
1775*38fd1498Szrj 
1776*38fd1498Szrj   /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1777*38fd1498Szrj      then we're still using some fields, so only bother about fields that are
1778*38fd1498Szrj      still used: header and latch.
1779*38fd1498Szrj      The loop has a new header bb, so we update it.  The latch bb stays the
1780*38fd1498Szrj      same.  */
1781*38fd1498Szrj   loop->header = new_header;
1782*38fd1498Szrj 
1783*38fd1498Szrj   /* Recalculate dominance info.  */
1784*38fd1498Szrj   free_dominance_info (CDI_DOMINATORS);
1785*38fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
1786*38fd1498Szrj 
1787*38fd1498Szrj   checking_verify_ssa (true, true);
1788*38fd1498Szrj }
1789*38fd1498Szrj 
1790*38fd1498Szrj /* Tries to moves the exit condition of LOOP to the beginning of its header
1791*38fd1498Szrj    without duplication of the loop body.  NIT is the number of iterations of the
1792*38fd1498Szrj    loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
1793*38fd1498Szrj    transformation is successful.  */
1794*38fd1498Szrj 
1795*38fd1498Szrj static bool
try_transform_to_exit_first_loop_alt(struct loop * loop,reduction_info_table_type * reduction_list,tree nit)1796*38fd1498Szrj try_transform_to_exit_first_loop_alt (struct loop *loop,
1797*38fd1498Szrj 				      reduction_info_table_type *reduction_list,
1798*38fd1498Szrj 				      tree nit)
1799*38fd1498Szrj {
1800*38fd1498Szrj   /* Check whether the latch contains a single statement.  */
1801*38fd1498Szrj   if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1802*38fd1498Szrj     return false;
1803*38fd1498Szrj 
1804*38fd1498Szrj   /* Check whether the latch contains no phis.  */
1805*38fd1498Szrj   if (phi_nodes (loop->latch) != NULL)
1806*38fd1498Szrj     return false;
1807*38fd1498Szrj 
1808*38fd1498Szrj   /* Check whether the latch contains the loop iv increment.  */
1809*38fd1498Szrj   edge back = single_succ_edge (loop->latch);
1810*38fd1498Szrj   edge exit = single_dom_exit (loop);
1811*38fd1498Szrj   gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1812*38fd1498Szrj   tree control = gimple_cond_lhs (cond_stmt);
1813*38fd1498Szrj   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1814*38fd1498Szrj   tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1815*38fd1498Szrj   if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1816*38fd1498Szrj     return false;
1817*38fd1498Szrj 
1818*38fd1498Szrj   /* Check whether there's no code between the loop condition and the latch.  */
1819*38fd1498Szrj   if (!single_pred_p (loop->latch)
1820*38fd1498Szrj       || single_pred (loop->latch) != exit->src)
1821*38fd1498Szrj     return false;
1822*38fd1498Szrj 
1823*38fd1498Szrj   tree alt_bound = NULL_TREE;
1824*38fd1498Szrj   tree nit_type = TREE_TYPE (nit);
1825*38fd1498Szrj 
1826*38fd1498Szrj   /* Figure out whether nit + 1 overflows.  */
1827*38fd1498Szrj   if (TREE_CODE (nit) == INTEGER_CST)
1828*38fd1498Szrj     {
1829*38fd1498Szrj       if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
1830*38fd1498Szrj 	{
1831*38fd1498Szrj 	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1832*38fd1498Szrj 				       nit, build_one_cst (nit_type));
1833*38fd1498Szrj 
1834*38fd1498Szrj 	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1835*38fd1498Szrj 	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1836*38fd1498Szrj 	  return true;
1837*38fd1498Szrj 	}
1838*38fd1498Szrj       else
1839*38fd1498Szrj 	{
1840*38fd1498Szrj 	  /* Todo: Figure out if we can trigger this, if it's worth to handle
1841*38fd1498Szrj 	     optimally, and if we can handle it optimally.  */
1842*38fd1498Szrj 	  return false;
1843*38fd1498Szrj 	}
1844*38fd1498Szrj     }
1845*38fd1498Szrj 
1846*38fd1498Szrj   gcc_assert (TREE_CODE (nit) == SSA_NAME);
1847*38fd1498Szrj 
1848*38fd1498Szrj   /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1849*38fd1498Szrj      iv with base 0 and step 1 that is incremented in the latch, like this:
1850*38fd1498Szrj 
1851*38fd1498Szrj      <bb header>:
1852*38fd1498Szrj      # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1853*38fd1498Szrj      ...
1854*38fd1498Szrj      if (iv_1 < nit)
1855*38fd1498Szrj        goto <bb latch>;
1856*38fd1498Szrj      else
1857*38fd1498Szrj        goto <bb exit>;
1858*38fd1498Szrj 
1859*38fd1498Szrj      <bb latch>:
1860*38fd1498Szrj      iv_2 = iv_1 + 1;
1861*38fd1498Szrj      goto <bb header>;
1862*38fd1498Szrj 
1863*38fd1498Szrj      The range of iv_1 is [0, nit].  The latch edge is taken for
1864*38fd1498Szrj      iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
1865*38fd1498Szrj      number of latch executions is equal to nit.
1866*38fd1498Szrj 
1867*38fd1498Szrj      The function max_loop_iterations gives us the maximum number of latch
1868*38fd1498Szrj      executions, so it gives us the maximum value of nit.  */
1869*38fd1498Szrj   widest_int nit_max;
1870*38fd1498Szrj   if (!max_loop_iterations (loop, &nit_max))
1871*38fd1498Szrj     return false;
1872*38fd1498Szrj 
1873*38fd1498Szrj   /* Check if nit + 1 overflows.  */
1874*38fd1498Szrj   widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
1875*38fd1498Szrj   if (nit_max >= type_max)
1876*38fd1498Szrj     return false;
1877*38fd1498Szrj 
1878*38fd1498Szrj   gimple *def = SSA_NAME_DEF_STMT (nit);
1879*38fd1498Szrj 
1880*38fd1498Szrj   /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
1881*38fd1498Szrj   if (def
1882*38fd1498Szrj       && is_gimple_assign (def)
1883*38fd1498Szrj       && gimple_assign_rhs_code (def) == PLUS_EXPR)
1884*38fd1498Szrj     {
1885*38fd1498Szrj       tree op1 = gimple_assign_rhs1 (def);
1886*38fd1498Szrj       tree op2 = gimple_assign_rhs2 (def);
1887*38fd1498Szrj       if (integer_minus_onep (op1))
1888*38fd1498Szrj 	alt_bound = op2;
1889*38fd1498Szrj       else if (integer_minus_onep (op2))
1890*38fd1498Szrj 	alt_bound = op1;
1891*38fd1498Szrj     }
1892*38fd1498Szrj 
1893*38fd1498Szrj   /* If not found, insert nit + 1.  */
1894*38fd1498Szrj   if (alt_bound == NULL_TREE)
1895*38fd1498Szrj     {
1896*38fd1498Szrj       alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1897*38fd1498Szrj 			       build_int_cst_type (nit_type, 1));
1898*38fd1498Szrj 
1899*38fd1498Szrj       gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1900*38fd1498Szrj 
1901*38fd1498Szrj       alt_bound
1902*38fd1498Szrj 	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1903*38fd1498Szrj 				    GSI_CONTINUE_LINKING);
1904*38fd1498Szrj     }
1905*38fd1498Szrj 
1906*38fd1498Szrj   transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1907*38fd1498Szrj   return true;
1908*38fd1498Szrj }
1909*38fd1498Szrj 
1910*38fd1498Szrj /* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
1911*38fd1498Szrj    number of iterations of the loop.  REDUCTION_LIST describes the reductions in
1912*38fd1498Szrj    LOOP.  */
1913*38fd1498Szrj 
1914*38fd1498Szrj static void
transform_to_exit_first_loop(struct loop * loop,reduction_info_table_type * reduction_list,tree nit)1915*38fd1498Szrj transform_to_exit_first_loop (struct loop *loop,
1916*38fd1498Szrj 			      reduction_info_table_type *reduction_list,
1917*38fd1498Szrj 			      tree nit)
1918*38fd1498Szrj {
1919*38fd1498Szrj   basic_block *bbs, *nbbs, ex_bb, orig_header;
1920*38fd1498Szrj   unsigned n;
1921*38fd1498Szrj   bool ok;
1922*38fd1498Szrj   edge exit = single_dom_exit (loop), hpred;
1923*38fd1498Szrj   tree control, control_name, res, t;
1924*38fd1498Szrj   gphi *phi, *nphi;
1925*38fd1498Szrj   gassign *stmt;
1926*38fd1498Szrj   gcond *cond_stmt, *cond_nit;
1927*38fd1498Szrj   tree nit_1;
1928*38fd1498Szrj 
1929*38fd1498Szrj   split_block_after_labels (loop->header);
1930*38fd1498Szrj   orig_header = single_succ (loop->header);
1931*38fd1498Szrj   hpred = single_succ_edge (loop->header);
1932*38fd1498Szrj 
1933*38fd1498Szrj   cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1934*38fd1498Szrj   control = gimple_cond_lhs (cond_stmt);
1935*38fd1498Szrj   gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1936*38fd1498Szrj 
1937*38fd1498Szrj   /* Make sure that we have phi nodes on exit for all loop header phis
1938*38fd1498Szrj      (create_parallel_loop requires that).  */
1939*38fd1498Szrj   for (gphi_iterator gsi = gsi_start_phis (loop->header);
1940*38fd1498Szrj        !gsi_end_p (gsi);
1941*38fd1498Szrj        gsi_next (&gsi))
1942*38fd1498Szrj     {
1943*38fd1498Szrj       phi = gsi.phi ();
1944*38fd1498Szrj       res = PHI_RESULT (phi);
1945*38fd1498Szrj       t = copy_ssa_name (res, phi);
1946*38fd1498Szrj       SET_PHI_RESULT (phi, t);
1947*38fd1498Szrj       nphi = create_phi_node (res, orig_header);
1948*38fd1498Szrj       add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1949*38fd1498Szrj 
1950*38fd1498Szrj       if (res == control)
1951*38fd1498Szrj 	{
1952*38fd1498Szrj 	  gimple_cond_set_lhs (cond_stmt, t);
1953*38fd1498Szrj 	  update_stmt (cond_stmt);
1954*38fd1498Szrj 	  control = t;
1955*38fd1498Szrj 	}
1956*38fd1498Szrj     }
1957*38fd1498Szrj 
1958*38fd1498Szrj   bbs = get_loop_body_in_dom_order (loop);
1959*38fd1498Szrj 
1960*38fd1498Szrj   for (n = 0; bbs[n] != exit->src; n++)
1961*38fd1498Szrj    continue;
1962*38fd1498Szrj   nbbs = XNEWVEC (basic_block, n);
1963*38fd1498Szrj   ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1964*38fd1498Szrj 				   bbs + 1, n, nbbs);
1965*38fd1498Szrj   gcc_assert (ok);
1966*38fd1498Szrj   free (bbs);
1967*38fd1498Szrj   ex_bb = nbbs[0];
1968*38fd1498Szrj   free (nbbs);
1969*38fd1498Szrj 
1970*38fd1498Szrj   /* Other than reductions, the only gimple reg that should be copied
1971*38fd1498Szrj      out of the loop is the control variable.  */
1972*38fd1498Szrj   exit = single_dom_exit (loop);
1973*38fd1498Szrj   control_name = NULL_TREE;
1974*38fd1498Szrj   for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1975*38fd1498Szrj        !gsi_end_p (gsi); )
1976*38fd1498Szrj     {
1977*38fd1498Szrj       phi = gsi.phi ();
1978*38fd1498Szrj       res = PHI_RESULT (phi);
1979*38fd1498Szrj       if (virtual_operand_p (res))
1980*38fd1498Szrj 	{
1981*38fd1498Szrj 	  gsi_next (&gsi);
1982*38fd1498Szrj 	  continue;
1983*38fd1498Szrj 	}
1984*38fd1498Szrj 
1985*38fd1498Szrj       /* Check if it is a part of reduction.  If it is,
1986*38fd1498Szrj          keep the phi at the reduction's keep_res field.  The
1987*38fd1498Szrj          PHI_RESULT of this phi is the resulting value of the reduction
1988*38fd1498Szrj          variable when exiting the loop.  */
1989*38fd1498Szrj 
1990*38fd1498Szrj       if (reduction_list->elements () > 0)
1991*38fd1498Szrj 	{
1992*38fd1498Szrj 	  struct reduction_info *red;
1993*38fd1498Szrj 
1994*38fd1498Szrj 	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1995*38fd1498Szrj 	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1996*38fd1498Szrj 	  if (red)
1997*38fd1498Szrj 	    {
1998*38fd1498Szrj 	      red->keep_res = phi;
1999*38fd1498Szrj 	      gsi_next (&gsi);
2000*38fd1498Szrj 	      continue;
2001*38fd1498Szrj 	    }
2002*38fd1498Szrj 	}
2003*38fd1498Szrj       gcc_assert (control_name == NULL_TREE
2004*38fd1498Szrj 		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2005*38fd1498Szrj       control_name = res;
2006*38fd1498Szrj       remove_phi_node (&gsi, false);
2007*38fd1498Szrj     }
2008*38fd1498Szrj   gcc_assert (control_name != NULL_TREE);
2009*38fd1498Szrj 
2010*38fd1498Szrj   /* Initialize the control variable to number of iterations
2011*38fd1498Szrj      according to the rhs of the exit condition.  */
2012*38fd1498Szrj   gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2013*38fd1498Szrj   cond_nit = as_a <gcond *> (last_stmt (exit->src));
2014*38fd1498Szrj   nit_1 =  gimple_cond_rhs (cond_nit);
2015*38fd1498Szrj   nit_1 = force_gimple_operand_gsi (&gsi,
2016*38fd1498Szrj 				  fold_convert (TREE_TYPE (control_name), nit_1),
2017*38fd1498Szrj 				  false, NULL_TREE, false, GSI_SAME_STMT);
2018*38fd1498Szrj   stmt = gimple_build_assign (control_name, nit_1);
2019*38fd1498Szrj   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2020*38fd1498Szrj }
2021*38fd1498Szrj 
2022*38fd1498Szrj /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2023*38fd1498Szrj    LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2024*38fd1498Szrj    NEW_DATA is the variable that should be initialized from the argument
2025*38fd1498Szrj    of LOOP_FN.  N_THREADS is the requested number of threads, which can be 0 if
2026*38fd1498Szrj    that number is to be determined later.  */
2027*38fd1498Szrj 
2028*38fd1498Szrj static void
create_parallel_loop(struct loop * loop,tree loop_fn,tree data,tree new_data,unsigned n_threads,location_t loc,bool oacc_kernels_p)2029*38fd1498Szrj create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
2030*38fd1498Szrj 		      tree new_data, unsigned n_threads, location_t loc,
2031*38fd1498Szrj 		      bool oacc_kernels_p)
2032*38fd1498Szrj {
2033*38fd1498Szrj   gimple_stmt_iterator gsi;
2034*38fd1498Szrj   basic_block for_bb, ex_bb, continue_bb;
2035*38fd1498Szrj   tree t, param;
2036*38fd1498Szrj   gomp_parallel *omp_par_stmt;
2037*38fd1498Szrj   gimple *omp_return_stmt1, *omp_return_stmt2;
2038*38fd1498Szrj   gimple *phi;
2039*38fd1498Szrj   gcond *cond_stmt;
2040*38fd1498Szrj   gomp_for *for_stmt;
2041*38fd1498Szrj   gomp_continue *omp_cont_stmt;
2042*38fd1498Szrj   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2043*38fd1498Szrj   edge exit, nexit, guard, end, e;
2044*38fd1498Szrj 
2045*38fd1498Szrj   if (oacc_kernels_p)
2046*38fd1498Szrj     {
2047*38fd1498Szrj       gcc_checking_assert (lookup_attribute ("oacc kernels",
2048*38fd1498Szrj 					     DECL_ATTRIBUTES (cfun->decl)));
2049*38fd1498Szrj       /* Indicate to later processing that this is a parallelized OpenACC
2050*38fd1498Szrj 	 kernels construct.  */
2051*38fd1498Szrj       DECL_ATTRIBUTES (cfun->decl)
2052*38fd1498Szrj 	= tree_cons (get_identifier ("oacc kernels parallelized"),
2053*38fd1498Szrj 		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2054*38fd1498Szrj     }
2055*38fd1498Szrj   else
2056*38fd1498Szrj     {
2057*38fd1498Szrj       /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
2058*38fd1498Szrj 
2059*38fd1498Szrj       basic_block bb = loop_preheader_edge (loop)->src;
2060*38fd1498Szrj       basic_block paral_bb = single_pred (bb);
2061*38fd1498Szrj       gsi = gsi_last_bb (paral_bb);
2062*38fd1498Szrj 
2063*38fd1498Szrj       gcc_checking_assert (n_threads != 0);
2064*38fd1498Szrj       t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2065*38fd1498Szrj       OMP_CLAUSE_NUM_THREADS_EXPR (t)
2066*38fd1498Szrj 	= build_int_cst (integer_type_node, n_threads);
2067*38fd1498Szrj       omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2068*38fd1498Szrj       gimple_set_location (omp_par_stmt, loc);
2069*38fd1498Szrj 
2070*38fd1498Szrj       gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2071*38fd1498Szrj 
2072*38fd1498Szrj       /* Initialize NEW_DATA.  */
2073*38fd1498Szrj       if (data)
2074*38fd1498Szrj 	{
2075*38fd1498Szrj 	  gassign *assign_stmt;
2076*38fd1498Szrj 
2077*38fd1498Szrj 	  gsi = gsi_after_labels (bb);
2078*38fd1498Szrj 
2079*38fd1498Szrj 	  param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2080*38fd1498Szrj 	  assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2081*38fd1498Szrj 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2082*38fd1498Szrj 
2083*38fd1498Szrj 	  assign_stmt = gimple_build_assign (new_data,
2084*38fd1498Szrj 					     fold_convert (TREE_TYPE (new_data), param));
2085*38fd1498Szrj 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2086*38fd1498Szrj 	}
2087*38fd1498Szrj 
2088*38fd1498Szrj       /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
2089*38fd1498Szrj       bb = split_loop_exit_edge (single_dom_exit (loop));
2090*38fd1498Szrj       gsi = gsi_last_bb (bb);
2091*38fd1498Szrj       omp_return_stmt1 = gimple_build_omp_return (false);
2092*38fd1498Szrj       gimple_set_location (omp_return_stmt1, loc);
2093*38fd1498Szrj       gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2094*38fd1498Szrj     }
2095*38fd1498Szrj 
2096*38fd1498Szrj   /* Extract data for GIMPLE_OMP_FOR.  */
2097*38fd1498Szrj   gcc_assert (loop->header == single_dom_exit (loop)->src);
2098*38fd1498Szrj   cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2099*38fd1498Szrj 
2100*38fd1498Szrj   cvar = gimple_cond_lhs (cond_stmt);
2101*38fd1498Szrj   cvar_base = SSA_NAME_VAR (cvar);
2102*38fd1498Szrj   phi = SSA_NAME_DEF_STMT (cvar);
2103*38fd1498Szrj   cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2104*38fd1498Szrj   initvar = copy_ssa_name (cvar);
2105*38fd1498Szrj   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2106*38fd1498Szrj 	   initvar);
2107*38fd1498Szrj   cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2108*38fd1498Szrj 
2109*38fd1498Szrj   gsi = gsi_last_nondebug_bb (loop->latch);
2110*38fd1498Szrj   gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2111*38fd1498Szrj   gsi_remove (&gsi, true);
2112*38fd1498Szrj 
2113*38fd1498Szrj   /* Prepare cfg.  */
2114*38fd1498Szrj   for_bb = split_edge (loop_preheader_edge (loop));
2115*38fd1498Szrj   ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2116*38fd1498Szrj   extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2117*38fd1498Szrj   gcc_assert (exit == single_dom_exit (loop));
2118*38fd1498Szrj 
2119*38fd1498Szrj   guard = make_edge (for_bb, ex_bb, 0);
2120*38fd1498Szrj   /* FIXME: What is the probability?  */
2121*38fd1498Szrj   guard->probability = profile_probability::guessed_never ();
2122*38fd1498Szrj   /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
2123*38fd1498Szrj   loop->latch = split_edge (single_succ_edge (loop->latch));
2124*38fd1498Szrj   single_pred_edge (loop->latch)->flags = 0;
2125*38fd1498Szrj   end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2126*38fd1498Szrj   rescan_loop_exit (end, true, false);
2127*38fd1498Szrj 
2128*38fd1498Szrj   for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2129*38fd1498Szrj        !gsi_end_p (gpi); gsi_next (&gpi))
2130*38fd1498Szrj     {
2131*38fd1498Szrj       source_location locus;
2132*38fd1498Szrj       gphi *phi = gpi.phi ();
2133*38fd1498Szrj       tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2134*38fd1498Szrj       gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2135*38fd1498Szrj 
2136*38fd1498Szrj       /* If the exit phi is not connected to a header phi in the same loop, this
2137*38fd1498Szrj 	 value is not modified in the loop, and we're done with this phi.  */
2138*38fd1498Szrj       if (!(gimple_code (def_stmt) == GIMPLE_PHI
2139*38fd1498Szrj 	    && gimple_bb (def_stmt) == loop->header))
2140*38fd1498Szrj 	{
2141*38fd1498Szrj 	  locus = gimple_phi_arg_location_from_edge (phi, exit);
2142*38fd1498Szrj 	  add_phi_arg (phi, def, guard, locus);
2143*38fd1498Szrj 	  add_phi_arg (phi, def, end, locus);
2144*38fd1498Szrj 	  continue;
2145*38fd1498Szrj 	}
2146*38fd1498Szrj 
2147*38fd1498Szrj       gphi *stmt = as_a <gphi *> (def_stmt);
2148*38fd1498Szrj       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2149*38fd1498Szrj       locus = gimple_phi_arg_location_from_edge (stmt,
2150*38fd1498Szrj 						 loop_preheader_edge (loop));
2151*38fd1498Szrj       add_phi_arg (phi, def, guard, locus);
2152*38fd1498Szrj 
2153*38fd1498Szrj       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2154*38fd1498Szrj       locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2155*38fd1498Szrj       add_phi_arg (phi, def, end, locus);
2156*38fd1498Szrj     }
2157*38fd1498Szrj   e = redirect_edge_and_branch (exit, nexit->dest);
2158*38fd1498Szrj   PENDING_STMT (e) = NULL;
2159*38fd1498Szrj 
2160*38fd1498Szrj   /* Emit GIMPLE_OMP_FOR.  */
2161*38fd1498Szrj   if (oacc_kernels_p)
2162*38fd1498Szrj     /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
2163*38fd1498Szrj        omp-offload.c:execute_oacc_device_lower.  */
2164*38fd1498Szrj     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2165*38fd1498Szrj   else
2166*38fd1498Szrj     {
2167*38fd1498Szrj       t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2168*38fd1498Szrj       int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2169*38fd1498Szrj       enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
2170*38fd1498Szrj 	= (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2171*38fd1498Szrj       switch (schedule_type)
2172*38fd1498Szrj 	{
2173*38fd1498Szrj 	case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2174*38fd1498Szrj 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2175*38fd1498Szrj 	  break;
2176*38fd1498Szrj 	case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2177*38fd1498Szrj 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2178*38fd1498Szrj 	  break;
2179*38fd1498Szrj 	case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2180*38fd1498Szrj 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2181*38fd1498Szrj 	  break;
2182*38fd1498Szrj 	case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2183*38fd1498Szrj 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2184*38fd1498Szrj 	  chunk_size = 0;
2185*38fd1498Szrj 	  break;
2186*38fd1498Szrj 	case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2187*38fd1498Szrj 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2188*38fd1498Szrj 	  chunk_size = 0;
2189*38fd1498Szrj 	  break;
2190*38fd1498Szrj 	default:
2191*38fd1498Szrj 	  gcc_unreachable ();
2192*38fd1498Szrj 	}
2193*38fd1498Szrj       if (chunk_size != 0)
2194*38fd1498Szrj 	OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2195*38fd1498Szrj 	  = build_int_cst (integer_type_node, chunk_size);
2196*38fd1498Szrj     }
2197*38fd1498Szrj 
2198*38fd1498Szrj   for_stmt = gimple_build_omp_for (NULL,
2199*38fd1498Szrj 				   (oacc_kernels_p
2200*38fd1498Szrj 				    ? GF_OMP_FOR_KIND_OACC_LOOP
2201*38fd1498Szrj 				    : GF_OMP_FOR_KIND_FOR),
2202*38fd1498Szrj 				   t, 1, NULL);
2203*38fd1498Szrj 
2204*38fd1498Szrj   gimple_cond_set_lhs (cond_stmt, cvar_base);
2205*38fd1498Szrj   type = TREE_TYPE (cvar);
2206*38fd1498Szrj   gimple_set_location (for_stmt, loc);
2207*38fd1498Szrj   gimple_omp_for_set_index (for_stmt, 0, initvar);
2208*38fd1498Szrj   gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2209*38fd1498Szrj   gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2210*38fd1498Szrj   gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2211*38fd1498Szrj   gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2212*38fd1498Szrj 						cvar_base,
2213*38fd1498Szrj 						build_int_cst (type, 1)));
2214*38fd1498Szrj 
2215*38fd1498Szrj   gsi = gsi_last_bb (for_bb);
2216*38fd1498Szrj   gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2217*38fd1498Szrj   SSA_NAME_DEF_STMT (initvar) = for_stmt;
2218*38fd1498Szrj 
2219*38fd1498Szrj   /* Emit GIMPLE_OMP_CONTINUE.  */
2220*38fd1498Szrj   continue_bb = single_pred (loop->latch);
2221*38fd1498Szrj   gsi = gsi_last_bb (continue_bb);
2222*38fd1498Szrj   omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2223*38fd1498Szrj   gimple_set_location (omp_cont_stmt, loc);
2224*38fd1498Szrj   gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2225*38fd1498Szrj   SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2226*38fd1498Szrj 
2227*38fd1498Szrj   /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
2228*38fd1498Szrj   gsi = gsi_last_bb (ex_bb);
2229*38fd1498Szrj   omp_return_stmt2 = gimple_build_omp_return (true);
2230*38fd1498Szrj   gimple_set_location (omp_return_stmt2, loc);
2231*38fd1498Szrj   gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2232*38fd1498Szrj 
2233*38fd1498Szrj   /* After the above dom info is hosed.  Re-compute it.  */
2234*38fd1498Szrj   free_dominance_info (CDI_DOMINATORS);
2235*38fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
2236*38fd1498Szrj }
2237*38fd1498Szrj 
2238*38fd1498Szrj /* Return number of phis in bb.  If COUNT_VIRTUAL_P is false, don't count the
2239*38fd1498Szrj    virtual phi.  */
2240*38fd1498Szrj 
2241*38fd1498Szrj static unsigned int
num_phis(basic_block bb,bool count_virtual_p)2242*38fd1498Szrj num_phis (basic_block bb, bool count_virtual_p)
2243*38fd1498Szrj {
2244*38fd1498Szrj   unsigned int nr_phis = 0;
2245*38fd1498Szrj   gphi_iterator gsi;
2246*38fd1498Szrj   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2247*38fd1498Szrj     {
2248*38fd1498Szrj       if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2249*38fd1498Szrj 	continue;
2250*38fd1498Szrj 
2251*38fd1498Szrj       nr_phis++;
2252*38fd1498Szrj     }
2253*38fd1498Szrj 
2254*38fd1498Szrj   return nr_phis;
2255*38fd1498Szrj }
2256*38fd1498Szrj 
2257*38fd1498Szrj /* Generates code to execute the iterations of LOOP in N_THREADS
2258*38fd1498Szrj    threads in parallel, which can be 0 if that number is to be determined
2259*38fd1498Szrj    later.
2260*38fd1498Szrj 
2261*38fd1498Szrj    NITER describes number of iterations of LOOP.
2262*38fd1498Szrj    REDUCTION_LIST describes the reductions existent in the LOOP.  */
2263*38fd1498Szrj 
2264*38fd1498Szrj static void
gen_parallel_loop(struct loop * loop,reduction_info_table_type * reduction_list,unsigned n_threads,struct tree_niter_desc * niter,bool oacc_kernels_p)2265*38fd1498Szrj gen_parallel_loop (struct loop *loop,
2266*38fd1498Szrj 		   reduction_info_table_type *reduction_list,
2267*38fd1498Szrj 		   unsigned n_threads, struct tree_niter_desc *niter,
2268*38fd1498Szrj 		   bool oacc_kernels_p)
2269*38fd1498Szrj {
2270*38fd1498Szrj   tree many_iterations_cond, type, nit;
2271*38fd1498Szrj   tree arg_struct, new_arg_struct;
2272*38fd1498Szrj   gimple_seq stmts;
2273*38fd1498Szrj   edge entry, exit;
2274*38fd1498Szrj   struct clsn_data clsn_data;
2275*38fd1498Szrj   location_t loc;
2276*38fd1498Szrj   gimple *cond_stmt;
2277*38fd1498Szrj   unsigned int m_p_thread=2;
2278*38fd1498Szrj 
2279*38fd1498Szrj   /* From
2280*38fd1498Szrj 
2281*38fd1498Szrj      ---------------------------------------------------------------------
2282*38fd1498Szrj      loop
2283*38fd1498Szrj        {
2284*38fd1498Szrj 	 IV = phi (INIT, IV + STEP)
2285*38fd1498Szrj 	 BODY1;
2286*38fd1498Szrj 	 if (COND)
2287*38fd1498Szrj 	   break;
2288*38fd1498Szrj 	 BODY2;
2289*38fd1498Szrj        }
2290*38fd1498Szrj      ---------------------------------------------------------------------
2291*38fd1498Szrj 
2292*38fd1498Szrj      with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2293*38fd1498Szrj      we generate the following code:
2294*38fd1498Szrj 
2295*38fd1498Szrj      ---------------------------------------------------------------------
2296*38fd1498Szrj 
2297*38fd1498Szrj      if (MAY_BE_ZERO
2298*38fd1498Szrj      || NITER < MIN_PER_THREAD * N_THREADS)
2299*38fd1498Szrj      goto original;
2300*38fd1498Szrj 
2301*38fd1498Szrj      BODY1;
2302*38fd1498Szrj      store all local loop-invariant variables used in body of the loop to DATA.
2303*38fd1498Szrj      GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2304*38fd1498Szrj      load the variables from DATA.
2305*38fd1498Szrj      GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2306*38fd1498Szrj      BODY2;
2307*38fd1498Szrj      BODY1;
2308*38fd1498Szrj      GIMPLE_OMP_CONTINUE;
2309*38fd1498Szrj      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
2310*38fd1498Szrj      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
2311*38fd1498Szrj      goto end;
2312*38fd1498Szrj 
2313*38fd1498Szrj      original:
2314*38fd1498Szrj      loop
2315*38fd1498Szrj        {
2316*38fd1498Szrj 	 IV = phi (INIT, IV + STEP)
2317*38fd1498Szrj 	 BODY1;
2318*38fd1498Szrj 	 if (COND)
2319*38fd1498Szrj 	   break;
2320*38fd1498Szrj 	 BODY2;
2321*38fd1498Szrj        }
2322*38fd1498Szrj 
2323*38fd1498Szrj      end:
2324*38fd1498Szrj 
2325*38fd1498Szrj    */
2326*38fd1498Szrj 
2327*38fd1498Szrj   /* Create two versions of the loop -- in the old one, we know that the
2328*38fd1498Szrj      number of iterations is large enough, and we will transform it into the
2329*38fd1498Szrj      loop that will be split to loop_fn, the new one will be used for the
2330*38fd1498Szrj      remaining iterations.  */
2331*38fd1498Szrj 
2332*38fd1498Szrj   /* We should compute a better number-of-iterations value for outer loops.
2333*38fd1498Szrj      That is, if we have
2334*38fd1498Szrj 
2335*38fd1498Szrj     for (i = 0; i < n; ++i)
2336*38fd1498Szrj       for (j = 0; j < m; ++j)
2337*38fd1498Szrj         ...
2338*38fd1498Szrj 
2339*38fd1498Szrj     we should compute nit = n * m, not nit = n.
2340*38fd1498Szrj     Also may_be_zero handling would need to be adjusted.  */
2341*38fd1498Szrj 
2342*38fd1498Szrj   type = TREE_TYPE (niter->niter);
2343*38fd1498Szrj   nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2344*38fd1498Szrj 			      NULL_TREE);
2345*38fd1498Szrj   if (stmts)
2346*38fd1498Szrj     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2347*38fd1498Szrj 
2348*38fd1498Szrj   if (!oacc_kernels_p)
2349*38fd1498Szrj     {
2350*38fd1498Szrj       if (loop->inner)
2351*38fd1498Szrj 	m_p_thread=2;
2352*38fd1498Szrj       else
2353*38fd1498Szrj 	m_p_thread=MIN_PER_THREAD;
2354*38fd1498Szrj 
2355*38fd1498Szrj       gcc_checking_assert (n_threads != 0);
2356*38fd1498Szrj       many_iterations_cond =
2357*38fd1498Szrj 	fold_build2 (GE_EXPR, boolean_type_node,
2358*38fd1498Szrj 		     nit, build_int_cst (type, m_p_thread * n_threads - 1));
2359*38fd1498Szrj 
2360*38fd1498Szrj       many_iterations_cond
2361*38fd1498Szrj 	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2362*38fd1498Szrj 		       invert_truthvalue (unshare_expr (niter->may_be_zero)),
2363*38fd1498Szrj 		       many_iterations_cond);
2364*38fd1498Szrj       many_iterations_cond
2365*38fd1498Szrj 	= force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2366*38fd1498Szrj       if (stmts)
2367*38fd1498Szrj 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2368*38fd1498Szrj       if (!is_gimple_condexpr (many_iterations_cond))
2369*38fd1498Szrj 	{
2370*38fd1498Szrj 	  many_iterations_cond
2371*38fd1498Szrj 	    = force_gimple_operand (many_iterations_cond, &stmts,
2372*38fd1498Szrj 				    true, NULL_TREE);
2373*38fd1498Szrj 	  if (stmts)
2374*38fd1498Szrj 	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
2375*38fd1498Szrj 					      stmts);
2376*38fd1498Szrj 	}
2377*38fd1498Szrj 
2378*38fd1498Szrj       initialize_original_copy_tables ();
2379*38fd1498Szrj 
2380*38fd1498Szrj       /* We assume that the loop usually iterates a lot.  */
2381*38fd1498Szrj       loop_version (loop, many_iterations_cond, NULL,
2382*38fd1498Szrj 		    profile_probability::likely (),
2383*38fd1498Szrj 		    profile_probability::unlikely (),
2384*38fd1498Szrj 		    profile_probability::likely (),
2385*38fd1498Szrj 		    profile_probability::unlikely (), true);
2386*38fd1498Szrj       update_ssa (TODO_update_ssa);
2387*38fd1498Szrj       free_original_copy_tables ();
2388*38fd1498Szrj     }
2389*38fd1498Szrj 
2390*38fd1498Szrj   /* Base all the induction variables in LOOP on a single control one.  */
2391*38fd1498Szrj   canonicalize_loop_ivs (loop, &nit, true);
2392*38fd1498Szrj   if (num_phis (loop->header, false) != reduction_list->elements () + 1)
2393*38fd1498Szrj     {
2394*38fd1498Szrj       /* The call to canonicalize_loop_ivs above failed to "base all the
2395*38fd1498Szrj 	 induction variables in LOOP on a single control one".  Do damage
2396*38fd1498Szrj 	 control.  */
2397*38fd1498Szrj       basic_block preheader = loop_preheader_edge (loop)->src;
2398*38fd1498Szrj       basic_block cond_bb = single_pred (preheader);
2399*38fd1498Szrj       gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
2400*38fd1498Szrj       gimple_cond_make_true (cond);
2401*38fd1498Szrj       update_stmt (cond);
2402*38fd1498Szrj       /* We've gotten rid of the duplicate loop created by loop_version, but
2403*38fd1498Szrj 	 we can't undo whatever canonicalize_loop_ivs has done.
2404*38fd1498Szrj 	 TODO: Fix this properly by ensuring that the call to
2405*38fd1498Szrj 	 canonicalize_loop_ivs succeeds.  */
2406*38fd1498Szrj       if (dump_file
2407*38fd1498Szrj 	  && (dump_flags & TDF_DETAILS))
2408*38fd1498Szrj 	fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
2409*38fd1498Szrj 		 " aborting transformation\n", loop->num);
2410*38fd1498Szrj       return;
2411*38fd1498Szrj     }
2412*38fd1498Szrj 
2413*38fd1498Szrj   /* Ensure that the exit condition is the first statement in the loop.
2414*38fd1498Szrj      The common case is that latch of the loop is empty (apart from the
2415*38fd1498Szrj      increment) and immediately follows the loop exit test.  Attempt to move the
2416*38fd1498Szrj      entry of the loop directly before the exit check and increase the number of
2417*38fd1498Szrj      iterations of the loop by one.  */
2418*38fd1498Szrj   if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2419*38fd1498Szrj     {
2420*38fd1498Szrj       if (dump_file
2421*38fd1498Szrj 	  && (dump_flags & TDF_DETAILS))
2422*38fd1498Szrj 	fprintf (dump_file,
2423*38fd1498Szrj 		 "alternative exit-first loop transform succeeded"
2424*38fd1498Szrj 		 " for loop %d\n", loop->num);
2425*38fd1498Szrj     }
2426*38fd1498Szrj   else
2427*38fd1498Szrj     {
2428*38fd1498Szrj       if (oacc_kernels_p)
2429*38fd1498Szrj 	n_threads = 1;
2430*38fd1498Szrj 
2431*38fd1498Szrj       /* Fall back on the method that handles more cases, but duplicates the
2432*38fd1498Szrj 	 loop body: move the exit condition of LOOP to the beginning of its
2433*38fd1498Szrj 	 header, and duplicate the part of the last iteration that gets disabled
2434*38fd1498Szrj 	 to the exit of the loop.  */
2435*38fd1498Szrj       transform_to_exit_first_loop (loop, reduction_list, nit);
2436*38fd1498Szrj     }
2437*38fd1498Szrj 
2438*38fd1498Szrj   /* Generate initializations for reductions.  */
2439*38fd1498Szrj   if (reduction_list->elements () > 0)
2440*38fd1498Szrj     reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2441*38fd1498Szrj 
2442*38fd1498Szrj   /* Eliminate the references to local variables from the loop.  */
2443*38fd1498Szrj   gcc_assert (single_exit (loop));
2444*38fd1498Szrj   entry = loop_preheader_edge (loop);
2445*38fd1498Szrj   exit = single_dom_exit (loop);
2446*38fd1498Szrj 
2447*38fd1498Szrj   /* This rewrites the body in terms of new variables.  This has already
2448*38fd1498Szrj      been done for oacc_kernels_p in pass_lower_omp/lower_omp ().  */
2449*38fd1498Szrj   if (!oacc_kernels_p)
2450*38fd1498Szrj     {
2451*38fd1498Szrj       eliminate_local_variables (entry, exit);
2452*38fd1498Szrj       /* In the old loop, move all variables non-local to the loop to a
2453*38fd1498Szrj 	 structure and back, and create separate decls for the variables used in
2454*38fd1498Szrj 	 loop.  */
2455*38fd1498Szrj       separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2456*38fd1498Szrj 				&new_arg_struct, &clsn_data);
2457*38fd1498Szrj     }
2458*38fd1498Szrj   else
2459*38fd1498Szrj     {
2460*38fd1498Szrj       arg_struct = NULL_TREE;
2461*38fd1498Szrj       new_arg_struct = NULL_TREE;
2462*38fd1498Szrj       clsn_data.load = NULL_TREE;
2463*38fd1498Szrj       clsn_data.load_bb = exit->dest;
2464*38fd1498Szrj       clsn_data.store = NULL_TREE;
2465*38fd1498Szrj       clsn_data.store_bb = NULL;
2466*38fd1498Szrj     }
2467*38fd1498Szrj 
2468*38fd1498Szrj   /* Create the parallel constructs.  */
2469*38fd1498Szrj   loc = UNKNOWN_LOCATION;
2470*38fd1498Szrj   cond_stmt = last_stmt (loop->header);
2471*38fd1498Szrj   if (cond_stmt)
2472*38fd1498Szrj     loc = gimple_location (cond_stmt);
2473*38fd1498Szrj   create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
2474*38fd1498Szrj 			n_threads, loc, oacc_kernels_p);
2475*38fd1498Szrj   if (reduction_list->elements () > 0)
2476*38fd1498Szrj     create_call_for_reduction (loop, reduction_list, &clsn_data);
2477*38fd1498Szrj 
2478*38fd1498Szrj   scev_reset ();
2479*38fd1498Szrj 
2480*38fd1498Szrj   /* Free loop bound estimations that could contain references to
2481*38fd1498Szrj      removed statements.  */
2482*38fd1498Szrj   free_numbers_of_iterations_estimates (cfun);
2483*38fd1498Szrj }
2484*38fd1498Szrj 
2485*38fd1498Szrj /* Returns true when LOOP contains vector phi nodes.  */
2486*38fd1498Szrj 
2487*38fd1498Szrj static bool
loop_has_vector_phi_nodes(struct loop * loop ATTRIBUTE_UNUSED)2488*38fd1498Szrj loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2489*38fd1498Szrj {
2490*38fd1498Szrj   unsigned i;
2491*38fd1498Szrj   basic_block *bbs = get_loop_body_in_dom_order (loop);
2492*38fd1498Szrj   gphi_iterator gsi;
2493*38fd1498Szrj   bool res = true;
2494*38fd1498Szrj 
2495*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
2496*38fd1498Szrj     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2497*38fd1498Szrj       if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2498*38fd1498Szrj 	goto end;
2499*38fd1498Szrj 
2500*38fd1498Szrj   res = false;
2501*38fd1498Szrj  end:
2502*38fd1498Szrj   free (bbs);
2503*38fd1498Szrj   return res;
2504*38fd1498Szrj }
2505*38fd1498Szrj 
2506*38fd1498Szrj /* Create a reduction_info struct, initialize it with REDUC_STMT
2507*38fd1498Szrj    and PHI, insert it to the REDUCTION_LIST.  */
2508*38fd1498Szrj 
2509*38fd1498Szrj static void
build_new_reduction(reduction_info_table_type * reduction_list,gimple * reduc_stmt,gphi * phi)2510*38fd1498Szrj build_new_reduction (reduction_info_table_type *reduction_list,
2511*38fd1498Szrj 		     gimple *reduc_stmt, gphi *phi)
2512*38fd1498Szrj {
2513*38fd1498Szrj   reduction_info **slot;
2514*38fd1498Szrj   struct reduction_info *new_reduction;
2515*38fd1498Szrj   enum tree_code reduction_code;
2516*38fd1498Szrj 
2517*38fd1498Szrj   gcc_assert (reduc_stmt);
2518*38fd1498Szrj 
2519*38fd1498Szrj   if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2520*38fd1498Szrj     {
2521*38fd1498Szrj       tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
2522*38fd1498Szrj       gimple *def1 = SSA_NAME_DEF_STMT (op1);
2523*38fd1498Szrj       reduction_code = gimple_assign_rhs_code (def1);
2524*38fd1498Szrj     }
2525*38fd1498Szrj   else
2526*38fd1498Szrj     reduction_code = gimple_assign_rhs_code (reduc_stmt);
2527*38fd1498Szrj   /* Check for OpenMP supported reduction.  */
2528*38fd1498Szrj   switch (reduction_code)
2529*38fd1498Szrj     {
2530*38fd1498Szrj     case PLUS_EXPR:
2531*38fd1498Szrj     case MULT_EXPR:
2532*38fd1498Szrj     case MAX_EXPR:
2533*38fd1498Szrj     case MIN_EXPR:
2534*38fd1498Szrj     case BIT_IOR_EXPR:
2535*38fd1498Szrj     case BIT_XOR_EXPR:
2536*38fd1498Szrj     case BIT_AND_EXPR:
2537*38fd1498Szrj     case TRUTH_OR_EXPR:
2538*38fd1498Szrj     case TRUTH_XOR_EXPR:
2539*38fd1498Szrj     case TRUTH_AND_EXPR:
2540*38fd1498Szrj       break;
2541*38fd1498Szrj     default:
2542*38fd1498Szrj       return;
2543*38fd1498Szrj     }
2544*38fd1498Szrj 
2545*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
2546*38fd1498Szrj     {
2547*38fd1498Szrj       fprintf (dump_file,
2548*38fd1498Szrj 	       "Detected reduction. reduction stmt is:\n");
2549*38fd1498Szrj       print_gimple_stmt (dump_file, reduc_stmt, 0);
2550*38fd1498Szrj       fprintf (dump_file, "\n");
2551*38fd1498Szrj     }
2552*38fd1498Szrj 
2553*38fd1498Szrj   new_reduction = XCNEW (struct reduction_info);
2554*38fd1498Szrj 
2555*38fd1498Szrj   new_reduction->reduc_stmt = reduc_stmt;
2556*38fd1498Szrj   new_reduction->reduc_phi = phi;
2557*38fd1498Szrj   new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2558*38fd1498Szrj   new_reduction->reduction_code = reduction_code;
2559*38fd1498Szrj   slot = reduction_list->find_slot (new_reduction, INSERT);
2560*38fd1498Szrj   *slot = new_reduction;
2561*38fd1498Szrj }
2562*38fd1498Szrj 
2563*38fd1498Szrj /* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */
2564*38fd1498Szrj 
2565*38fd1498Szrj int
set_reduc_phi_uids(reduction_info ** slot,void * data ATTRIBUTE_UNUSED)2566*38fd1498Szrj set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2567*38fd1498Szrj {
2568*38fd1498Szrj   struct reduction_info *const red = *slot;
2569*38fd1498Szrj   gimple_set_uid (red->reduc_phi, red->reduc_version);
2570*38fd1498Szrj   return 1;
2571*38fd1498Szrj }
2572*38fd1498Szrj 
2573*38fd1498Szrj /* Return true if the type of reduction performed by STMT is suitable
2574*38fd1498Szrj    for this pass.  */
2575*38fd1498Szrj 
2576*38fd1498Szrj static bool
valid_reduction_p(gimple * stmt)2577*38fd1498Szrj valid_reduction_p (gimple *stmt)
2578*38fd1498Szrj {
2579*38fd1498Szrj   /* Parallelization would reassociate the operation, which isn't
2580*38fd1498Szrj      allowed for in-order reductions.  */
2581*38fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2582*38fd1498Szrj   vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
2583*38fd1498Szrj   return reduc_type != FOLD_LEFT_REDUCTION;
2584*38fd1498Szrj }
2585*38fd1498Szrj 
2586*38fd1498Szrj /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
2587*38fd1498Szrj 
2588*38fd1498Szrj static void
gather_scalar_reductions(loop_p loop,reduction_info_table_type * reduction_list)2589*38fd1498Szrj gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2590*38fd1498Szrj {
2591*38fd1498Szrj   gphi_iterator gsi;
2592*38fd1498Szrj   loop_vec_info simple_loop_info;
2593*38fd1498Szrj   auto_vec<gphi *, 4> double_reduc_phis;
2594*38fd1498Szrj   auto_vec<gimple *, 4> double_reduc_stmts;
2595*38fd1498Szrj 
2596*38fd1498Szrj   if (!stmt_vec_info_vec.exists ())
2597*38fd1498Szrj     init_stmt_vec_info_vec ();
2598*38fd1498Szrj 
2599*38fd1498Szrj   simple_loop_info = vect_analyze_loop_form (loop);
2600*38fd1498Szrj   if (simple_loop_info == NULL)
2601*38fd1498Szrj     goto gather_done;
2602*38fd1498Szrj 
2603*38fd1498Szrj   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2604*38fd1498Szrj     {
2605*38fd1498Szrj       gphi *phi = gsi.phi ();
2606*38fd1498Szrj       affine_iv iv;
2607*38fd1498Szrj       tree res = PHI_RESULT (phi);
2608*38fd1498Szrj       bool double_reduc;
2609*38fd1498Szrj 
2610*38fd1498Szrj       if (virtual_operand_p (res))
2611*38fd1498Szrj 	continue;
2612*38fd1498Szrj 
2613*38fd1498Szrj       if (simple_iv (loop, loop, res, &iv, true))
2614*38fd1498Szrj 	continue;
2615*38fd1498Szrj 
2616*38fd1498Szrj       gimple *reduc_stmt
2617*38fd1498Szrj 	= vect_force_simple_reduction (simple_loop_info, phi,
2618*38fd1498Szrj 				       &double_reduc, true);
2619*38fd1498Szrj       if (!reduc_stmt || !valid_reduction_p (reduc_stmt))
2620*38fd1498Szrj 	continue;
2621*38fd1498Szrj 
2622*38fd1498Szrj       if (double_reduc)
2623*38fd1498Szrj 	{
2624*38fd1498Szrj 	  if (loop->inner->inner != NULL)
2625*38fd1498Szrj 	    continue;
2626*38fd1498Szrj 
2627*38fd1498Szrj 	  double_reduc_phis.safe_push (phi);
2628*38fd1498Szrj 	  double_reduc_stmts.safe_push (reduc_stmt);
2629*38fd1498Szrj 	  continue;
2630*38fd1498Szrj 	}
2631*38fd1498Szrj 
2632*38fd1498Szrj       build_new_reduction (reduction_list, reduc_stmt, phi);
2633*38fd1498Szrj     }
2634*38fd1498Szrj   delete simple_loop_info;
2635*38fd1498Szrj 
2636*38fd1498Szrj   if (!double_reduc_phis.is_empty ())
2637*38fd1498Szrj     {
2638*38fd1498Szrj       simple_loop_info = vect_analyze_loop_form (loop->inner);
2639*38fd1498Szrj       if (simple_loop_info)
2640*38fd1498Szrj 	{
2641*38fd1498Szrj 	  gphi *phi;
2642*38fd1498Szrj 	  unsigned int i;
2643*38fd1498Szrj 
2644*38fd1498Szrj 	  FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
2645*38fd1498Szrj 	    {
2646*38fd1498Szrj 	      affine_iv iv;
2647*38fd1498Szrj 	      tree res = PHI_RESULT (phi);
2648*38fd1498Szrj 	      bool double_reduc;
2649*38fd1498Szrj 
2650*38fd1498Szrj 	      use_operand_p use_p;
2651*38fd1498Szrj 	      gimple *inner_stmt;
2652*38fd1498Szrj 	      bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
2653*38fd1498Szrj 	      gcc_assert (single_use_p);
2654*38fd1498Szrj 	      if (gimple_code (inner_stmt) != GIMPLE_PHI)
2655*38fd1498Szrj 		continue;
2656*38fd1498Szrj 	      gphi *inner_phi = as_a <gphi *> (inner_stmt);
2657*38fd1498Szrj 	      if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2658*38fd1498Szrj 			     &iv, true))
2659*38fd1498Szrj 		continue;
2660*38fd1498Szrj 
2661*38fd1498Szrj 	      gimple *inner_reduc_stmt
2662*38fd1498Szrj 		= vect_force_simple_reduction (simple_loop_info, inner_phi,
2663*38fd1498Szrj 					       &double_reduc, true);
2664*38fd1498Szrj 	      gcc_assert (!double_reduc);
2665*38fd1498Szrj 	      if (inner_reduc_stmt == NULL
2666*38fd1498Szrj 		  || !valid_reduction_p (inner_reduc_stmt))
2667*38fd1498Szrj 		continue;
2668*38fd1498Szrj 
2669*38fd1498Szrj 	      build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
2670*38fd1498Szrj 	    }
2671*38fd1498Szrj 	  delete simple_loop_info;
2672*38fd1498Szrj 	}
2673*38fd1498Szrj     }
2674*38fd1498Szrj 
2675*38fd1498Szrj  gather_done:
2676*38fd1498Szrj   /* Release the claim on gimple_uid.  */
2677*38fd1498Szrj   free_stmt_vec_info_vec ();
2678*38fd1498Szrj 
2679*38fd1498Szrj   if (reduction_list->elements () == 0)
2680*38fd1498Szrj     return;
2681*38fd1498Szrj 
2682*38fd1498Szrj   /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2683*38fd1498Szrj      and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
2684*38fd1498Szrj      now.  */
2685*38fd1498Szrj   basic_block bb;
2686*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
2687*38fd1498Szrj     for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2688*38fd1498Szrj       gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
2689*38fd1498Szrj   reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2690*38fd1498Szrj }
2691*38fd1498Szrj 
2692*38fd1498Szrj /* Try to initialize NITER for code generation part.  */
2693*38fd1498Szrj 
2694*38fd1498Szrj static bool
try_get_loop_niter(loop_p loop,struct tree_niter_desc * niter)2695*38fd1498Szrj try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2696*38fd1498Szrj {
2697*38fd1498Szrj   edge exit = single_dom_exit (loop);
2698*38fd1498Szrj 
2699*38fd1498Szrj   gcc_assert (exit);
2700*38fd1498Szrj 
2701*38fd1498Szrj   /* We need to know # of iterations, and there should be no uses of values
2702*38fd1498Szrj      defined inside loop outside of it, unless the values are invariants of
2703*38fd1498Szrj      the loop.  */
2704*38fd1498Szrj   if (!number_of_iterations_exit (loop, exit, niter, false))
2705*38fd1498Szrj     {
2706*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
2707*38fd1498Szrj 	fprintf (dump_file, "  FAILED: number of iterations not known\n");
2708*38fd1498Szrj       return false;
2709*38fd1498Szrj     }
2710*38fd1498Szrj 
2711*38fd1498Szrj   return true;
2712*38fd1498Szrj }
2713*38fd1498Szrj 
2714*38fd1498Szrj /* Return the default def of the first function argument.  */
2715*38fd1498Szrj 
2716*38fd1498Szrj static tree
get_omp_data_i_param(void)2717*38fd1498Szrj get_omp_data_i_param (void)
2718*38fd1498Szrj {
2719*38fd1498Szrj   tree decl = DECL_ARGUMENTS (cfun->decl);
2720*38fd1498Szrj   gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
2721*38fd1498Szrj   return ssa_default_def (cfun, decl);
2722*38fd1498Szrj }
2723*38fd1498Szrj 
2724*38fd1498Szrj /* For PHI in loop header of LOOP, look for pattern:
2725*38fd1498Szrj 
2726*38fd1498Szrj    <bb preheader>
2727*38fd1498Szrj    .omp_data_i = &.omp_data_arr;
2728*38fd1498Szrj    addr = .omp_data_i->sum;
2729*38fd1498Szrj    sum_a = *addr;
2730*38fd1498Szrj 
2731*38fd1498Szrj    <bb header>:
2732*38fd1498Szrj    sum_b = PHI <sum_a (preheader), sum_c (latch)>
2733*38fd1498Szrj 
2734*38fd1498Szrj    and return addr.  Otherwise, return NULL_TREE.  */
2735*38fd1498Szrj 
2736*38fd1498Szrj static tree
find_reduc_addr(struct loop * loop,gphi * phi)2737*38fd1498Szrj find_reduc_addr (struct loop *loop, gphi *phi)
2738*38fd1498Szrj {
2739*38fd1498Szrj   edge e = loop_preheader_edge (loop);
2740*38fd1498Szrj   tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
2741*38fd1498Szrj   gimple *stmt = SSA_NAME_DEF_STMT (arg);
2742*38fd1498Szrj   if (!gimple_assign_single_p (stmt))
2743*38fd1498Szrj     return NULL_TREE;
2744*38fd1498Szrj   tree memref = gimple_assign_rhs1 (stmt);
2745*38fd1498Szrj   if (TREE_CODE (memref) != MEM_REF)
2746*38fd1498Szrj     return NULL_TREE;
2747*38fd1498Szrj   tree addr = TREE_OPERAND (memref, 0);
2748*38fd1498Szrj 
2749*38fd1498Szrj   gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
2750*38fd1498Szrj   if (!gimple_assign_single_p (stmt2))
2751*38fd1498Szrj     return NULL_TREE;
2752*38fd1498Szrj   tree compref = gimple_assign_rhs1 (stmt2);
2753*38fd1498Szrj   if (TREE_CODE (compref) != COMPONENT_REF)
2754*38fd1498Szrj     return NULL_TREE;
2755*38fd1498Szrj   tree addr2 = TREE_OPERAND (compref, 0);
2756*38fd1498Szrj   if (TREE_CODE (addr2) != MEM_REF)
2757*38fd1498Szrj     return NULL_TREE;
2758*38fd1498Szrj   addr2 = TREE_OPERAND (addr2, 0);
2759*38fd1498Szrj   if (TREE_CODE (addr2) != SSA_NAME
2760*38fd1498Szrj       || addr2 != get_omp_data_i_param ())
2761*38fd1498Szrj     return NULL_TREE;
2762*38fd1498Szrj 
2763*38fd1498Szrj   return addr;
2764*38fd1498Szrj }
2765*38fd1498Szrj 
2766*38fd1498Szrj /* Try to initialize REDUCTION_LIST for code generation part.
2767*38fd1498Szrj    REDUCTION_LIST describes the reductions.  */
2768*38fd1498Szrj 
2769*38fd1498Szrj static bool
try_create_reduction_list(loop_p loop,reduction_info_table_type * reduction_list,bool oacc_kernels_p)2770*38fd1498Szrj try_create_reduction_list (loop_p loop,
2771*38fd1498Szrj 			   reduction_info_table_type *reduction_list,
2772*38fd1498Szrj 			   bool oacc_kernels_p)
2773*38fd1498Szrj {
2774*38fd1498Szrj   edge exit = single_dom_exit (loop);
2775*38fd1498Szrj   gphi_iterator gsi;
2776*38fd1498Szrj 
2777*38fd1498Szrj   gcc_assert (exit);
2778*38fd1498Szrj 
2779*38fd1498Szrj   /* Try to get rid of exit phis.  */
2780*38fd1498Szrj   final_value_replacement_loop (loop);
2781*38fd1498Szrj 
2782*38fd1498Szrj   gather_scalar_reductions (loop, reduction_list);
2783*38fd1498Szrj 
2784*38fd1498Szrj 
2785*38fd1498Szrj   for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2786*38fd1498Szrj     {
2787*38fd1498Szrj       gphi *phi = gsi.phi ();
2788*38fd1498Szrj       struct reduction_info *red;
2789*38fd1498Szrj       imm_use_iterator imm_iter;
2790*38fd1498Szrj       use_operand_p use_p;
2791*38fd1498Szrj       gimple *reduc_phi;
2792*38fd1498Szrj       tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2793*38fd1498Szrj 
2794*38fd1498Szrj       if (!virtual_operand_p (val))
2795*38fd1498Szrj 	{
2796*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
2797*38fd1498Szrj 	    {
2798*38fd1498Szrj 	      fprintf (dump_file, "phi is ");
2799*38fd1498Szrj 	      print_gimple_stmt (dump_file, phi, 0);
2800*38fd1498Szrj 	      fprintf (dump_file, "arg of phi to exit:   value ");
2801*38fd1498Szrj 	      print_generic_expr (dump_file, val);
2802*38fd1498Szrj 	      fprintf (dump_file, " used outside loop\n");
2803*38fd1498Szrj 	      fprintf (dump_file,
2804*38fd1498Szrj 		       "  checking if it is part of reduction pattern:\n");
2805*38fd1498Szrj 	    }
2806*38fd1498Szrj 	  if (reduction_list->elements () == 0)
2807*38fd1498Szrj 	    {
2808*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
2809*38fd1498Szrj 		fprintf (dump_file,
2810*38fd1498Szrj 			 "  FAILED: it is not a part of reduction.\n");
2811*38fd1498Szrj 	      return false;
2812*38fd1498Szrj 	    }
2813*38fd1498Szrj 	  reduc_phi = NULL;
2814*38fd1498Szrj 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2815*38fd1498Szrj 	    {
2816*38fd1498Szrj 	      if (!gimple_debug_bind_p (USE_STMT (use_p))
2817*38fd1498Szrj 		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2818*38fd1498Szrj 		{
2819*38fd1498Szrj 		  reduc_phi = USE_STMT (use_p);
2820*38fd1498Szrj 		  break;
2821*38fd1498Szrj 		}
2822*38fd1498Szrj 	    }
2823*38fd1498Szrj 	  red = reduction_phi (reduction_list, reduc_phi);
2824*38fd1498Szrj 	  if (red == NULL)
2825*38fd1498Szrj 	    {
2826*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
2827*38fd1498Szrj 		fprintf (dump_file,
2828*38fd1498Szrj 			 "  FAILED: it is not a part of reduction.\n");
2829*38fd1498Szrj 	      return false;
2830*38fd1498Szrj 	    }
2831*38fd1498Szrj 	  if (red->keep_res != NULL)
2832*38fd1498Szrj 	    {
2833*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
2834*38fd1498Szrj 		fprintf (dump_file,
2835*38fd1498Szrj 			 "  FAILED: reduction has multiple exit phis.\n");
2836*38fd1498Szrj 	      return false;
2837*38fd1498Szrj 	    }
2838*38fd1498Szrj 	  red->keep_res = phi;
2839*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
2840*38fd1498Szrj 	    {
2841*38fd1498Szrj 	      fprintf (dump_file, "reduction phi is  ");
2842*38fd1498Szrj 	      print_gimple_stmt (dump_file, red->reduc_phi, 0);
2843*38fd1498Szrj 	      fprintf (dump_file, "reduction stmt is  ");
2844*38fd1498Szrj 	      print_gimple_stmt (dump_file, red->reduc_stmt, 0);
2845*38fd1498Szrj 	    }
2846*38fd1498Szrj 	}
2847*38fd1498Szrj     }
2848*38fd1498Szrj 
2849*38fd1498Szrj   /* The iterations of the loop may communicate only through bivs whose
2850*38fd1498Szrj      iteration space can be distributed efficiently.  */
2851*38fd1498Szrj   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2852*38fd1498Szrj     {
2853*38fd1498Szrj       gphi *phi = gsi.phi ();
2854*38fd1498Szrj       tree def = PHI_RESULT (phi);
2855*38fd1498Szrj       affine_iv iv;
2856*38fd1498Szrj 
2857*38fd1498Szrj       if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2858*38fd1498Szrj 	{
2859*38fd1498Szrj 	  struct reduction_info *red;
2860*38fd1498Szrj 
2861*38fd1498Szrj 	  red = reduction_phi (reduction_list, phi);
2862*38fd1498Szrj 	  if (red == NULL)
2863*38fd1498Szrj 	    {
2864*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
2865*38fd1498Szrj 		fprintf (dump_file,
2866*38fd1498Szrj 			 "  FAILED: scalar dependency between iterations\n");
2867*38fd1498Szrj 	      return false;
2868*38fd1498Szrj 	    }
2869*38fd1498Szrj 	}
2870*38fd1498Szrj     }
2871*38fd1498Szrj 
2872*38fd1498Szrj   if (oacc_kernels_p)
2873*38fd1498Szrj     {
2874*38fd1498Szrj       for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
2875*38fd1498Szrj 	   gsi_next (&gsi))
2876*38fd1498Szrj 	{
2877*38fd1498Szrj 	  gphi *phi = gsi.phi ();
2878*38fd1498Szrj 	  tree def = PHI_RESULT (phi);
2879*38fd1498Szrj 	  affine_iv iv;
2880*38fd1498Szrj 
2881*38fd1498Szrj 	  if (!virtual_operand_p (def)
2882*38fd1498Szrj 	      && !simple_iv (loop, loop, def, &iv, true))
2883*38fd1498Szrj 	    {
2884*38fd1498Szrj 	      tree addr = find_reduc_addr (loop, phi);
2885*38fd1498Szrj 	      if (addr == NULL_TREE)
2886*38fd1498Szrj 		return false;
2887*38fd1498Szrj 	      struct reduction_info *red = reduction_phi (reduction_list, phi);
2888*38fd1498Szrj 	      red->reduc_addr = addr;
2889*38fd1498Szrj 	    }
2890*38fd1498Szrj 	}
2891*38fd1498Szrj     }
2892*38fd1498Szrj 
2893*38fd1498Szrj   return true;
2894*38fd1498Szrj }
2895*38fd1498Szrj 
2896*38fd1498Szrj /* Return true if LOOP contains phis with ADDR_EXPR in args.  */
2897*38fd1498Szrj 
2898*38fd1498Szrj static bool
loop_has_phi_with_address_arg(struct loop * loop)2899*38fd1498Szrj loop_has_phi_with_address_arg (struct loop *loop)
2900*38fd1498Szrj {
2901*38fd1498Szrj   basic_block *bbs = get_loop_body (loop);
2902*38fd1498Szrj   bool res = false;
2903*38fd1498Szrj 
2904*38fd1498Szrj   unsigned i, j;
2905*38fd1498Szrj   gphi_iterator gsi;
2906*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
2907*38fd1498Szrj     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2908*38fd1498Szrj       {
2909*38fd1498Szrj 	gphi *phi = gsi.phi ();
2910*38fd1498Szrj 	for (j = 0; j < gimple_phi_num_args (phi); j++)
2911*38fd1498Szrj 	  {
2912*38fd1498Szrj 	    tree arg = gimple_phi_arg_def (phi, j);
2913*38fd1498Szrj 	    if (TREE_CODE (arg) == ADDR_EXPR)
2914*38fd1498Szrj 	      {
2915*38fd1498Szrj 		/* This should be handled by eliminate_local_variables, but that
2916*38fd1498Szrj 		   function currently ignores phis.  */
2917*38fd1498Szrj 		res = true;
2918*38fd1498Szrj 		goto end;
2919*38fd1498Szrj 	      }
2920*38fd1498Szrj 	  }
2921*38fd1498Szrj       }
2922*38fd1498Szrj  end:
2923*38fd1498Szrj   free (bbs);
2924*38fd1498Szrj 
2925*38fd1498Szrj   return res;
2926*38fd1498Szrj }
2927*38fd1498Szrj 
2928*38fd1498Szrj /* Return true if memory ref REF (corresponding to the stmt at GSI in
2929*38fd1498Szrj    REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
2930*38fd1498Szrj    or the statements in REGIONS_BB[I + n].  REF_IS_STORE indicates if REF is a
2931*38fd1498Szrj    store.  Ignore conflicts with SKIP_STMT.  */
2932*38fd1498Szrj 
2933*38fd1498Szrj static bool
ref_conflicts_with_region(gimple_stmt_iterator gsi,ao_ref * ref,bool ref_is_store,vec<basic_block> region_bbs,unsigned int i,gimple * skip_stmt)2934*38fd1498Szrj ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
2935*38fd1498Szrj 			   bool ref_is_store, vec<basic_block> region_bbs,
2936*38fd1498Szrj 			   unsigned int i, gimple *skip_stmt)
2937*38fd1498Szrj {
2938*38fd1498Szrj   basic_block bb = region_bbs[i];
2939*38fd1498Szrj   gsi_next (&gsi);
2940*38fd1498Szrj 
2941*38fd1498Szrj   while (true)
2942*38fd1498Szrj     {
2943*38fd1498Szrj       for (; !gsi_end_p (gsi);
2944*38fd1498Szrj 	   gsi_next (&gsi))
2945*38fd1498Szrj 	{
2946*38fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
2947*38fd1498Szrj 	  if (stmt == skip_stmt)
2948*38fd1498Szrj 	    {
2949*38fd1498Szrj 	      if (dump_file)
2950*38fd1498Szrj 		{
2951*38fd1498Szrj 		  fprintf (dump_file, "skipping reduction store: ");
2952*38fd1498Szrj 		  print_gimple_stmt (dump_file, stmt, 0);
2953*38fd1498Szrj 		}
2954*38fd1498Szrj 	      continue;
2955*38fd1498Szrj 	    }
2956*38fd1498Szrj 
2957*38fd1498Szrj 	  if (!gimple_vdef (stmt)
2958*38fd1498Szrj 	      && !gimple_vuse (stmt))
2959*38fd1498Szrj 	    continue;
2960*38fd1498Szrj 
2961*38fd1498Szrj 	  if (gimple_code (stmt) == GIMPLE_RETURN)
2962*38fd1498Szrj 	    continue;
2963*38fd1498Szrj 
2964*38fd1498Szrj 	  if (ref_is_store)
2965*38fd1498Szrj 	    {
2966*38fd1498Szrj 	      if (ref_maybe_used_by_stmt_p (stmt, ref))
2967*38fd1498Szrj 		{
2968*38fd1498Szrj 		  if (dump_file)
2969*38fd1498Szrj 		    {
2970*38fd1498Szrj 		      fprintf (dump_file, "Stmt ");
2971*38fd1498Szrj 		      print_gimple_stmt (dump_file, stmt, 0);
2972*38fd1498Szrj 		    }
2973*38fd1498Szrj 		  return true;
2974*38fd1498Szrj 		}
2975*38fd1498Szrj 	    }
2976*38fd1498Szrj 	  else
2977*38fd1498Szrj 	    {
2978*38fd1498Szrj 	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
2979*38fd1498Szrj 		{
2980*38fd1498Szrj 		  if (dump_file)
2981*38fd1498Szrj 		    {
2982*38fd1498Szrj 		      fprintf (dump_file, "Stmt ");
2983*38fd1498Szrj 		      print_gimple_stmt (dump_file, stmt, 0);
2984*38fd1498Szrj 		    }
2985*38fd1498Szrj 		  return true;
2986*38fd1498Szrj 		}
2987*38fd1498Szrj 	    }
2988*38fd1498Szrj 	}
2989*38fd1498Szrj       i++;
2990*38fd1498Szrj       if (i == region_bbs.length ())
2991*38fd1498Szrj 	break;
2992*38fd1498Szrj       bb = region_bbs[i];
2993*38fd1498Szrj       gsi = gsi_start_bb (bb);
2994*38fd1498Szrj     }
2995*38fd1498Szrj 
2996*38fd1498Szrj   return false;
2997*38fd1498Szrj }
2998*38fd1498Szrj 
2999*38fd1498Szrj /* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3000*38fd1498Szrj    in parallel with REGION_BBS containing the loop.  Return the stores of
3001*38fd1498Szrj    reduction results in REDUCTION_STORES.  */
3002*38fd1498Szrj 
3003*38fd1498Szrj static bool
oacc_entry_exit_ok_1(bitmap in_loop_bbs,vec<basic_block> region_bbs,reduction_info_table_type * reduction_list,bitmap reduction_stores)3004*38fd1498Szrj oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3005*38fd1498Szrj 		      reduction_info_table_type *reduction_list,
3006*38fd1498Szrj 		      bitmap reduction_stores)
3007*38fd1498Szrj {
3008*38fd1498Szrj   tree omp_data_i = get_omp_data_i_param ();
3009*38fd1498Szrj 
3010*38fd1498Szrj   unsigned i;
3011*38fd1498Szrj   basic_block bb;
3012*38fd1498Szrj   FOR_EACH_VEC_ELT (region_bbs, i, bb)
3013*38fd1498Szrj     {
3014*38fd1498Szrj       if (bitmap_bit_p (in_loop_bbs, bb->index))
3015*38fd1498Szrj 	continue;
3016*38fd1498Szrj 
3017*38fd1498Szrj       gimple_stmt_iterator gsi;
3018*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3019*38fd1498Szrj 	   gsi_next (&gsi))
3020*38fd1498Szrj 	{
3021*38fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
3022*38fd1498Szrj 	  gimple *skip_stmt = NULL;
3023*38fd1498Szrj 
3024*38fd1498Szrj 	  if (is_gimple_debug (stmt)
3025*38fd1498Szrj 	      || gimple_code (stmt) == GIMPLE_COND)
3026*38fd1498Szrj 	    continue;
3027*38fd1498Szrj 
3028*38fd1498Szrj 	  ao_ref ref;
3029*38fd1498Szrj 	  bool ref_is_store = false;
3030*38fd1498Szrj 	  if (gimple_assign_load_p (stmt))
3031*38fd1498Szrj 	    {
3032*38fd1498Szrj 	      tree rhs = gimple_assign_rhs1 (stmt);
3033*38fd1498Szrj 	      tree base = get_base_address (rhs);
3034*38fd1498Szrj 	      if (TREE_CODE (base) == MEM_REF
3035*38fd1498Szrj 		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3036*38fd1498Szrj 		continue;
3037*38fd1498Szrj 
3038*38fd1498Szrj 	      tree lhs = gimple_assign_lhs (stmt);
3039*38fd1498Szrj 	      if (TREE_CODE (lhs) == SSA_NAME
3040*38fd1498Szrj 		  && has_single_use (lhs))
3041*38fd1498Szrj 		{
3042*38fd1498Szrj 		  use_operand_p use_p;
3043*38fd1498Szrj 		  gimple *use_stmt;
3044*38fd1498Szrj 		  single_imm_use (lhs, &use_p, &use_stmt);
3045*38fd1498Szrj 		  if (gimple_code (use_stmt) == GIMPLE_PHI)
3046*38fd1498Szrj 		    {
3047*38fd1498Szrj 		      struct reduction_info *red;
3048*38fd1498Szrj 		      red = reduction_phi (reduction_list, use_stmt);
3049*38fd1498Szrj 		      tree val = PHI_RESULT (red->keep_res);
3050*38fd1498Szrj 		      if (has_single_use (val))
3051*38fd1498Szrj 			{
3052*38fd1498Szrj 			  single_imm_use (val, &use_p, &use_stmt);
3053*38fd1498Szrj 			  if (gimple_store_p (use_stmt))
3054*38fd1498Szrj 			    {
3055*38fd1498Szrj 			      unsigned int id
3056*38fd1498Szrj 				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
3057*38fd1498Szrj 			      bitmap_set_bit (reduction_stores, id);
3058*38fd1498Szrj 			      skip_stmt = use_stmt;
3059*38fd1498Szrj 			      if (dump_file)
3060*38fd1498Szrj 				{
3061*38fd1498Szrj 				  fprintf (dump_file, "found reduction load: ");
3062*38fd1498Szrj 				  print_gimple_stmt (dump_file, stmt, 0);
3063*38fd1498Szrj 				}
3064*38fd1498Szrj 			    }
3065*38fd1498Szrj 			}
3066*38fd1498Szrj 		    }
3067*38fd1498Szrj 		}
3068*38fd1498Szrj 
3069*38fd1498Szrj 	      ao_ref_init (&ref, rhs);
3070*38fd1498Szrj 	    }
3071*38fd1498Szrj 	  else if (gimple_store_p (stmt))
3072*38fd1498Szrj 	    {
3073*38fd1498Szrj 	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
3074*38fd1498Szrj 	      ref_is_store = true;
3075*38fd1498Szrj 	    }
3076*38fd1498Szrj 	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3077*38fd1498Szrj 	    continue;
3078*38fd1498Szrj 	  else if (!gimple_has_side_effects (stmt)
3079*38fd1498Szrj 		   && !gimple_could_trap_p (stmt)
3080*38fd1498Szrj 		   && !stmt_could_throw_p (stmt)
3081*38fd1498Szrj 		   && !gimple_vdef (stmt)
3082*38fd1498Szrj 		   && !gimple_vuse (stmt))
3083*38fd1498Szrj 	    continue;
3084*38fd1498Szrj 	  else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3085*38fd1498Szrj 	    continue;
3086*38fd1498Szrj 	  else if (gimple_code (stmt) == GIMPLE_RETURN)
3087*38fd1498Szrj 	    continue;
3088*38fd1498Szrj 	  else
3089*38fd1498Szrj 	    {
3090*38fd1498Szrj 	      if (dump_file)
3091*38fd1498Szrj 		{
3092*38fd1498Szrj 		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3093*38fd1498Szrj 		  print_gimple_stmt (dump_file, stmt, 0);
3094*38fd1498Szrj 		}
3095*38fd1498Szrj 	      return false;
3096*38fd1498Szrj 	    }
3097*38fd1498Szrj 
3098*38fd1498Szrj 	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3099*38fd1498Szrj 					 i, skip_stmt))
3100*38fd1498Szrj 	    {
3101*38fd1498Szrj 	      if (dump_file)
3102*38fd1498Szrj 		{
3103*38fd1498Szrj 		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
3104*38fd1498Szrj 		  print_gimple_stmt (dump_file, stmt, 0);
3105*38fd1498Szrj 		}
3106*38fd1498Szrj 	      return false;
3107*38fd1498Szrj 	    }
3108*38fd1498Szrj 	}
3109*38fd1498Szrj     }
3110*38fd1498Szrj 
3111*38fd1498Szrj   return true;
3112*38fd1498Szrj }
3113*38fd1498Szrj 
3114*38fd1498Szrj /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3115*38fd1498Szrj    gang_pos == 0, except when the stores are REDUCTION_STORES.  Return true
3116*38fd1498Szrj    if any changes were made.  */
3117*38fd1498Szrj 
3118*38fd1498Szrj static bool
oacc_entry_exit_single_gang(bitmap in_loop_bbs,vec<basic_block> region_bbs,bitmap reduction_stores)3119*38fd1498Szrj oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3120*38fd1498Szrj 			     bitmap reduction_stores)
3121*38fd1498Szrj {
3122*38fd1498Szrj   tree gang_pos = NULL_TREE;
3123*38fd1498Szrj   bool changed = false;
3124*38fd1498Szrj 
3125*38fd1498Szrj   unsigned i;
3126*38fd1498Szrj   basic_block bb;
3127*38fd1498Szrj   FOR_EACH_VEC_ELT (region_bbs, i, bb)
3128*38fd1498Szrj     {
3129*38fd1498Szrj       if (bitmap_bit_p (in_loop_bbs, bb->index))
3130*38fd1498Szrj 	continue;
3131*38fd1498Szrj 
3132*38fd1498Szrj       gimple_stmt_iterator gsi;
3133*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3134*38fd1498Szrj 	{
3135*38fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
3136*38fd1498Szrj 
3137*38fd1498Szrj 	  if (!gimple_store_p (stmt))
3138*38fd1498Szrj 	    {
3139*38fd1498Szrj 	      /* Update gsi to point to next stmt.  */
3140*38fd1498Szrj 	      gsi_next (&gsi);
3141*38fd1498Szrj 	      continue;
3142*38fd1498Szrj 	    }
3143*38fd1498Szrj 
3144*38fd1498Szrj 	  if (bitmap_bit_p (reduction_stores,
3145*38fd1498Szrj 			    SSA_NAME_VERSION (gimple_vdef (stmt))))
3146*38fd1498Szrj 	    {
3147*38fd1498Szrj 	      if (dump_file)
3148*38fd1498Szrj 		{
3149*38fd1498Szrj 		  fprintf (dump_file,
3150*38fd1498Szrj 			   "skipped reduction store for single-gang"
3151*38fd1498Szrj 			   " neutering: ");
3152*38fd1498Szrj 		  print_gimple_stmt (dump_file, stmt, 0);
3153*38fd1498Szrj 		}
3154*38fd1498Szrj 
3155*38fd1498Szrj 	      /* Update gsi to point to next stmt.  */
3156*38fd1498Szrj 	      gsi_next (&gsi);
3157*38fd1498Szrj 	      continue;
3158*38fd1498Szrj 	    }
3159*38fd1498Szrj 
3160*38fd1498Szrj 	  changed = true;
3161*38fd1498Szrj 
3162*38fd1498Szrj 	  if (gang_pos == NULL_TREE)
3163*38fd1498Szrj 	    {
3164*38fd1498Szrj 	      tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3165*38fd1498Szrj 	      gcall *gang_single
3166*38fd1498Szrj 		= gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3167*38fd1498Szrj 	      gang_pos = make_ssa_name (integer_type_node);
3168*38fd1498Szrj 	      gimple_call_set_lhs (gang_single, gang_pos);
3169*38fd1498Szrj 	      gimple_stmt_iterator start
3170*38fd1498Szrj 		= gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3171*38fd1498Szrj 	      tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3172*38fd1498Szrj 	      gimple_set_vuse (gang_single, vuse);
3173*38fd1498Szrj 	      gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3174*38fd1498Szrj 	    }
3175*38fd1498Szrj 
3176*38fd1498Szrj 	  if (dump_file)
3177*38fd1498Szrj 	    {
3178*38fd1498Szrj 	      fprintf (dump_file,
3179*38fd1498Szrj 		       "found store that needs single-gang neutering: ");
3180*38fd1498Szrj 	      print_gimple_stmt (dump_file, stmt, 0);
3181*38fd1498Szrj 	    }
3182*38fd1498Szrj 
3183*38fd1498Szrj 	  {
3184*38fd1498Szrj 	    /* Split block before store.  */
3185*38fd1498Szrj 	    gimple_stmt_iterator gsi2 = gsi;
3186*38fd1498Szrj 	    gsi_prev (&gsi2);
3187*38fd1498Szrj 	    edge e;
3188*38fd1498Szrj 	    if (gsi_end_p (gsi2))
3189*38fd1498Szrj 	      {
3190*38fd1498Szrj 		e = split_block_after_labels (bb);
3191*38fd1498Szrj 		gsi2 = gsi_last_bb (bb);
3192*38fd1498Szrj 	      }
3193*38fd1498Szrj 	    else
3194*38fd1498Szrj 	      e = split_block (bb, gsi_stmt (gsi2));
3195*38fd1498Szrj 	    basic_block bb2 = e->dest;
3196*38fd1498Szrj 
3197*38fd1498Szrj 	    /* Split block after store.  */
3198*38fd1498Szrj 	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3199*38fd1498Szrj 	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
3200*38fd1498Szrj 	    basic_block bb3 = e2->dest;
3201*38fd1498Szrj 
3202*38fd1498Szrj 	    gimple *cond
3203*38fd1498Szrj 	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3204*38fd1498Szrj 				   NULL_TREE, NULL_TREE);
3205*38fd1498Szrj 	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3206*38fd1498Szrj 
3207*38fd1498Szrj 	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3208*38fd1498Szrj 	    /* FIXME: What is the probability?  */
3209*38fd1498Szrj 	    e3->probability = profile_probability::guessed_never ();
3210*38fd1498Szrj 	    e->flags = EDGE_TRUE_VALUE;
3211*38fd1498Szrj 
3212*38fd1498Szrj 	    tree vdef = gimple_vdef (stmt);
3213*38fd1498Szrj 	    tree vuse = gimple_vuse (stmt);
3214*38fd1498Szrj 
3215*38fd1498Szrj 	    tree phi_res = copy_ssa_name (vdef);
3216*38fd1498Szrj 	    gphi *new_phi = create_phi_node (phi_res, bb3);
3217*38fd1498Szrj 	    replace_uses_by (vdef, phi_res);
3218*38fd1498Szrj 	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3219*38fd1498Szrj 	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3220*38fd1498Szrj 
3221*38fd1498Szrj 	    /* Update gsi to point to next stmt.  */
3222*38fd1498Szrj 	    bb = bb3;
3223*38fd1498Szrj 	    gsi = gsi_start_bb (bb);
3224*38fd1498Szrj 	  }
3225*38fd1498Szrj 	}
3226*38fd1498Szrj     }
3227*38fd1498Szrj 
3228*38fd1498Szrj   return changed;
3229*38fd1498Szrj }
3230*38fd1498Szrj 
3231*38fd1498Szrj /* Return true if the statements before and after the LOOP can be executed in
3232*38fd1498Szrj    parallel with the function containing the loop.  Resolve conflicting stores
3233*38fd1498Szrj    outside LOOP by guarding them such that only a single gang executes them.  */
3234*38fd1498Szrj 
3235*38fd1498Szrj static bool
oacc_entry_exit_ok(struct loop * loop,reduction_info_table_type * reduction_list)3236*38fd1498Szrj oacc_entry_exit_ok (struct loop *loop,
3237*38fd1498Szrj 		    reduction_info_table_type *reduction_list)
3238*38fd1498Szrj {
3239*38fd1498Szrj   basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3240*38fd1498Szrj   vec<basic_block> region_bbs
3241*38fd1498Szrj     = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3242*38fd1498Szrj 
3243*38fd1498Szrj   bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3244*38fd1498Szrj   bitmap_clear (in_loop_bbs);
3245*38fd1498Szrj   for (unsigned int i = 0; i < loop->num_nodes; i++)
3246*38fd1498Szrj     bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3247*38fd1498Szrj 
3248*38fd1498Szrj   bitmap reduction_stores = BITMAP_ALLOC (NULL);
3249*38fd1498Szrj   bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3250*38fd1498Szrj 				   reduction_stores);
3251*38fd1498Szrj 
3252*38fd1498Szrj   if (res)
3253*38fd1498Szrj     {
3254*38fd1498Szrj       bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3255*38fd1498Szrj 						  reduction_stores);
3256*38fd1498Szrj       if (changed)
3257*38fd1498Szrj 	{
3258*38fd1498Szrj 	  free_dominance_info (CDI_DOMINATORS);
3259*38fd1498Szrj 	  calculate_dominance_info (CDI_DOMINATORS);
3260*38fd1498Szrj 	}
3261*38fd1498Szrj     }
3262*38fd1498Szrj 
3263*38fd1498Szrj   region_bbs.release ();
3264*38fd1498Szrj   free (loop_bbs);
3265*38fd1498Szrj 
3266*38fd1498Szrj   BITMAP_FREE (in_loop_bbs);
3267*38fd1498Szrj   BITMAP_FREE (reduction_stores);
3268*38fd1498Szrj 
3269*38fd1498Szrj   return res;
3270*38fd1498Szrj }
3271*38fd1498Szrj 
3272*38fd1498Szrj /* Detect parallel loops and generate parallel code using libgomp
3273*38fd1498Szrj    primitives.  Returns true if some loop was parallelized, false
3274*38fd1498Szrj    otherwise.  */
3275*38fd1498Szrj 
3276*38fd1498Szrj static bool
parallelize_loops(bool oacc_kernels_p)3277*38fd1498Szrj parallelize_loops (bool oacc_kernels_p)
3278*38fd1498Szrj {
3279*38fd1498Szrj   unsigned n_threads;
3280*38fd1498Szrj   bool changed = false;
3281*38fd1498Szrj   struct loop *loop;
3282*38fd1498Szrj   struct loop *skip_loop = NULL;
3283*38fd1498Szrj   struct tree_niter_desc niter_desc;
3284*38fd1498Szrj   struct obstack parloop_obstack;
3285*38fd1498Szrj   HOST_WIDE_INT estimated;
3286*38fd1498Szrj   source_location loop_loc;
3287*38fd1498Szrj 
3288*38fd1498Szrj   /* Do not parallelize loops in the functions created by parallelization.  */
3289*38fd1498Szrj   if (!oacc_kernels_p
3290*38fd1498Szrj       && parallelized_function_p (cfun->decl))
3291*38fd1498Szrj     return false;
3292*38fd1498Szrj 
3293*38fd1498Szrj   /* Do not parallelize loops in offloaded functions.  */
3294*38fd1498Szrj   if (!oacc_kernels_p
3295*38fd1498Szrj       && oacc_get_fn_attrib (cfun->decl) != NULL)
3296*38fd1498Szrj      return false;
3297*38fd1498Szrj 
3298*38fd1498Szrj   if (cfun->has_nonlocal_label)
3299*38fd1498Szrj     return false;
3300*38fd1498Szrj 
3301*38fd1498Szrj   /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
3302*38fd1498Szrj      the argument to -ftree-parallelize-loops.  */
3303*38fd1498Szrj   if (oacc_kernels_p)
3304*38fd1498Szrj     n_threads = 0;
3305*38fd1498Szrj   else
3306*38fd1498Szrj     n_threads = flag_tree_parallelize_loops;
3307*38fd1498Szrj 
3308*38fd1498Szrj   gcc_obstack_init (&parloop_obstack);
3309*38fd1498Szrj   reduction_info_table_type reduction_list (10);
3310*38fd1498Szrj 
3311*38fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
3312*38fd1498Szrj 
3313*38fd1498Szrj   FOR_EACH_LOOP (loop, 0)
3314*38fd1498Szrj     {
3315*38fd1498Szrj       if (loop == skip_loop)
3316*38fd1498Szrj 	{
3317*38fd1498Szrj 	  if (!loop->in_oacc_kernels_region
3318*38fd1498Szrj 	      && dump_file && (dump_flags & TDF_DETAILS))
3319*38fd1498Szrj 	    fprintf (dump_file,
3320*38fd1498Szrj 		     "Skipping loop %d as inner loop of parallelized loop\n",
3321*38fd1498Szrj 		     loop->num);
3322*38fd1498Szrj 
3323*38fd1498Szrj 	  skip_loop = loop->inner;
3324*38fd1498Szrj 	  continue;
3325*38fd1498Szrj 	}
3326*38fd1498Szrj       else
3327*38fd1498Szrj 	skip_loop = NULL;
3328*38fd1498Szrj 
3329*38fd1498Szrj       reduction_list.empty ();
3330*38fd1498Szrj 
3331*38fd1498Szrj       if (oacc_kernels_p)
3332*38fd1498Szrj 	{
3333*38fd1498Szrj 	  if (!loop->in_oacc_kernels_region)
3334*38fd1498Szrj 	    continue;
3335*38fd1498Szrj 
3336*38fd1498Szrj 	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
3337*38fd1498Szrj 	  if (loop->inner)
3338*38fd1498Szrj 	    skip_loop = loop->inner;
3339*38fd1498Szrj 
3340*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
3341*38fd1498Szrj 	    fprintf (dump_file,
3342*38fd1498Szrj 		     "Trying loop %d with header bb %d in oacc kernels"
3343*38fd1498Szrj 		     " region\n", loop->num, loop->header->index);
3344*38fd1498Szrj 	}
3345*38fd1498Szrj 
3346*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
3347*38fd1498Szrj       {
3348*38fd1498Szrj         fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
3349*38fd1498Szrj 	if (loop->inner)
3350*38fd1498Szrj 	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
3351*38fd1498Szrj 	else
3352*38fd1498Szrj 	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
3353*38fd1498Szrj       }
3354*38fd1498Szrj 
3355*38fd1498Szrj       if (!single_dom_exit (loop))
3356*38fd1498Szrj       {
3357*38fd1498Szrj 
3358*38fd1498Szrj         if (dump_file && (dump_flags & TDF_DETAILS))
3359*38fd1498Szrj 	  fprintf (dump_file, "loop is !single_dom_exit\n");
3360*38fd1498Szrj 
3361*38fd1498Szrj 	continue;
3362*38fd1498Szrj       }
3363*38fd1498Szrj 
3364*38fd1498Szrj       if (/* And of course, the loop must be parallelizable.  */
3365*38fd1498Szrj 	  !can_duplicate_loop_p (loop)
3366*38fd1498Szrj 	  || loop_has_blocks_with_irreducible_flag (loop)
3367*38fd1498Szrj 	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
3368*38fd1498Szrj 	  /* FIXME: the check for vector phi nodes could be removed.  */
3369*38fd1498Szrj 	  || loop_has_vector_phi_nodes (loop))
3370*38fd1498Szrj 	continue;
3371*38fd1498Szrj 
3372*38fd1498Szrj       estimated = estimated_loop_iterations_int (loop);
3373*38fd1498Szrj       if (estimated == -1)
3374*38fd1498Szrj 	estimated = get_likely_max_loop_iterations_int (loop);
3375*38fd1498Szrj       /* FIXME: Bypass this check as graphite doesn't update the
3376*38fd1498Szrj 	 count and frequency correctly now.  */
3377*38fd1498Szrj       if (!flag_loop_parallelize_all
3378*38fd1498Szrj 	  && !oacc_kernels_p
3379*38fd1498Szrj 	  && ((estimated != -1
3380*38fd1498Szrj 	       && (estimated
3381*38fd1498Szrj 		   < ((HOST_WIDE_INT) n_threads
3382*38fd1498Szrj 		      * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
3383*38fd1498Szrj 	      /* Do not bother with loops in cold areas.  */
3384*38fd1498Szrj 	      || optimize_loop_nest_for_size_p (loop)))
3385*38fd1498Szrj 	continue;
3386*38fd1498Szrj 
3387*38fd1498Szrj       if (!try_get_loop_niter (loop, &niter_desc))
3388*38fd1498Szrj 	continue;
3389*38fd1498Szrj 
3390*38fd1498Szrj       if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
3391*38fd1498Szrj 	continue;
3392*38fd1498Szrj 
3393*38fd1498Szrj       if (loop_has_phi_with_address_arg (loop))
3394*38fd1498Szrj 	continue;
3395*38fd1498Szrj 
3396*38fd1498Szrj       if (!loop->can_be_parallel
3397*38fd1498Szrj 	  && !loop_parallel_p (loop, &parloop_obstack))
3398*38fd1498Szrj 	continue;
3399*38fd1498Szrj 
3400*38fd1498Szrj       if (oacc_kernels_p
3401*38fd1498Szrj 	&& !oacc_entry_exit_ok (loop, &reduction_list))
3402*38fd1498Szrj 	{
3403*38fd1498Szrj 	  if (dump_file)
3404*38fd1498Szrj 	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
3405*38fd1498Szrj 	  continue;
3406*38fd1498Szrj 	}
3407*38fd1498Szrj 
3408*38fd1498Szrj       changed = true;
3409*38fd1498Szrj       skip_loop = loop->inner;
3410*38fd1498Szrj 
3411*38fd1498Szrj       loop_loc = find_loop_location (loop);
3412*38fd1498Szrj       if (loop->inner)
3413*38fd1498Szrj 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3414*38fd1498Szrj 			 "parallelizing outer loop %d\n", loop->num);
3415*38fd1498Szrj       else
3416*38fd1498Szrj 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3417*38fd1498Szrj 			 "parallelizing inner loop %d\n", loop->num);
3418*38fd1498Szrj 
3419*38fd1498Szrj       gen_parallel_loop (loop, &reduction_list,
3420*38fd1498Szrj 			 n_threads, &niter_desc, oacc_kernels_p);
3421*38fd1498Szrj     }
3422*38fd1498Szrj 
3423*38fd1498Szrj   obstack_free (&parloop_obstack, NULL);
3424*38fd1498Szrj 
3425*38fd1498Szrj   /* Parallelization will cause new function calls to be inserted through
3426*38fd1498Szrj      which local variables will escape.  Reset the points-to solution
3427*38fd1498Szrj      for ESCAPED.  */
3428*38fd1498Szrj   if (changed)
3429*38fd1498Szrj     pt_solution_reset (&cfun->gimple_df->escaped);
3430*38fd1498Szrj 
3431*38fd1498Szrj   return changed;
3432*38fd1498Szrj }
3433*38fd1498Szrj 
3434*38fd1498Szrj /* Parallelization.  */
3435*38fd1498Szrj 
3436*38fd1498Szrj namespace {
3437*38fd1498Szrj 
3438*38fd1498Szrj const pass_data pass_data_parallelize_loops =
3439*38fd1498Szrj {
3440*38fd1498Szrj   GIMPLE_PASS, /* type */
3441*38fd1498Szrj   "parloops", /* name */
3442*38fd1498Szrj   OPTGROUP_LOOP, /* optinfo_flags */
3443*38fd1498Szrj   TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
3444*38fd1498Szrj   ( PROP_cfg | PROP_ssa ), /* properties_required */
3445*38fd1498Szrj   0, /* properties_provided */
3446*38fd1498Szrj   0, /* properties_destroyed */
3447*38fd1498Szrj   0, /* todo_flags_start */
3448*38fd1498Szrj   0, /* todo_flags_finish */
3449*38fd1498Szrj };
3450*38fd1498Szrj 
3451*38fd1498Szrj class pass_parallelize_loops : public gimple_opt_pass
3452*38fd1498Szrj {
3453*38fd1498Szrj public:
pass_parallelize_loops(gcc::context * ctxt)3454*38fd1498Szrj   pass_parallelize_loops (gcc::context *ctxt)
3455*38fd1498Szrj     : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
3456*38fd1498Szrj       oacc_kernels_p (false)
3457*38fd1498Szrj   {}
3458*38fd1498Szrj 
3459*38fd1498Szrj   /* opt_pass methods: */
gate(function *)3460*38fd1498Szrj   virtual bool gate (function *)
3461*38fd1498Szrj   {
3462*38fd1498Szrj     if (oacc_kernels_p)
3463*38fd1498Szrj       return flag_openacc;
3464*38fd1498Szrj     else
3465*38fd1498Szrj       return flag_tree_parallelize_loops > 1;
3466*38fd1498Szrj   }
3467*38fd1498Szrj   virtual unsigned int execute (function *);
clone()3468*38fd1498Szrj   opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
set_pass_param(unsigned int n,bool param)3469*38fd1498Szrj   void set_pass_param (unsigned int n, bool param)
3470*38fd1498Szrj     {
3471*38fd1498Szrj       gcc_assert (n == 0);
3472*38fd1498Szrj       oacc_kernels_p = param;
3473*38fd1498Szrj     }
3474*38fd1498Szrj 
3475*38fd1498Szrj  private:
3476*38fd1498Szrj   bool oacc_kernels_p;
3477*38fd1498Szrj }; // class pass_parallelize_loops
3478*38fd1498Szrj 
3479*38fd1498Szrj unsigned
execute(function * fun)3480*38fd1498Szrj pass_parallelize_loops::execute (function *fun)
3481*38fd1498Szrj {
3482*38fd1498Szrj   tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3483*38fd1498Szrj   if (nthreads == NULL_TREE)
3484*38fd1498Szrj     return 0;
3485*38fd1498Szrj 
3486*38fd1498Szrj   bool in_loop_pipeline = scev_initialized_p ();
3487*38fd1498Szrj   if (!in_loop_pipeline)
3488*38fd1498Szrj     loop_optimizer_init (LOOPS_NORMAL
3489*38fd1498Szrj 			 | LOOPS_HAVE_RECORDED_EXITS);
3490*38fd1498Szrj 
3491*38fd1498Szrj   if (number_of_loops (fun) <= 1)
3492*38fd1498Szrj     return 0;
3493*38fd1498Szrj 
3494*38fd1498Szrj   if (!in_loop_pipeline)
3495*38fd1498Szrj     {
3496*38fd1498Szrj       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3497*38fd1498Szrj       scev_initialize ();
3498*38fd1498Szrj     }
3499*38fd1498Szrj 
3500*38fd1498Szrj   unsigned int todo = 0;
3501*38fd1498Szrj   if (parallelize_loops (oacc_kernels_p))
3502*38fd1498Szrj     {
3503*38fd1498Szrj       fun->curr_properties &= ~(PROP_gimple_eomp);
3504*38fd1498Szrj 
3505*38fd1498Szrj       checking_verify_loop_structure ();
3506*38fd1498Szrj 
3507*38fd1498Szrj       todo |= TODO_update_ssa;
3508*38fd1498Szrj     }
3509*38fd1498Szrj 
3510*38fd1498Szrj   if (!in_loop_pipeline)
3511*38fd1498Szrj     {
3512*38fd1498Szrj       scev_finalize ();
3513*38fd1498Szrj       loop_optimizer_finalize ();
3514*38fd1498Szrj     }
3515*38fd1498Szrj 
3516*38fd1498Szrj   return todo;
3517*38fd1498Szrj }
3518*38fd1498Szrj 
3519*38fd1498Szrj } // anon namespace
3520*38fd1498Szrj 
3521*38fd1498Szrj gimple_opt_pass *
make_pass_parallelize_loops(gcc::context * ctxt)3522*38fd1498Szrj make_pass_parallelize_loops (gcc::context *ctxt)
3523*38fd1498Szrj {
3524*38fd1498Szrj   return new pass_parallelize_loops (ctxt);
3525*38fd1498Szrj }
3526