xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-parloops.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /* Loop autoparallelization.
2    Copyright (C) 2006-2019 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4    Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "fold-const.h"
34 #include "gimplify.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
40 #include "tree-cfg.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
46 #include "cfgloop.h"
47 #include "tree-scalar-evolution.h"
48 #include "langhooks.h"
49 #include "tree-vectorizer.h"
50 #include "tree-hasher.h"
51 #include "tree-parloops.h"
52 #include "omp-general.h"
53 #include "omp-low.h"
54 #include "tree-ssa.h"
55 #include "params.h"
56 #include "params-enum.h"
57 #include "tree-ssa-alias.h"
58 #include "tree-eh.h"
59 #include "gomp-constants.h"
60 #include "tree-dfa.h"
61 #include "stringpool.h"
62 #include "attribs.h"
63 
64 /* This pass tries to distribute iterations of loops into several threads.
65    The implementation is straightforward -- for each loop we test whether its
66    iterations are independent, and if it is the case (and some additional
67    conditions regarding profitability and correctness are satisfied), we
68    add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
69    machinery do its job.
70 
71    The most of the complexity is in bringing the code into shape expected
72    by the omp expanders:
73    -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
74       variable and that the exit test is at the start of the loop body
75    -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
76       variables by accesses through pointers, and breaking up ssa chains
77       by storing the values incoming to the parallelized loop to a structure
78       passed to the new function as an argument (something similar is done
79       in omp gimplification, unfortunately only a small part of the code
80       can be shared).
81 
82    TODO:
83    -- if there are several parallelizable loops in a function, it may be
84       possible to generate the threads just once (using synchronization to
85       ensure that cross-loop dependences are obeyed).
86    -- handling of common reduction patterns for outer loops.
87 
88    More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
89 /*
90   Reduction handling:
91   currently we use vect_force_simple_reduction() to detect reduction patterns.
92   The code transformation will be introduced by an example.
93 
94 
95 parloop
96 {
97   int sum=1;
98 
99   for (i = 0; i < N; i++)
100    {
101     x[i] = i + 3;
102     sum+=x[i];
103    }
104 }
105 
106 gimple-like code:
107 header_bb:
108 
109   # sum_29 = PHI <sum_11(5), 1(3)>
110   # i_28 = PHI <i_12(5), 0(3)>
111   D.1795_8 = i_28 + 3;
112   x[i_28] = D.1795_8;
113   sum_11 = D.1795_8 + sum_29;
114   i_12 = i_28 + 1;
115   if (N_6(D) > i_12)
116     goto header_bb;
117 
118 
119 exit_bb:
120 
121   # sum_21 = PHI <sum_11(4)>
122   printf (&"%d"[0], sum_21);
123 
124 
125 after reduction transformation (only relevant parts):
126 
127 parloop
128 {
129 
130 ....
131 
132 
133   # Storing the initial value given by the user.  #
134 
135   .paral_data_store.32.sum.27 = 1;
136 
137   #pragma omp parallel num_threads(4)
138 
139   #pragma omp for schedule(static)
140 
141   # The neutral element corresponding to the particular
142   reduction's operation, e.g. 0 for PLUS_EXPR,
143   1 for MULT_EXPR, etc. replaces the user's initial value.  #
144 
145   # sum.27_29 = PHI <sum.27_11, 0>
146 
147   sum.27_11 = D.1827_8 + sum.27_29;
148 
149   GIMPLE_OMP_CONTINUE
150 
151   # Adding this reduction phi is done at create_phi_for_local_result() #
152   # sum.27_56 = PHI <sum.27_11, 0>
153   GIMPLE_OMP_RETURN
154 
155   # Creating the atomic operation is done at
156   create_call_for_reduction_1()  #
157 
158   #pragma omp atomic_load
159   D.1839_59 = *&.paral_data_load.33_51->reduction.23;
160   D.1840_60 = sum.27_56 + D.1839_59;
161   #pragma omp atomic_store (D.1840_60);
162 
163   GIMPLE_OMP_RETURN
164 
165  # collecting the result after the join of the threads is done at
166   create_loads_for_reductions().
167   The value computed by the threads is loaded from the
168   shared struct.  #
169 
170 
171   .paral_data_load.33_52 = &.paral_data_store.32;
172   sum_37 =  .paral_data_load.33_52->sum.27;
173   sum_43 = D.1795_41 + sum_37;
174 
175   exit bb:
176   # sum_21 = PHI <sum_43, sum_26>
177   printf (&"%d"[0], sum_21);
178 
179 ...
180 
181 }
182 
183 */
184 
185 /* Minimal number of iterations of a loop that should be executed in each
186    thread.  */
187 #define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)
188 
189 /* Element of the hashtable, representing a
190    reduction in the current loop.  */
191 struct reduction_info
192 {
193   gimple *reduc_stmt;		/* reduction statement.  */
194   gimple *reduc_phi;		/* The phi node defining the reduction.  */
195   enum tree_code reduction_code;/* code for the reduction operation.  */
196   unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
197 				   result.  */
198   gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
199 				   of the reduction variable when existing the loop. */
200   tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
201   tree field;			/*  the name of the field in the parloop data structure intended for reduction.  */
202   tree reduc_addr;		/* The address of the reduction variable for
203 				   openacc reductions.  */
204   tree init;			/* reduction initialization value.  */
205   gphi *new_phi;		/* (helper field) Newly created phi node whose result
206 				   will be passed to the atomic operation.  Represents
207 				   the local result each thread computed for the reduction
208 				   operation.  */
209 };
210 
211 /* Reduction info hashtable helpers.  */
212 
213 struct reduction_hasher : free_ptr_hash <reduction_info>
214 {
215   static inline hashval_t hash (const reduction_info *);
216   static inline bool equal (const reduction_info *, const reduction_info *);
217 };
218 
219 /* Equality and hash functions for hashtab code.  */
220 
221 inline bool
222 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
223 {
224   return (a->reduc_phi == b->reduc_phi);
225 }
226 
227 inline hashval_t
228 reduction_hasher::hash (const reduction_info *a)
229 {
230   return a->reduc_version;
231 }
232 
233 typedef hash_table<reduction_hasher> reduction_info_table_type;
234 
235 
236 static struct reduction_info *
237 reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
238 {
239   struct reduction_info tmpred, *red;
240 
241   if (reduction_list->elements () == 0 || phi == NULL)
242     return NULL;
243 
244   if (gimple_uid (phi) == (unsigned int)-1
245       || gimple_uid (phi) == 0)
246     return NULL;
247 
248   tmpred.reduc_phi = phi;
249   tmpred.reduc_version = gimple_uid (phi);
250   red = reduction_list->find (&tmpred);
251   gcc_assert (red == NULL || red->reduc_phi == phi);
252 
253   return red;
254 }
255 
256 /* Element of hashtable of names to copy.  */
257 
258 struct name_to_copy_elt
259 {
260   unsigned version;	/* The version of the name to copy.  */
261   tree new_name;	/* The new name used in the copy.  */
262   tree field;		/* The field of the structure used to pass the
263 			   value.  */
264 };
265 
266 /* Name copies hashtable helpers.  */
267 
268 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
269 {
270   static inline hashval_t hash (const name_to_copy_elt *);
271   static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
272 };
273 
274 /* Equality and hash functions for hashtab code.  */
275 
276 inline bool
277 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
278 {
279   return a->version == b->version;
280 }
281 
282 inline hashval_t
283 name_to_copy_hasher::hash (const name_to_copy_elt *a)
284 {
285   return (hashval_t) a->version;
286 }
287 
288 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
289 
290 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
291    matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
292    represents the denominator for every element in the matrix.  */
293 typedef struct lambda_trans_matrix_s
294 {
295   lambda_matrix matrix;
296   int rowsize;
297   int colsize;
298   int denominator;
299 } *lambda_trans_matrix;
300 #define LTM_MATRIX(T) ((T)->matrix)
301 #define LTM_ROWSIZE(T) ((T)->rowsize)
302 #define LTM_COLSIZE(T) ((T)->colsize)
303 #define LTM_DENOMINATOR(T) ((T)->denominator)
304 
305 /* Allocate a new transformation matrix.  */
306 
307 static lambda_trans_matrix
308 lambda_trans_matrix_new (int colsize, int rowsize,
309 			 struct obstack * lambda_obstack)
310 {
311   lambda_trans_matrix ret;
312 
313   ret = (lambda_trans_matrix)
314     obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
315   LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
316   LTM_ROWSIZE (ret) = rowsize;
317   LTM_COLSIZE (ret) = colsize;
318   LTM_DENOMINATOR (ret) = 1;
319   return ret;
320 }
321 
322 /* Multiply a vector VEC by a matrix MAT.
323    MAT is an M*N matrix, and VEC is a vector with length N.  The result
324    is stored in DEST which must be a vector of length M.  */
325 
326 static void
327 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
328 			   lambda_vector vec, lambda_vector dest)
329 {
330   int i, j;
331 
332   lambda_vector_clear (dest, m);
333   for (i = 0; i < m; i++)
334     for (j = 0; j < n; j++)
335       dest[i] += matrix[i][j] * vec[j];
336 }
337 
338 /* Return true if TRANS is a legal transformation matrix that respects
339    the dependence vectors in DISTS and DIRS.  The conservative answer
340    is false.
341 
342    "Wolfe proves that a unimodular transformation represented by the
343    matrix T is legal when applied to a loop nest with a set of
344    lexicographically non-negative distance vectors RDG if and only if
345    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
346    i.e.: if and only if it transforms the lexicographically positive
347    distance vectors to lexicographically positive vectors.  Note that
348    a unimodular matrix must transform the zero vector (and only it) to
349    the zero vector." S.Muchnick.  */
350 
351 static bool
352 lambda_transform_legal_p (lambda_trans_matrix trans,
353 			  int nb_loops,
354 			  vec<ddr_p> dependence_relations)
355 {
356   unsigned int i, j;
357   lambda_vector distres;
358   struct data_dependence_relation *ddr;
359 
360   gcc_assert (LTM_COLSIZE (trans) == nb_loops
361 	      && LTM_ROWSIZE (trans) == nb_loops);
362 
363   /* When there are no dependences, the transformation is correct.  */
364   if (dependence_relations.length () == 0)
365     return true;
366 
367   ddr = dependence_relations[0];
368   if (ddr == NULL)
369     return true;
370 
371   /* When there is an unknown relation in the dependence_relations, we
372      know that it is no worth looking at this loop nest: give up.  */
373   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
374     return false;
375 
376   distres = lambda_vector_new (nb_loops);
377 
378   /* For each distance vector in the dependence graph.  */
379   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
380     {
381       /* Don't care about relations for which we know that there is no
382 	 dependence, nor about read-read (aka. output-dependences):
383 	 these data accesses can happen in any order.  */
384       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
385 	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
386 	continue;
387 
388       /* Conservatively answer: "this transformation is not valid".  */
389       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
390 	return false;
391 
392       /* If the dependence could not be captured by a distance vector,
393 	 conservatively answer that the transform is not valid.  */
394       if (DDR_NUM_DIST_VECTS (ddr) == 0)
395 	return false;
396 
397       /* Compute trans.dist_vect */
398       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
399 	{
400 	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
401 				     DDR_DIST_VECT (ddr, j), distres);
402 
403 	  if (!lambda_vector_lexico_pos (distres, nb_loops))
404 	    return false;
405 	}
406     }
407   return true;
408 }
409 
410 /* Data dependency analysis. Returns true if the iterations of LOOP
411    are independent on each other (that is, if we can execute them
412    in parallel).  */
413 
414 static bool
415 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
416 {
417   vec<ddr_p> dependence_relations;
418   vec<data_reference_p> datarefs;
419   lambda_trans_matrix trans;
420   bool ret = false;
421 
422   if (dump_file && (dump_flags & TDF_DETAILS))
423   {
424     fprintf (dump_file, "Considering loop %d\n", loop->num);
425     if (!loop->inner)
426       fprintf (dump_file, "loop is innermost\n");
427     else
428       fprintf (dump_file, "loop NOT innermost\n");
429    }
430 
431   /* Check for problems with dependences.  If the loop can be reversed,
432      the iterations are independent.  */
433   auto_vec<loop_p, 3> loop_nest;
434   datarefs.create (10);
435   dependence_relations.create (100);
436   if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
437 					   &dependence_relations))
438     {
439       if (dump_file && (dump_flags & TDF_DETAILS))
440 	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
441       ret = false;
442       goto end;
443     }
444   if (dump_file && (dump_flags & TDF_DETAILS))
445     dump_data_dependence_relations (dump_file, dependence_relations);
446 
447   trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
448   LTM_MATRIX (trans)[0][0] = -1;
449 
450   if (lambda_transform_legal_p (trans, 1, dependence_relations))
451     {
452       ret = true;
453       if (dump_file && (dump_flags & TDF_DETAILS))
454 	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
455     }
456   else if (dump_file && (dump_flags & TDF_DETAILS))
457     fprintf (dump_file,
458 	     "  FAILED: data dependencies exist across iterations\n");
459 
460  end:
461   free_dependence_relations (dependence_relations);
462   free_data_refs (datarefs);
463 
464   return ret;
465 }
466 
467 /* Return true when LOOP contains basic blocks marked with the
468    BB_IRREDUCIBLE_LOOP flag.  */
469 
470 static inline bool
471 loop_has_blocks_with_irreducible_flag (struct loop *loop)
472 {
473   unsigned i;
474   basic_block *bbs = get_loop_body_in_dom_order (loop);
475   bool res = true;
476 
477   for (i = 0; i < loop->num_nodes; i++)
478     if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
479       goto end;
480 
481   res = false;
482  end:
483   free (bbs);
484   return res;
485 }
486 
487 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
488    The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
489    to their addresses that can be reused.  The address of OBJ is known to
490    be invariant in the whole function.  Other needed statements are placed
491    right before GSI.  */
492 
493 static tree
494 take_address_of (tree obj, tree type, edge entry,
495 		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
496 {
497   int uid;
498   tree *var_p, name, addr;
499   gassign *stmt;
500   gimple_seq stmts;
501 
502   /* Since the address of OBJ is invariant, the trees may be shared.
503      Avoid rewriting unrelated parts of the code.  */
504   obj = unshare_expr (obj);
505   for (var_p = &obj;
506        handled_component_p (*var_p);
507        var_p = &TREE_OPERAND (*var_p, 0))
508     continue;
509 
510   /* Canonicalize the access to base on a MEM_REF.  */
511   if (DECL_P (*var_p))
512     *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
513 
514   /* Assign a canonical SSA name to the address of the base decl used
515      in the address and share it for all accesses and addresses based
516      on it.  */
517   uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
518   int_tree_map elt;
519   elt.uid = uid;
520   int_tree_map *slot = decl_address->find_slot (elt, INSERT);
521   if (!slot->to)
522     {
523       if (gsi == NULL)
524 	return NULL;
525       addr = TREE_OPERAND (*var_p, 0);
526       const char *obj_name
527 	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
528       if (obj_name)
529 	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
530       else
531 	name = make_ssa_name (TREE_TYPE (addr));
532       stmt = gimple_build_assign (name, addr);
533       gsi_insert_on_edge_immediate (entry, stmt);
534 
535       slot->uid = uid;
536       slot->to = name;
537     }
538   else
539     name = slot->to;
540 
541   /* Express the address in terms of the canonical SSA name.  */
542   TREE_OPERAND (*var_p, 0) = name;
543   if (gsi == NULL)
544     return build_fold_addr_expr_with_type (obj, type);
545 
546   name = force_gimple_operand (build_addr (obj),
547 			       &stmts, true, NULL_TREE);
548   if (!gimple_seq_empty_p (stmts))
549     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
550 
551   if (!useless_type_conversion_p (type, TREE_TYPE (name)))
552     {
553       name = force_gimple_operand (fold_convert (type, name), &stmts, true,
554 				   NULL_TREE);
555       if (!gimple_seq_empty_p (stmts))
556 	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
557     }
558 
559   return name;
560 }
561 
562 static tree
563 reduc_stmt_res (gimple *stmt)
564 {
565   return (gimple_code (stmt) == GIMPLE_PHI
566 	  ? gimple_phi_result (stmt)
567 	  : gimple_assign_lhs (stmt));
568 }
569 
570 /* Callback for htab_traverse.  Create the initialization statement
571    for reduction described in SLOT, and place it at the preheader of
572    the loop described in DATA.  */
573 
574 int
575 initialize_reductions (reduction_info **slot, struct loop *loop)
576 {
577   tree init;
578   tree type, arg;
579   edge e;
580 
581   struct reduction_info *const reduc = *slot;
582 
583   /* Create initialization in preheader:
584      reduction_variable = initialization value of reduction.  */
585 
586   /* In the phi node at the header, replace the argument coming
587      from the preheader with the reduction initialization value.  */
588 
589   /* Initialize the reduction.  */
590   type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
591   init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
592 				reduc->reduction_code, type);
593   reduc->init = init;
594 
595   /* Replace the argument representing the initialization value
596      with the initialization value for the reduction (neutral
597      element for the particular operation, e.g. 0 for PLUS_EXPR,
598      1 for MULT_EXPR, etc).
599      Keep the old value in a new variable "reduction_initial",
600      that will be taken in consideration after the parallel
601      computing is done.  */
602 
603   e = loop_preheader_edge (loop);
604   arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
605   /* Create new variable to hold the initial value.  */
606 
607   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
608 	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
609   reduc->initial_value = arg;
610   return 1;
611 }
612 
613 struct elv_data
614 {
615   struct walk_stmt_info info;
616   edge entry;
617   int_tree_htab_type *decl_address;
618   gimple_stmt_iterator *gsi;
619   bool changed;
620   bool reset;
621 };
622 
623 /* Eliminates references to local variables in *TP out of the single
624    entry single exit region starting at DTA->ENTRY.
625    DECL_ADDRESS contains addresses of the references that had their
626    address taken already.  If the expression is changed, CHANGED is
627    set to true.  Callback for walk_tree.  */
628 
629 static tree
630 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
631 {
632   struct elv_data *const dta = (struct elv_data *) data;
633   tree t = *tp, var, addr, addr_type, type, obj;
634 
635   if (DECL_P (t))
636     {
637       *walk_subtrees = 0;
638 
639       if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
640 	return NULL_TREE;
641 
642       type = TREE_TYPE (t);
643       addr_type = build_pointer_type (type);
644       addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
645 			      dta->gsi);
646       if (dta->gsi == NULL && addr == NULL_TREE)
647 	{
648 	  dta->reset = true;
649 	  return NULL_TREE;
650 	}
651 
652       *tp = build_simple_mem_ref (addr);
653 
654       dta->changed = true;
655       return NULL_TREE;
656     }
657 
658   if (TREE_CODE (t) == ADDR_EXPR)
659     {
660       /* ADDR_EXPR may appear in two contexts:
661 	 -- as a gimple operand, when the address taken is a function invariant
662 	 -- as gimple rhs, when the resulting address in not a function
663 	    invariant
664 	 We do not need to do anything special in the latter case (the base of
665 	 the memory reference whose address is taken may be replaced in the
666 	 DECL_P case).  The former case is more complicated, as we need to
667 	 ensure that the new address is still a gimple operand.  Thus, it
668 	 is not sufficient to replace just the base of the memory reference --
669 	 we need to move the whole computation of the address out of the
670 	 loop.  */
671       if (!is_gimple_val (t))
672 	return NULL_TREE;
673 
674       *walk_subtrees = 0;
675       obj = TREE_OPERAND (t, 0);
676       var = get_base_address (obj);
677       if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
678 	return NULL_TREE;
679 
680       addr_type = TREE_TYPE (t);
681       addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
682 			      dta->gsi);
683       if (dta->gsi == NULL && addr == NULL_TREE)
684 	{
685 	  dta->reset = true;
686 	  return NULL_TREE;
687 	}
688       *tp = addr;
689 
690       dta->changed = true;
691       return NULL_TREE;
692     }
693 
694   if (!EXPR_P (t))
695     *walk_subtrees = 0;
696 
697   return NULL_TREE;
698 }
699 
700 /* Moves the references to local variables in STMT at *GSI out of the single
701    entry single exit region starting at ENTRY.  DECL_ADDRESS contains
702    addresses of the references that had their address taken
703    already.  */
704 
705 static void
706 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
707 				int_tree_htab_type *decl_address)
708 {
709   struct elv_data dta;
710   gimple *stmt = gsi_stmt (*gsi);
711 
712   memset (&dta.info, '\0', sizeof (dta.info));
713   dta.entry = entry;
714   dta.decl_address = decl_address;
715   dta.changed = false;
716   dta.reset = false;
717 
718   if (gimple_debug_bind_p (stmt))
719     {
720       dta.gsi = NULL;
721       walk_tree (gimple_debug_bind_get_value_ptr (stmt),
722 		 eliminate_local_variables_1, &dta.info, NULL);
723       if (dta.reset)
724 	{
725 	  gimple_debug_bind_reset_value (stmt);
726 	  dta.changed = true;
727 	}
728     }
729   else if (gimple_clobber_p (stmt))
730     {
731       unlink_stmt_vdef (stmt);
732       stmt = gimple_build_nop ();
733       gsi_replace (gsi, stmt, false);
734       dta.changed = true;
735     }
736   else
737     {
738       dta.gsi = gsi;
739       walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
740     }
741 
742   if (dta.changed)
743     update_stmt (stmt);
744 }
745 
746 /* Eliminates the references to local variables from the single entry
747    single exit region between the ENTRY and EXIT edges.
748 
749    This includes:
750    1) Taking address of a local variable -- these are moved out of the
751    region (and temporary variable is created to hold the address if
752    necessary).
753 
754    2) Dereferencing a local variable -- these are replaced with indirect
755    references.  */
756 
757 static void
758 eliminate_local_variables (edge entry, edge exit)
759 {
760   basic_block bb;
761   auto_vec<basic_block, 3> body;
762   unsigned i;
763   gimple_stmt_iterator gsi;
764   bool has_debug_stmt = false;
765   int_tree_htab_type decl_address (10);
766   basic_block entry_bb = entry->src;
767   basic_block exit_bb = exit->dest;
768 
769   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
770 
771   FOR_EACH_VEC_ELT (body, i, bb)
772     if (bb != entry_bb && bb != exit_bb)
773       {
774         for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
775 	  if (is_gimple_debug (gsi_stmt (gsi)))
776 	    {
777 	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
778 	        has_debug_stmt = true;
779 	    }
780 	  else
781 	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
782       }
783 
784   if (has_debug_stmt)
785     FOR_EACH_VEC_ELT (body, i, bb)
786       if (bb != entry_bb && bb != exit_bb)
787 	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
788 	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
789 	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
790 }
791 
792 /* Returns true if expression EXPR is not defined between ENTRY and
793    EXIT, i.e. if all its operands are defined outside of the region.  */
794 
795 static bool
796 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
797 {
798   basic_block entry_bb = entry->src;
799   basic_block exit_bb = exit->dest;
800   basic_block def_bb;
801 
802   if (is_gimple_min_invariant (expr))
803     return true;
804 
805   if (TREE_CODE (expr) == SSA_NAME)
806     {
807       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
808       if (def_bb
809 	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
810 	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
811 	return false;
812 
813       return true;
814     }
815 
816   return false;
817 }
818 
819 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
820    The copies are stored to NAME_COPIES, if NAME was already duplicated,
821    its duplicate stored in NAME_COPIES is returned.
822 
823    Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
824    duplicated, storing the copies in DECL_COPIES.  */
825 
826 static tree
827 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
828 			       int_tree_htab_type *decl_copies,
829 			       bool copy_name_p)
830 {
831   tree copy, var, var_copy;
832   unsigned idx, uid, nuid;
833   struct int_tree_map ielt;
834   struct name_to_copy_elt elt, *nelt;
835   name_to_copy_elt **slot;
836   int_tree_map *dslot;
837 
838   if (TREE_CODE (name) != SSA_NAME)
839     return name;
840 
841   idx = SSA_NAME_VERSION (name);
842   elt.version = idx;
843   slot = name_copies->find_slot_with_hash (&elt, idx,
844 					   copy_name_p ? INSERT : NO_INSERT);
845   if (slot && *slot)
846     return (*slot)->new_name;
847 
848   if (copy_name_p)
849     {
850       copy = duplicate_ssa_name (name, NULL);
851       nelt = XNEW (struct name_to_copy_elt);
852       nelt->version = idx;
853       nelt->new_name = copy;
854       nelt->field = NULL_TREE;
855       *slot = nelt;
856     }
857   else
858     {
859       gcc_assert (!slot);
860       copy = name;
861     }
862 
863   var = SSA_NAME_VAR (name);
864   if (!var)
865     return copy;
866 
867   uid = DECL_UID (var);
868   ielt.uid = uid;
869   dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
870   if (!dslot->to)
871     {
872       var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
873       DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
874       dslot->uid = uid;
875       dslot->to = var_copy;
876 
877       /* Ensure that when we meet this decl next time, we won't duplicate
878          it again.  */
879       nuid = DECL_UID (var_copy);
880       ielt.uid = nuid;
881       dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
882       gcc_assert (!dslot->to);
883       dslot->uid = nuid;
884       dslot->to = var_copy;
885     }
886   else
887     var_copy = dslot->to;
888 
889   replace_ssa_name_symbol (copy, var_copy);
890   return copy;
891 }
892 
893 /* Finds the ssa names used in STMT that are defined outside the
894    region between ENTRY and EXIT and replaces such ssa names with
895    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
896    decls of all ssa names used in STMT (including those defined in
897    LOOP) are replaced with the new temporary variables; the
898    replacement decls are stored in DECL_COPIES.  */
899 
900 static void
901 separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
902 			       name_to_copy_table_type *name_copies,
903 			       int_tree_htab_type *decl_copies)
904 {
905   use_operand_p use;
906   def_operand_p def;
907   ssa_op_iter oi;
908   tree name, copy;
909   bool copy_name_p;
910 
911   FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
912   {
913     name = DEF_FROM_PTR (def);
914     gcc_assert (TREE_CODE (name) == SSA_NAME);
915     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
916 					  false);
917     gcc_assert (copy == name);
918   }
919 
920   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
921   {
922     name = USE_FROM_PTR (use);
923     if (TREE_CODE (name) != SSA_NAME)
924       continue;
925 
926     copy_name_p = expr_invariant_in_region_p (entry, exit, name);
927     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
928 					  copy_name_p);
929     SET_USE (use, copy);
930   }
931 }
932 
933 /* Finds the ssa names used in STMT that are defined outside the
934    region between ENTRY and EXIT and replaces such ssa names with
935    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
936    decls of all ssa names used in STMT (including those defined in
937    LOOP) are replaced with the new temporary variables; the
938    replacement decls are stored in DECL_COPIES.  */
939 
940 static bool
941 separate_decls_in_region_debug (gimple *stmt,
942 				name_to_copy_table_type *name_copies,
943 				int_tree_htab_type *decl_copies)
944 {
945   use_operand_p use;
946   ssa_op_iter oi;
947   tree var, name;
948   struct int_tree_map ielt;
949   struct name_to_copy_elt elt;
950   name_to_copy_elt **slot;
951   int_tree_map *dslot;
952 
953   if (gimple_debug_bind_p (stmt))
954     var = gimple_debug_bind_get_var (stmt);
955   else if (gimple_debug_source_bind_p (stmt))
956     var = gimple_debug_source_bind_get_var (stmt);
957   else
958     return true;
959   if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
960     return true;
961   gcc_assert (DECL_P (var) && SSA_VAR_P (var));
962   ielt.uid = DECL_UID (var);
963   dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
964   if (!dslot)
965     return true;
966   if (gimple_debug_bind_p (stmt))
967     gimple_debug_bind_set_var (stmt, dslot->to);
968   else if (gimple_debug_source_bind_p (stmt))
969     gimple_debug_source_bind_set_var (stmt, dslot->to);
970 
971   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
972   {
973     name = USE_FROM_PTR (use);
974     if (TREE_CODE (name) != SSA_NAME)
975       continue;
976 
977     elt.version = SSA_NAME_VERSION (name);
978     slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
979     if (!slot)
980       {
981 	gimple_debug_bind_reset_value (stmt);
982 	update_stmt (stmt);
983 	break;
984       }
985 
986     SET_USE (use, (*slot)->new_name);
987   }
988 
989   return false;
990 }
991 
992 /* Callback for htab_traverse.  Adds a field corresponding to the reduction
993    specified in SLOT. The type is passed in DATA.  */
994 
995 int
996 add_field_for_reduction (reduction_info **slot, tree type)
997 {
998 
999   struct reduction_info *const red = *slot;
1000   tree var = reduc_stmt_res (red->reduc_stmt);
1001   tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1002 			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1003 
1004   insert_field_into_struct (type, field);
1005 
1006   red->field = field;
1007 
1008   return 1;
1009 }
1010 
1011 /* Callback for htab_traverse.  Adds a field corresponding to a ssa name
1012    described in SLOT. The type is passed in DATA.  */
1013 
1014 int
1015 add_field_for_name (name_to_copy_elt **slot, tree type)
1016 {
1017   struct name_to_copy_elt *const elt = *slot;
1018   tree name = ssa_name (elt->version);
1019   tree field = build_decl (UNKNOWN_LOCATION,
1020 			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1021 			   TREE_TYPE (name));
1022 
1023   insert_field_into_struct (type, field);
1024   elt->field = field;
1025 
1026   return 1;
1027 }
1028 
1029 /* Callback for htab_traverse.  A local result is the intermediate result
1030    computed by a single
1031    thread, or the initial value in case no iteration was executed.
1032    This function creates a phi node reflecting these values.
1033    The phi's result will be stored in NEW_PHI field of the
1034    reduction's data structure.  */
1035 
1036 int
1037 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1038 {
1039   struct reduction_info *const reduc = *slot;
1040   edge e;
1041   gphi *new_phi;
1042   basic_block store_bb, continue_bb;
1043   tree local_res;
1044   location_t locus;
1045 
1046   /* STORE_BB is the block where the phi
1047      should be stored.  It is the destination of the loop exit.
1048      (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
1049   continue_bb = single_pred (loop->latch);
1050   store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1051 
1052   /* STORE_BB has two predecessors.  One coming from  the loop
1053      (the reduction's result is computed at the loop),
1054      and another coming from a block preceding the loop,
1055      when no iterations
1056      are executed (the initial value should be taken).  */
1057   if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1058     e = EDGE_PRED (store_bb, 1);
1059   else
1060     e = EDGE_PRED (store_bb, 0);
1061   tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1062   local_res = copy_ssa_name (lhs);
1063   locus = gimple_location (reduc->reduc_stmt);
1064   new_phi = create_phi_node (local_res, store_bb);
1065   add_phi_arg (new_phi, reduc->init, e, locus);
1066   add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1067   reduc->new_phi = new_phi;
1068 
1069   return 1;
1070 }
1071 
1072 struct clsn_data
1073 {
1074   tree store;
1075   tree load;
1076 
1077   basic_block store_bb;
1078   basic_block load_bb;
1079 };
1080 
1081 /* Callback for htab_traverse.  Create an atomic instruction for the
1082    reduction described in SLOT.
1083    DATA annotates the place in memory the atomic operation relates to,
1084    and the basic block it needs to be generated in.  */
1085 
1086 int
1087 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1088 {
1089   struct reduction_info *const reduc = *slot;
1090   gimple_stmt_iterator gsi;
1091   tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1092   tree load_struct;
1093   basic_block bb;
1094   basic_block new_bb;
1095   edge e;
1096   tree t, addr, ref, x;
1097   tree tmp_load, name;
1098   gimple *load;
1099 
1100   if (reduc->reduc_addr == NULL_TREE)
1101     {
1102       load_struct = build_simple_mem_ref (clsn_data->load);
1103       t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1104 
1105       addr = build_addr (t);
1106     }
1107   else
1108     {
1109       /* Set the address for the atomic store.  */
1110       addr = reduc->reduc_addr;
1111 
1112       /* Remove the non-atomic store '*addr = sum'.  */
1113       tree res = PHI_RESULT (reduc->keep_res);
1114       use_operand_p use_p;
1115       gimple *stmt;
1116       bool single_use_p = single_imm_use (res, &use_p, &stmt);
1117       gcc_assert (single_use_p);
1118       replace_uses_by (gimple_vdef (stmt),
1119 		       gimple_vuse (stmt));
1120       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1121       gsi_remove (&gsi, true);
1122     }
1123 
1124   /* Create phi node.  */
1125   bb = clsn_data->load_bb;
1126 
1127   gsi = gsi_last_bb (bb);
1128   e = split_block (bb, gsi_stmt (gsi));
1129   new_bb = e->dest;
1130 
1131   tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1132   tmp_load = make_ssa_name (tmp_load);
1133   load = gimple_build_omp_atomic_load (tmp_load, addr,
1134 				       OMP_MEMORY_ORDER_RELAXED);
1135   SSA_NAME_DEF_STMT (tmp_load) = load;
1136   gsi = gsi_start_bb (new_bb);
1137   gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1138 
1139   e = split_block (new_bb, load);
1140   new_bb = e->dest;
1141   gsi = gsi_start_bb (new_bb);
1142   ref = tmp_load;
1143   x = fold_build2 (reduc->reduction_code,
1144 		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1145 		   PHI_RESULT (reduc->new_phi));
1146 
1147   name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1148 				   GSI_CONTINUE_LINKING);
1149 
1150   gimple *store = gimple_build_omp_atomic_store (name,
1151 						 OMP_MEMORY_ORDER_RELAXED);
1152   gsi_insert_after (&gsi, store, GSI_NEW_STMT);
1153   return 1;
1154 }
1155 
1156 /* Create the atomic operation at the join point of the threads.
1157    REDUCTION_LIST describes the reductions in the LOOP.
1158    LD_ST_DATA describes the shared data structure where
1159    shared data is stored in and loaded from.  */
1160 static void
1161 create_call_for_reduction (struct loop *loop,
1162 			   reduction_info_table_type *reduction_list,
1163 			   struct clsn_data *ld_st_data)
1164 {
1165   reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1166   /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
1167   basic_block continue_bb = single_pred (loop->latch);
1168   ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1169   reduction_list
1170     ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1171 }
1172 
1173 /* Callback for htab_traverse.  Loads the final reduction value at the
1174    join point of all threads, and inserts it in the right place.  */
1175 
1176 int
1177 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1178 {
1179   struct reduction_info *const red = *slot;
1180   gimple *stmt;
1181   gimple_stmt_iterator gsi;
1182   tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1183   tree load_struct;
1184   tree name;
1185   tree x;
1186 
1187   /* If there's no exit phi, the result of the reduction is unused.  */
1188   if (red->keep_res == NULL)
1189     return 1;
1190 
1191   gsi = gsi_after_labels (clsn_data->load_bb);
1192   load_struct = build_simple_mem_ref (clsn_data->load);
1193   load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1194 			NULL_TREE);
1195 
1196   x = load_struct;
1197   name = PHI_RESULT (red->keep_res);
1198   stmt = gimple_build_assign (name, x);
1199 
1200   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1201 
1202   for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1203        !gsi_end_p (gsi); gsi_next (&gsi))
1204     if (gsi_stmt (gsi) == red->keep_res)
1205       {
1206 	remove_phi_node (&gsi, false);
1207 	return 1;
1208       }
1209   gcc_unreachable ();
1210 }
1211 
1212 /* Load the reduction result that was stored in LD_ST_DATA.
1213    REDUCTION_LIST describes the list of reductions that the
1214    loads should be generated for.  */
1215 static void
1216 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1217 				  struct clsn_data *ld_st_data)
1218 {
1219   gimple_stmt_iterator gsi;
1220   tree t;
1221   gimple *stmt;
1222 
1223   gsi = gsi_after_labels (ld_st_data->load_bb);
1224   t = build_fold_addr_expr (ld_st_data->store);
1225   stmt = gimple_build_assign (ld_st_data->load, t);
1226 
1227   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1228 
1229   reduction_list
1230     ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1231 
1232 }
1233 
1234 /* Callback for htab_traverse.  Store the neutral value for the
1235   particular reduction's operation, e.g. 0 for PLUS_EXPR,
1236   1 for MULT_EXPR, etc. into the reduction field.
1237   The reduction is specified in SLOT. The store information is
1238   passed in DATA.  */
1239 
1240 int
1241 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1242 {
1243   struct reduction_info *const red = *slot;
1244   tree t;
1245   gimple *stmt;
1246   gimple_stmt_iterator gsi;
1247   tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1248 
1249   gsi = gsi_last_bb (clsn_data->store_bb);
1250   t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1251   stmt = gimple_build_assign (t, red->initial_value);
1252   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1253 
1254   return 1;
1255 }
1256 
1257 /* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1258    store to a field of STORE in STORE_BB for the ssa name and its duplicate
1259    specified in SLOT.  */
1260 
1261 int
1262 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1263 				  struct clsn_data *clsn_data)
1264 {
1265   struct name_to_copy_elt *const elt = *slot;
1266   tree t;
1267   gimple *stmt;
1268   gimple_stmt_iterator gsi;
1269   tree type = TREE_TYPE (elt->new_name);
1270   tree load_struct;
1271 
1272   gsi = gsi_last_bb (clsn_data->store_bb);
1273   t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1274   stmt = gimple_build_assign (t, ssa_name (elt->version));
1275   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1276 
1277   gsi = gsi_last_bb (clsn_data->load_bb);
1278   load_struct = build_simple_mem_ref (clsn_data->load);
1279   t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1280   stmt = gimple_build_assign (elt->new_name, t);
1281   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1282 
1283   return 1;
1284 }
1285 
1286 /* Moves all the variables used in LOOP and defined outside of it (including
1287    the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1288    name) to a structure created for this purpose.  The code
1289 
1290    while (1)
1291      {
1292        use (a);
1293        use (b);
1294      }
1295 
1296    is transformed this way:
1297 
1298    bb0:
1299    old.a = a;
1300    old.b = b;
1301 
1302    bb1:
1303    a' = new->a;
1304    b' = new->b;
1305    while (1)
1306      {
1307        use (a');
1308        use (b');
1309      }
1310 
1311    `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
1312    pointer `new' is intentionally not initialized (the loop will be split to a
1313    separate function later, and `new' will be initialized from its arguments).
1314    LD_ST_DATA holds information about the shared data structure used to pass
1315    information among the threads.  It is initialized here, and
1316    gen_parallel_loop will pass it to create_call_for_reduction that
1317    needs this information.  REDUCTION_LIST describes the reductions
1318    in LOOP.  */
1319 
1320 static void
1321 separate_decls_in_region (edge entry, edge exit,
1322 			  reduction_info_table_type *reduction_list,
1323 			  tree *arg_struct, tree *new_arg_struct,
1324 			  struct clsn_data *ld_st_data)
1325 
1326 {
1327   basic_block bb1 = split_edge (entry);
1328   basic_block bb0 = single_pred (bb1);
1329   name_to_copy_table_type name_copies (10);
1330   int_tree_htab_type decl_copies (10);
1331   unsigned i;
1332   tree type, type_name, nvar;
1333   gimple_stmt_iterator gsi;
1334   struct clsn_data clsn_data;
1335   auto_vec<basic_block, 3> body;
1336   basic_block bb;
1337   basic_block entry_bb = bb1;
1338   basic_block exit_bb = exit->dest;
1339   bool has_debug_stmt = false;
1340 
1341   entry = single_succ_edge (entry_bb);
1342   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1343 
1344   FOR_EACH_VEC_ELT (body, i, bb)
1345     {
1346       if (bb != entry_bb && bb != exit_bb)
1347 	{
1348 	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1349 	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1350 					   &name_copies, &decl_copies);
1351 
1352 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1353 	    {
1354 	      gimple *stmt = gsi_stmt (gsi);
1355 
1356 	      if (is_gimple_debug (stmt))
1357 		has_debug_stmt = true;
1358 	      else
1359 		separate_decls_in_region_stmt (entry, exit, stmt,
1360 					       &name_copies, &decl_copies);
1361 	    }
1362 	}
1363     }
1364 
1365   /* Now process debug bind stmts.  We must not create decls while
1366      processing debug stmts, so we defer their processing so as to
1367      make sure we will have debug info for as many variables as
1368      possible (all of those that were dealt with in the loop above),
1369      and discard those for which we know there's nothing we can
1370      do.  */
1371   if (has_debug_stmt)
1372     FOR_EACH_VEC_ELT (body, i, bb)
1373       if (bb != entry_bb && bb != exit_bb)
1374 	{
1375 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1376 	    {
1377 	      gimple *stmt = gsi_stmt (gsi);
1378 
1379 	      if (is_gimple_debug (stmt))
1380 		{
1381 		  if (separate_decls_in_region_debug (stmt, &name_copies,
1382 						      &decl_copies))
1383 		    {
1384 		      gsi_remove (&gsi, true);
1385 		      continue;
1386 		    }
1387 		}
1388 
1389 	      gsi_next (&gsi);
1390 	    }
1391 	}
1392 
1393   if (name_copies.elements () == 0 && reduction_list->elements () == 0)
1394     {
1395       /* It may happen that there is nothing to copy (if there are only
1396          loop carried and external variables in the loop).  */
1397       *arg_struct = NULL;
1398       *new_arg_struct = NULL;
1399     }
1400   else
1401     {
1402       /* Create the type for the structure to store the ssa names to.  */
1403       type = lang_hooks.types.make_type (RECORD_TYPE);
1404       type_name = build_decl (UNKNOWN_LOCATION,
1405 			      TYPE_DECL, create_tmp_var_name (".paral_data"),
1406 			      type);
1407       TYPE_NAME (type) = type_name;
1408 
1409       name_copies.traverse <tree, add_field_for_name> (type);
1410       if (reduction_list && reduction_list->elements () > 0)
1411 	{
1412 	  /* Create the fields for reductions.  */
1413 	  reduction_list->traverse <tree, add_field_for_reduction> (type);
1414 	}
1415       layout_type (type);
1416 
1417       /* Create the loads and stores.  */
1418       *arg_struct = create_tmp_var (type, ".paral_data_store");
1419       nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1420       *new_arg_struct = make_ssa_name (nvar);
1421 
1422       ld_st_data->store = *arg_struct;
1423       ld_st_data->load = *new_arg_struct;
1424       ld_st_data->store_bb = bb0;
1425       ld_st_data->load_bb = bb1;
1426 
1427       name_copies
1428 	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
1429 		  (ld_st_data);
1430 
1431       /* Load the calculation from memory (after the join of the threads).  */
1432 
1433       if (reduction_list && reduction_list->elements () > 0)
1434 	{
1435 	  reduction_list
1436 	    ->traverse <struct clsn_data *, create_stores_for_reduction>
1437 	    (ld_st_data);
1438 	  clsn_data.load = make_ssa_name (nvar);
1439 	  clsn_data.load_bb = exit->dest;
1440 	  clsn_data.store = ld_st_data->store;
1441 	  create_final_loads_for_reduction (reduction_list, &clsn_data);
1442 	}
1443     }
1444 }
1445 
1446 /* Returns true if FN was created to run in parallel.  */
1447 
1448 bool
1449 parallelized_function_p (tree fndecl)
1450 {
1451   cgraph_node *node = cgraph_node::get (fndecl);
1452   gcc_assert (node != NULL);
1453   return node->parallelized_function;
1454 }
1455 
1456 /* Creates and returns an empty function that will receive the body of
1457    a parallelized loop.  */
1458 
1459 static tree
1460 create_loop_fn (location_t loc)
1461 {
1462   char buf[100];
1463   char *tname;
1464   tree decl, type, name, t;
1465   struct function *act_cfun = cfun;
1466   static unsigned loopfn_num;
1467 
1468   loc = LOCATION_LOCUS (loc);
1469   snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1470   ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1471   clean_symbol_name (tname);
1472   name = get_identifier (tname);
1473   type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1474 
1475   decl = build_decl (loc, FUNCTION_DECL, name, type);
1476   TREE_STATIC (decl) = 1;
1477   TREE_USED (decl) = 1;
1478   DECL_ARTIFICIAL (decl) = 1;
1479   DECL_IGNORED_P (decl) = 0;
1480   TREE_PUBLIC (decl) = 0;
1481   DECL_UNINLINABLE (decl) = 1;
1482   DECL_EXTERNAL (decl) = 0;
1483   DECL_CONTEXT (decl) = NULL_TREE;
1484   DECL_INITIAL (decl) = make_node (BLOCK);
1485   BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
1486 
1487   t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1488   DECL_ARTIFICIAL (t) = 1;
1489   DECL_IGNORED_P (t) = 1;
1490   DECL_RESULT (decl) = t;
1491 
1492   t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1493 		  ptr_type_node);
1494   DECL_ARTIFICIAL (t) = 1;
1495   DECL_ARG_TYPE (t) = ptr_type_node;
1496   DECL_CONTEXT (t) = decl;
1497   TREE_USED (t) = 1;
1498   DECL_ARGUMENTS (decl) = t;
1499 
1500   allocate_struct_function (decl, false);
1501 
1502   /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1503      it.  */
1504   set_cfun (act_cfun);
1505 
1506   return decl;
1507 }
1508 
1509 /* Replace uses of NAME by VAL in block BB.  */
1510 
1511 static void
1512 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1513 {
1514   gimple *use_stmt;
1515   imm_use_iterator imm_iter;
1516 
1517   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1518     {
1519       if (gimple_bb (use_stmt) != bb)
1520 	continue;
1521 
1522       use_operand_p use_p;
1523       FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1524 	SET_USE (use_p, val);
1525     }
1526 }
1527 
1528 /* Do transformation from:
1529 
1530      <bb preheader>:
1531      ...
1532      goto <bb header>
1533 
1534      <bb header>:
1535      ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1536      sum_a = PHI <sum_init (preheader), sum_b (latch)>
1537      ...
1538      use (ivtmp_a)
1539      ...
1540      sum_b = sum_a + sum_update
1541      ...
1542      if (ivtmp_a < n)
1543        goto <bb latch>;
1544      else
1545        goto <bb exit>;
1546 
1547      <bb latch>:
1548      ivtmp_b = ivtmp_a + 1;
1549      goto <bb header>
1550 
1551      <bb exit>:
1552      sum_z = PHI <sum_b (cond[1]), ...>
1553 
1554      [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1555 	 that's <bb header>.
1556 
1557    to:
1558 
1559      <bb preheader>:
1560      ...
1561      goto <bb newheader>
1562 
1563      <bb header>:
1564      ivtmp_a = PHI <ivtmp_c (latch)>
1565      sum_a = PHI <sum_c (latch)>
1566      ...
1567      use (ivtmp_a)
1568      ...
1569      sum_b = sum_a + sum_update
1570      ...
1571      goto <bb latch>;
1572 
1573      <bb newheader>:
1574      ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1575      sum_c = PHI <sum_init (preheader), sum_b (latch)>
1576      if (ivtmp_c < n + 1)
1577        goto <bb header>;
1578      else
1579        goto <bb newexit>;
1580 
1581      <bb latch>:
1582      ivtmp_b = ivtmp_a + 1;
1583      goto <bb newheader>
1584 
1585      <bb newexit>:
1586      sum_y = PHI <sum_c (newheader)>
1587 
1588      <bb exit>:
1589      sum_z = PHI <sum_y (newexit), ...>
1590 
1591 
1592    In unified diff format:
1593 
1594       <bb preheader>:
1595       ...
1596 -     goto <bb header>
1597 +     goto <bb newheader>
1598 
1599       <bb header>:
1600 -     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1601 -     sum_a = PHI <sum_init (preheader), sum_b (latch)>
1602 +     ivtmp_a = PHI <ivtmp_c (latch)>
1603 +     sum_a = PHI <sum_c (latch)>
1604       ...
1605       use (ivtmp_a)
1606       ...
1607       sum_b = sum_a + sum_update
1608       ...
1609 -     if (ivtmp_a < n)
1610 -       goto <bb latch>;
1611 +     goto <bb latch>;
1612 +
1613 +     <bb newheader>:
1614 +     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1615 +     sum_c = PHI <sum_init (preheader), sum_b (latch)>
1616 +     if (ivtmp_c < n + 1)
1617 +       goto <bb header>;
1618       else
1619 	goto <bb exit>;
1620 
1621       <bb latch>:
1622       ivtmp_b = ivtmp_a + 1;
1623 -     goto <bb header>
1624 +     goto <bb newheader>
1625 
1626 +    <bb newexit>:
1627 +    sum_y = PHI <sum_c (newheader)>
1628 
1629       <bb exit>:
1630 -     sum_z = PHI <sum_b (cond[1]), ...>
1631 +     sum_z = PHI <sum_y (newexit), ...>
1632 
1633    Note: the example does not show any virtual phis, but these are handled more
1634    or less as reductions.
1635 
1636 
1637    Moves the exit condition of LOOP to the beginning of its header.
1638    REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
1639    bound.  */
1640 
1641 static void
1642 transform_to_exit_first_loop_alt (struct loop *loop,
1643 				  reduction_info_table_type *reduction_list,
1644 				  tree bound)
1645 {
1646   basic_block header = loop->header;
1647   basic_block latch = loop->latch;
1648   edge exit = single_dom_exit (loop);
1649   basic_block exit_block = exit->dest;
1650   gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1651   tree control = gimple_cond_lhs (cond_stmt);
1652   edge e;
1653 
1654   /* Rewriting virtuals into loop-closed ssa normal form makes this
1655      transformation simpler.  It also ensures that the virtuals are in
1656      loop-closed ssa normal from after the transformation, which is required by
1657      create_parallel_loop.  */
1658   rewrite_virtuals_into_loop_closed_ssa (loop);
1659 
1660   /* Create the new_header block.  */
1661   basic_block new_header = split_block_before_cond_jump (exit->src);
1662   edge edge_at_split = single_pred_edge (new_header);
1663 
1664   /* Redirect entry edge to new_header.  */
1665   edge entry = loop_preheader_edge (loop);
1666   e = redirect_edge_and_branch (entry, new_header);
1667   gcc_assert (e == entry);
1668 
1669   /* Redirect post_inc_edge to new_header.  */
1670   edge post_inc_edge = single_succ_edge (latch);
1671   e = redirect_edge_and_branch (post_inc_edge, new_header);
1672   gcc_assert (e == post_inc_edge);
1673 
1674   /* Redirect post_cond_edge to header.  */
1675   edge post_cond_edge = single_pred_edge (latch);
1676   e = redirect_edge_and_branch (post_cond_edge, header);
1677   gcc_assert (e == post_cond_edge);
1678 
1679   /* Redirect edge_at_split to latch.  */
1680   e = redirect_edge_and_branch (edge_at_split, latch);
1681   gcc_assert (e == edge_at_split);
1682 
1683   /* Set the new loop bound.  */
1684   gimple_cond_set_rhs (cond_stmt, bound);
1685   update_stmt (cond_stmt);
1686 
1687   /* Repair the ssa.  */
1688   vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1689   edge_var_map *vm;
1690   gphi_iterator gsi;
1691   int i;
1692   for (gsi = gsi_start_phis (header), i = 0;
1693        !gsi_end_p (gsi) && v->iterate (i, &vm);
1694        gsi_next (&gsi), i++)
1695     {
1696       gphi *phi = gsi.phi ();
1697       tree res_a = PHI_RESULT (phi);
1698 
1699       /* Create new phi.  */
1700       tree res_c = copy_ssa_name (res_a, phi);
1701       gphi *nphi = create_phi_node (res_c, new_header);
1702 
1703       /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
1704       replace_uses_in_bb_by (res_a, res_c, new_header);
1705 
1706       /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
1707       add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1708 
1709       /* Replace sum_b with sum_c in exit phi.  */
1710       tree res_b = redirect_edge_var_map_def (vm);
1711       replace_uses_in_bb_by (res_b, res_c, exit_block);
1712 
1713       struct reduction_info *red = reduction_phi (reduction_list, phi);
1714       gcc_assert (virtual_operand_p (res_a)
1715 		  || res_a == control
1716 		  || red != NULL);
1717 
1718       if (red)
1719 	{
1720 	  /* Register the new reduction phi.  */
1721 	  red->reduc_phi = nphi;
1722 	  gimple_set_uid (red->reduc_phi, red->reduc_version);
1723 	}
1724     }
1725   gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1726 
1727   /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
1728   flush_pending_stmts (entry);
1729 
1730   /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
1731   flush_pending_stmts (post_inc_edge);
1732 
1733 
1734   basic_block new_exit_block = NULL;
1735   if (!single_pred_p (exit->dest))
1736     {
1737       /* Create a new empty exit block, inbetween the new loop header and the
1738 	 old exit block.  The function separate_decls_in_region needs this block
1739 	 to insert code that is active on loop exit, but not any other path.  */
1740       new_exit_block = split_edge (exit);
1741     }
1742 
1743   /* Insert and register the reduction exit phis.  */
1744   for (gphi_iterator gsi = gsi_start_phis (exit_block);
1745        !gsi_end_p (gsi);
1746        gsi_next (&gsi))
1747     {
1748       gphi *phi = gsi.phi ();
1749       gphi *nphi = NULL;
1750       tree res_z = PHI_RESULT (phi);
1751       tree res_c;
1752 
1753       if (new_exit_block != NULL)
1754 	{
1755 	  /* Now that we have a new exit block, duplicate the phi of the old
1756 	     exit block in the new exit block to preserve loop-closed ssa.  */
1757 	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
1758 	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
1759 	  tree res_y = copy_ssa_name (res_z, phi);
1760 	  nphi = create_phi_node (res_y, new_exit_block);
1761 	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1762 	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1763 	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1764 	}
1765       else
1766 	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1767 
1768       if (virtual_operand_p (res_z))
1769 	continue;
1770 
1771       gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
1772       struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1773       if (red != NULL)
1774 	red->keep_res = (nphi != NULL
1775 			 ? nphi
1776 			 : phi);
1777     }
1778 
1779   /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1780      then we're still using some fields, so only bother about fields that are
1781      still used: header and latch.
1782      The loop has a new header bb, so we update it.  The latch bb stays the
1783      same.  */
1784   loop->header = new_header;
1785 
1786   /* Recalculate dominance info.  */
1787   free_dominance_info (CDI_DOMINATORS);
1788   calculate_dominance_info (CDI_DOMINATORS);
1789 
1790   checking_verify_ssa (true, true);
1791 }
1792 
1793 /* Tries to moves the exit condition of LOOP to the beginning of its header
1794    without duplication of the loop body.  NIT is the number of iterations of the
1795    loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
1796    transformation is successful.  */
1797 
1798 static bool
1799 try_transform_to_exit_first_loop_alt (struct loop *loop,
1800 				      reduction_info_table_type *reduction_list,
1801 				      tree nit)
1802 {
1803   /* Check whether the latch contains a single statement.  */
1804   if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1805     return false;
1806 
1807   /* Check whether the latch contains no phis.  */
1808   if (phi_nodes (loop->latch) != NULL)
1809     return false;
1810 
1811   /* Check whether the latch contains the loop iv increment.  */
1812   edge back = single_succ_edge (loop->latch);
1813   edge exit = single_dom_exit (loop);
1814   gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1815   tree control = gimple_cond_lhs (cond_stmt);
1816   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1817   tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1818   if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1819     return false;
1820 
1821   /* Check whether there's no code between the loop condition and the latch.  */
1822   if (!single_pred_p (loop->latch)
1823       || single_pred (loop->latch) != exit->src)
1824     return false;
1825 
1826   tree alt_bound = NULL_TREE;
1827   tree nit_type = TREE_TYPE (nit);
1828 
1829   /* Figure out whether nit + 1 overflows.  */
1830   if (TREE_CODE (nit) == INTEGER_CST)
1831     {
1832       if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
1833 	{
1834 	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1835 				       nit, build_one_cst (nit_type));
1836 
1837 	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1838 	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1839 	  return true;
1840 	}
1841       else
1842 	{
1843 	  /* Todo: Figure out if we can trigger this, if it's worth to handle
1844 	     optimally, and if we can handle it optimally.  */
1845 	  return false;
1846 	}
1847     }
1848 
1849   gcc_assert (TREE_CODE (nit) == SSA_NAME);
1850 
1851   /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1852      iv with base 0 and step 1 that is incremented in the latch, like this:
1853 
1854      <bb header>:
1855      # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1856      ...
1857      if (iv_1 < nit)
1858        goto <bb latch>;
1859      else
1860        goto <bb exit>;
1861 
1862      <bb latch>:
1863      iv_2 = iv_1 + 1;
1864      goto <bb header>;
1865 
1866      The range of iv_1 is [0, nit].  The latch edge is taken for
1867      iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
1868      number of latch executions is equal to nit.
1869 
1870      The function max_loop_iterations gives us the maximum number of latch
1871      executions, so it gives us the maximum value of nit.  */
1872   widest_int nit_max;
1873   if (!max_loop_iterations (loop, &nit_max))
1874     return false;
1875 
1876   /* Check if nit + 1 overflows.  */
1877   widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
1878   if (nit_max >= type_max)
1879     return false;
1880 
1881   gimple *def = SSA_NAME_DEF_STMT (nit);
1882 
1883   /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
1884   if (def
1885       && is_gimple_assign (def)
1886       && gimple_assign_rhs_code (def) == PLUS_EXPR)
1887     {
1888       tree op1 = gimple_assign_rhs1 (def);
1889       tree op2 = gimple_assign_rhs2 (def);
1890       if (integer_minus_onep (op1))
1891 	alt_bound = op2;
1892       else if (integer_minus_onep (op2))
1893 	alt_bound = op1;
1894     }
1895 
1896   /* If not found, insert nit + 1.  */
1897   if (alt_bound == NULL_TREE)
1898     {
1899       alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1900 			       build_int_cst_type (nit_type, 1));
1901 
1902       gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1903 
1904       alt_bound
1905 	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1906 				    GSI_CONTINUE_LINKING);
1907     }
1908 
1909   transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1910   return true;
1911 }
1912 
1913 /* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
1914    number of iterations of the loop.  REDUCTION_LIST describes the reductions in
1915    LOOP.  */
1916 
1917 static void
1918 transform_to_exit_first_loop (struct loop *loop,
1919 			      reduction_info_table_type *reduction_list,
1920 			      tree nit)
1921 {
1922   basic_block *bbs, *nbbs, ex_bb, orig_header;
1923   unsigned n;
1924   bool ok;
1925   edge exit = single_dom_exit (loop), hpred;
1926   tree control, control_name, res, t;
1927   gphi *phi, *nphi;
1928   gassign *stmt;
1929   gcond *cond_stmt, *cond_nit;
1930   tree nit_1;
1931 
1932   split_block_after_labels (loop->header);
1933   orig_header = single_succ (loop->header);
1934   hpred = single_succ_edge (loop->header);
1935 
1936   cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1937   control = gimple_cond_lhs (cond_stmt);
1938   gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1939 
1940   /* Make sure that we have phi nodes on exit for all loop header phis
1941      (create_parallel_loop requires that).  */
1942   for (gphi_iterator gsi = gsi_start_phis (loop->header);
1943        !gsi_end_p (gsi);
1944        gsi_next (&gsi))
1945     {
1946       phi = gsi.phi ();
1947       res = PHI_RESULT (phi);
1948       t = copy_ssa_name (res, phi);
1949       SET_PHI_RESULT (phi, t);
1950       nphi = create_phi_node (res, orig_header);
1951       add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1952 
1953       if (res == control)
1954 	{
1955 	  gimple_cond_set_lhs (cond_stmt, t);
1956 	  update_stmt (cond_stmt);
1957 	  control = t;
1958 	}
1959     }
1960 
1961   bbs = get_loop_body_in_dom_order (loop);
1962 
1963   for (n = 0; bbs[n] != exit->src; n++)
1964    continue;
1965   nbbs = XNEWVEC (basic_block, n);
1966   ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1967 				   bbs + 1, n, nbbs);
1968   gcc_assert (ok);
1969   free (bbs);
1970   ex_bb = nbbs[0];
1971   free (nbbs);
1972 
1973   /* Other than reductions, the only gimple reg that should be copied
1974      out of the loop is the control variable.  */
1975   exit = single_dom_exit (loop);
1976   control_name = NULL_TREE;
1977   for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1978        !gsi_end_p (gsi); )
1979     {
1980       phi = gsi.phi ();
1981       res = PHI_RESULT (phi);
1982       if (virtual_operand_p (res))
1983 	{
1984 	  gsi_next (&gsi);
1985 	  continue;
1986 	}
1987 
1988       /* Check if it is a part of reduction.  If it is,
1989          keep the phi at the reduction's keep_res field.  The
1990          PHI_RESULT of this phi is the resulting value of the reduction
1991          variable when exiting the loop.  */
1992 
1993       if (reduction_list->elements () > 0)
1994 	{
1995 	  struct reduction_info *red;
1996 
1997 	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1998 	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1999 	  if (red)
2000 	    {
2001 	      red->keep_res = phi;
2002 	      gsi_next (&gsi);
2003 	      continue;
2004 	    }
2005 	}
2006       gcc_assert (control_name == NULL_TREE
2007 		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2008       control_name = res;
2009       remove_phi_node (&gsi, false);
2010     }
2011   gcc_assert (control_name != NULL_TREE);
2012 
2013   /* Initialize the control variable to number of iterations
2014      according to the rhs of the exit condition.  */
2015   gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2016   cond_nit = as_a <gcond *> (last_stmt (exit->src));
2017   nit_1 =  gimple_cond_rhs (cond_nit);
2018   nit_1 = force_gimple_operand_gsi (&gsi,
2019 				  fold_convert (TREE_TYPE (control_name), nit_1),
2020 				  false, NULL_TREE, false, GSI_SAME_STMT);
2021   stmt = gimple_build_assign (control_name, nit_1);
2022   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2023 }
2024 
2025 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2026    LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2027    NEW_DATA is the variable that should be initialized from the argument
2028    of LOOP_FN.  N_THREADS is the requested number of threads, which can be 0 if
2029    that number is to be determined later.  */
2030 
2031 static void
2032 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
2033 		      tree new_data, unsigned n_threads, location_t loc,
2034 		      bool oacc_kernels_p)
2035 {
2036   gimple_stmt_iterator gsi;
2037   basic_block for_bb, ex_bb, continue_bb;
2038   tree t, param;
2039   gomp_parallel *omp_par_stmt;
2040   gimple *omp_return_stmt1, *omp_return_stmt2;
2041   gimple *phi;
2042   gcond *cond_stmt;
2043   gomp_for *for_stmt;
2044   gomp_continue *omp_cont_stmt;
2045   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2046   edge exit, nexit, guard, end, e;
2047 
2048   if (oacc_kernels_p)
2049     {
2050       gcc_checking_assert (lookup_attribute ("oacc kernels",
2051 					     DECL_ATTRIBUTES (cfun->decl)));
2052       /* Indicate to later processing that this is a parallelized OpenACC
2053 	 kernels construct.  */
2054       DECL_ATTRIBUTES (cfun->decl)
2055 	= tree_cons (get_identifier ("oacc kernels parallelized"),
2056 		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2057     }
2058   else
2059     {
2060       /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
2061 
2062       basic_block bb = loop_preheader_edge (loop)->src;
2063       basic_block paral_bb = single_pred (bb);
2064       gsi = gsi_last_bb (paral_bb);
2065 
2066       gcc_checking_assert (n_threads != 0);
2067       t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2068       OMP_CLAUSE_NUM_THREADS_EXPR (t)
2069 	= build_int_cst (integer_type_node, n_threads);
2070       omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2071       gimple_set_location (omp_par_stmt, loc);
2072 
2073       gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2074 
2075       /* Initialize NEW_DATA.  */
2076       if (data)
2077 	{
2078 	  gassign *assign_stmt;
2079 
2080 	  gsi = gsi_after_labels (bb);
2081 
2082 	  param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2083 	  assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2084 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2085 
2086 	  assign_stmt = gimple_build_assign (new_data,
2087 					     fold_convert (TREE_TYPE (new_data), param));
2088 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2089 	}
2090 
2091       /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
2092       bb = split_loop_exit_edge (single_dom_exit (loop));
2093       gsi = gsi_last_bb (bb);
2094       omp_return_stmt1 = gimple_build_omp_return (false);
2095       gimple_set_location (omp_return_stmt1, loc);
2096       gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2097     }
2098 
2099   /* Extract data for GIMPLE_OMP_FOR.  */
2100   gcc_assert (loop->header == single_dom_exit (loop)->src);
2101   cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2102 
2103   cvar = gimple_cond_lhs (cond_stmt);
2104   cvar_base = SSA_NAME_VAR (cvar);
2105   phi = SSA_NAME_DEF_STMT (cvar);
2106   cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2107   initvar = copy_ssa_name (cvar);
2108   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2109 	   initvar);
2110   cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2111 
2112   gsi = gsi_last_nondebug_bb (loop->latch);
2113   gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2114   gsi_remove (&gsi, true);
2115 
2116   /* Prepare cfg.  */
2117   for_bb = split_edge (loop_preheader_edge (loop));
2118   ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2119   extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2120   gcc_assert (exit == single_dom_exit (loop));
2121 
2122   guard = make_edge (for_bb, ex_bb, 0);
2123   /* FIXME: What is the probability?  */
2124   guard->probability = profile_probability::guessed_never ();
2125   /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
2126   loop->latch = split_edge (single_succ_edge (loop->latch));
2127   single_pred_edge (loop->latch)->flags = 0;
2128   end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2129   rescan_loop_exit (end, true, false);
2130 
2131   for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2132        !gsi_end_p (gpi); gsi_next (&gpi))
2133     {
2134       location_t locus;
2135       gphi *phi = gpi.phi ();
2136       tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2137       gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2138 
2139       /* If the exit phi is not connected to a header phi in the same loop, this
2140 	 value is not modified in the loop, and we're done with this phi.  */
2141       if (!(gimple_code (def_stmt) == GIMPLE_PHI
2142 	    && gimple_bb (def_stmt) == loop->header))
2143 	{
2144 	  locus = gimple_phi_arg_location_from_edge (phi, exit);
2145 	  add_phi_arg (phi, def, guard, locus);
2146 	  add_phi_arg (phi, def, end, locus);
2147 	  continue;
2148 	}
2149 
2150       gphi *stmt = as_a <gphi *> (def_stmt);
2151       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2152       locus = gimple_phi_arg_location_from_edge (stmt,
2153 						 loop_preheader_edge (loop));
2154       add_phi_arg (phi, def, guard, locus);
2155 
2156       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2157       locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2158       add_phi_arg (phi, def, end, locus);
2159     }
2160   e = redirect_edge_and_branch (exit, nexit->dest);
2161   PENDING_STMT (e) = NULL;
2162 
2163   /* Emit GIMPLE_OMP_FOR.  */
2164   if (oacc_kernels_p)
2165     /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
2166        omp-offload.c:execute_oacc_device_lower.  */
2167     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2168   else
2169     {
2170       t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2171       int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2172       enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
2173 	= (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2174       switch (schedule_type)
2175 	{
2176 	case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2177 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2178 	  break;
2179 	case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2180 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2181 	  break;
2182 	case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2183 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2184 	  break;
2185 	case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2186 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2187 	  chunk_size = 0;
2188 	  break;
2189 	case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2190 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2191 	  chunk_size = 0;
2192 	  break;
2193 	default:
2194 	  gcc_unreachable ();
2195 	}
2196       if (chunk_size != 0)
2197 	OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2198 	  = build_int_cst (integer_type_node, chunk_size);
2199     }
2200 
2201   for_stmt = gimple_build_omp_for (NULL,
2202 				   (oacc_kernels_p
2203 				    ? GF_OMP_FOR_KIND_OACC_LOOP
2204 				    : GF_OMP_FOR_KIND_FOR),
2205 				   t, 1, NULL);
2206 
2207   gimple_cond_set_lhs (cond_stmt, cvar_base);
2208   type = TREE_TYPE (cvar);
2209   gimple_set_location (for_stmt, loc);
2210   gimple_omp_for_set_index (for_stmt, 0, initvar);
2211   gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2212   gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2213   gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2214   gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2215 						cvar_base,
2216 						build_int_cst (type, 1)));
2217 
2218   gsi = gsi_last_bb (for_bb);
2219   gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2220   SSA_NAME_DEF_STMT (initvar) = for_stmt;
2221 
2222   /* Emit GIMPLE_OMP_CONTINUE.  */
2223   continue_bb = single_pred (loop->latch);
2224   gsi = gsi_last_bb (continue_bb);
2225   omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2226   gimple_set_location (omp_cont_stmt, loc);
2227   gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2228   SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2229 
2230   /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
2231   gsi = gsi_last_bb (ex_bb);
2232   omp_return_stmt2 = gimple_build_omp_return (true);
2233   gimple_set_location (omp_return_stmt2, loc);
2234   gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2235 
2236   /* After the above dom info is hosed.  Re-compute it.  */
2237   free_dominance_info (CDI_DOMINATORS);
2238   calculate_dominance_info (CDI_DOMINATORS);
2239 }
2240 
2241 /* Return number of phis in bb.  If COUNT_VIRTUAL_P is false, don't count the
2242    virtual phi.  */
2243 
2244 static unsigned int
2245 num_phis (basic_block bb, bool count_virtual_p)
2246 {
2247   unsigned int nr_phis = 0;
2248   gphi_iterator gsi;
2249   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2250     {
2251       if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2252 	continue;
2253 
2254       nr_phis++;
2255     }
2256 
2257   return nr_phis;
2258 }
2259 
2260 /* Generates code to execute the iterations of LOOP in N_THREADS
2261    threads in parallel, which can be 0 if that number is to be determined
2262    later.
2263 
2264    NITER describes number of iterations of LOOP.
2265    REDUCTION_LIST describes the reductions existent in the LOOP.  */
2266 
2267 static void
2268 gen_parallel_loop (struct loop *loop,
2269 		   reduction_info_table_type *reduction_list,
2270 		   unsigned n_threads, struct tree_niter_desc *niter,
2271 		   bool oacc_kernels_p)
2272 {
2273   tree many_iterations_cond, type, nit;
2274   tree arg_struct, new_arg_struct;
2275   gimple_seq stmts;
2276   edge entry, exit;
2277   struct clsn_data clsn_data;
2278   location_t loc;
2279   gimple *cond_stmt;
2280   unsigned int m_p_thread=2;
2281 
2282   /* From
2283 
2284      ---------------------------------------------------------------------
2285      loop
2286        {
2287 	 IV = phi (INIT, IV + STEP)
2288 	 BODY1;
2289 	 if (COND)
2290 	   break;
2291 	 BODY2;
2292        }
2293      ---------------------------------------------------------------------
2294 
2295      with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2296      we generate the following code:
2297 
2298      ---------------------------------------------------------------------
2299 
2300      if (MAY_BE_ZERO
2301      || NITER < MIN_PER_THREAD * N_THREADS)
2302      goto original;
2303 
2304      BODY1;
2305      store all local loop-invariant variables used in body of the loop to DATA.
2306      GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2307      load the variables from DATA.
2308      GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2309      BODY2;
2310      BODY1;
2311      GIMPLE_OMP_CONTINUE;
2312      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
2313      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
2314      goto end;
2315 
2316      original:
2317      loop
2318        {
2319 	 IV = phi (INIT, IV + STEP)
2320 	 BODY1;
2321 	 if (COND)
2322 	   break;
2323 	 BODY2;
2324        }
2325 
2326      end:
2327 
2328    */
2329 
2330   /* Create two versions of the loop -- in the old one, we know that the
2331      number of iterations is large enough, and we will transform it into the
2332      loop that will be split to loop_fn, the new one will be used for the
2333      remaining iterations.  */
2334 
2335   /* We should compute a better number-of-iterations value for outer loops.
2336      That is, if we have
2337 
2338     for (i = 0; i < n; ++i)
2339       for (j = 0; j < m; ++j)
2340         ...
2341 
2342     we should compute nit = n * m, not nit = n.
2343     Also may_be_zero handling would need to be adjusted.  */
2344 
2345   type = TREE_TYPE (niter->niter);
2346   nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2347 			      NULL_TREE);
2348   if (stmts)
2349     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2350 
2351   if (!oacc_kernels_p)
2352     {
2353       if (loop->inner)
2354 	m_p_thread=2;
2355       else
2356 	m_p_thread=MIN_PER_THREAD;
2357 
2358       gcc_checking_assert (n_threads != 0);
2359       many_iterations_cond =
2360 	fold_build2 (GE_EXPR, boolean_type_node,
2361 		     nit, build_int_cst (type, m_p_thread * n_threads - 1));
2362 
2363       many_iterations_cond
2364 	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2365 		       invert_truthvalue (unshare_expr (niter->may_be_zero)),
2366 		       many_iterations_cond);
2367       many_iterations_cond
2368 	= force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2369       if (stmts)
2370 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2371       if (!is_gimple_condexpr (many_iterations_cond))
2372 	{
2373 	  many_iterations_cond
2374 	    = force_gimple_operand (many_iterations_cond, &stmts,
2375 				    true, NULL_TREE);
2376 	  if (stmts)
2377 	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
2378 					      stmts);
2379 	}
2380 
2381       initialize_original_copy_tables ();
2382 
2383       /* We assume that the loop usually iterates a lot.  */
2384       loop_version (loop, many_iterations_cond, NULL,
2385 		    profile_probability::likely (),
2386 		    profile_probability::unlikely (),
2387 		    profile_probability::likely (),
2388 		    profile_probability::unlikely (), true);
2389       update_ssa (TODO_update_ssa);
2390       free_original_copy_tables ();
2391     }
2392 
2393   /* Base all the induction variables in LOOP on a single control one.  */
2394   canonicalize_loop_ivs (loop, &nit, true);
2395   if (num_phis (loop->header, false) != reduction_list->elements () + 1)
2396     {
2397       /* The call to canonicalize_loop_ivs above failed to "base all the
2398 	 induction variables in LOOP on a single control one".  Do damage
2399 	 control.  */
2400       basic_block preheader = loop_preheader_edge (loop)->src;
2401       basic_block cond_bb = single_pred (preheader);
2402       gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
2403       gimple_cond_make_true (cond);
2404       update_stmt (cond);
2405       /* We've gotten rid of the duplicate loop created by loop_version, but
2406 	 we can't undo whatever canonicalize_loop_ivs has done.
2407 	 TODO: Fix this properly by ensuring that the call to
2408 	 canonicalize_loop_ivs succeeds.  */
2409       if (dump_file
2410 	  && (dump_flags & TDF_DETAILS))
2411 	fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
2412 		 " aborting transformation\n", loop->num);
2413       return;
2414     }
2415 
2416   /* Ensure that the exit condition is the first statement in the loop.
2417      The common case is that latch of the loop is empty (apart from the
2418      increment) and immediately follows the loop exit test.  Attempt to move the
2419      entry of the loop directly before the exit check and increase the number of
2420      iterations of the loop by one.  */
2421   if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2422     {
2423       if (dump_file
2424 	  && (dump_flags & TDF_DETAILS))
2425 	fprintf (dump_file,
2426 		 "alternative exit-first loop transform succeeded"
2427 		 " for loop %d\n", loop->num);
2428     }
2429   else
2430     {
2431       if (oacc_kernels_p)
2432 	n_threads = 1;
2433 
2434       /* Fall back on the method that handles more cases, but duplicates the
2435 	 loop body: move the exit condition of LOOP to the beginning of its
2436 	 header, and duplicate the part of the last iteration that gets disabled
2437 	 to the exit of the loop.  */
2438       transform_to_exit_first_loop (loop, reduction_list, nit);
2439     }
2440 
2441   /* Generate initializations for reductions.  */
2442   if (reduction_list->elements () > 0)
2443     reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2444 
2445   /* Eliminate the references to local variables from the loop.  */
2446   gcc_assert (single_exit (loop));
2447   entry = loop_preheader_edge (loop);
2448   exit = single_dom_exit (loop);
2449 
2450   /* This rewrites the body in terms of new variables.  This has already
2451      been done for oacc_kernels_p in pass_lower_omp/lower_omp ().  */
2452   if (!oacc_kernels_p)
2453     {
2454       eliminate_local_variables (entry, exit);
2455       /* In the old loop, move all variables non-local to the loop to a
2456 	 structure and back, and create separate decls for the variables used in
2457 	 loop.  */
2458       separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2459 				&new_arg_struct, &clsn_data);
2460     }
2461   else
2462     {
2463       arg_struct = NULL_TREE;
2464       new_arg_struct = NULL_TREE;
2465       clsn_data.load = NULL_TREE;
2466       clsn_data.load_bb = exit->dest;
2467       clsn_data.store = NULL_TREE;
2468       clsn_data.store_bb = NULL;
2469     }
2470 
2471   /* Create the parallel constructs.  */
2472   loc = UNKNOWN_LOCATION;
2473   cond_stmt = last_stmt (loop->header);
2474   if (cond_stmt)
2475     loc = gimple_location (cond_stmt);
2476   create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
2477 			n_threads, loc, oacc_kernels_p);
2478   if (reduction_list->elements () > 0)
2479     create_call_for_reduction (loop, reduction_list, &clsn_data);
2480 
2481   scev_reset ();
2482 
2483   /* Free loop bound estimations that could contain references to
2484      removed statements.  */
2485   free_numbers_of_iterations_estimates (cfun);
2486 }
2487 
2488 /* Returns true when LOOP contains vector phi nodes.  */
2489 
2490 static bool
2491 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2492 {
2493   unsigned i;
2494   basic_block *bbs = get_loop_body_in_dom_order (loop);
2495   gphi_iterator gsi;
2496   bool res = true;
2497 
2498   for (i = 0; i < loop->num_nodes; i++)
2499     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2500       if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2501 	goto end;
2502 
2503   res = false;
2504  end:
2505   free (bbs);
2506   return res;
2507 }
2508 
2509 /* Create a reduction_info struct, initialize it with REDUC_STMT
2510    and PHI, insert it to the REDUCTION_LIST.  */
2511 
2512 static void
2513 build_new_reduction (reduction_info_table_type *reduction_list,
2514 		     gimple *reduc_stmt, gphi *phi)
2515 {
2516   reduction_info **slot;
2517   struct reduction_info *new_reduction;
2518   enum tree_code reduction_code;
2519 
2520   gcc_assert (reduc_stmt);
2521 
2522   if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2523     {
2524       tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
2525       gimple *def1 = SSA_NAME_DEF_STMT (op1);
2526       reduction_code = gimple_assign_rhs_code (def1);
2527     }
2528   else
2529     reduction_code = gimple_assign_rhs_code (reduc_stmt);
2530   /* Check for OpenMP supported reduction.  */
2531   switch (reduction_code)
2532     {
2533     case PLUS_EXPR:
2534     case MULT_EXPR:
2535     case MAX_EXPR:
2536     case MIN_EXPR:
2537     case BIT_IOR_EXPR:
2538     case BIT_XOR_EXPR:
2539     case BIT_AND_EXPR:
2540     case TRUTH_OR_EXPR:
2541     case TRUTH_XOR_EXPR:
2542     case TRUTH_AND_EXPR:
2543       break;
2544     default:
2545       return;
2546     }
2547 
2548   if (dump_file && (dump_flags & TDF_DETAILS))
2549     {
2550       fprintf (dump_file,
2551 	       "Detected reduction. reduction stmt is:\n");
2552       print_gimple_stmt (dump_file, reduc_stmt, 0);
2553       fprintf (dump_file, "\n");
2554     }
2555 
2556   new_reduction = XCNEW (struct reduction_info);
2557 
2558   new_reduction->reduc_stmt = reduc_stmt;
2559   new_reduction->reduc_phi = phi;
2560   new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2561   new_reduction->reduction_code = reduction_code;
2562   slot = reduction_list->find_slot (new_reduction, INSERT);
2563   *slot = new_reduction;
2564 }
2565 
2566 /* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */
2567 
2568 int
2569 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2570 {
2571   struct reduction_info *const red = *slot;
2572   gimple_set_uid (red->reduc_phi, red->reduc_version);
2573   return 1;
2574 }
2575 
2576 /* Return true if the type of reduction performed by STMT_INFO is suitable
2577    for this pass.  */
2578 
2579 static bool
2580 valid_reduction_p (stmt_vec_info stmt_info)
2581 {
2582   /* Parallelization would reassociate the operation, which isn't
2583      allowed for in-order reductions.  */
2584   vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
2585   return reduc_type != FOLD_LEFT_REDUCTION;
2586 }
2587 
2588 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
2589 
2590 static void
2591 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2592 {
2593   gphi_iterator gsi;
2594   loop_vec_info simple_loop_info;
2595   auto_vec<gphi *, 4> double_reduc_phis;
2596   auto_vec<gimple *, 4> double_reduc_stmts;
2597 
2598   vec_info_shared shared;
2599   simple_loop_info = vect_analyze_loop_form (loop, &shared);
2600   if (simple_loop_info == NULL)
2601     goto gather_done;
2602 
2603   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2604     {
2605       gphi *phi = gsi.phi ();
2606       affine_iv iv;
2607       tree res = PHI_RESULT (phi);
2608       bool double_reduc;
2609 
2610       if (virtual_operand_p (res))
2611 	continue;
2612 
2613       if (simple_iv (loop, loop, res, &iv, true))
2614 	continue;
2615 
2616       stmt_vec_info reduc_stmt_info
2617 	= vect_force_simple_reduction (simple_loop_info,
2618 				       simple_loop_info->lookup_stmt (phi),
2619 				       &double_reduc, true);
2620       if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
2621 	continue;
2622 
2623       if (double_reduc)
2624 	{
2625 	  if (loop->inner->inner != NULL)
2626 	    continue;
2627 
2628 	  double_reduc_phis.safe_push (phi);
2629 	  double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
2630 	  continue;
2631 	}
2632 
2633       build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
2634     }
2635   delete simple_loop_info;
2636 
2637   if (!double_reduc_phis.is_empty ())
2638     {
2639       vec_info_shared shared;
2640       simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
2641       if (simple_loop_info)
2642 	{
2643 	  gphi *phi;
2644 	  unsigned int i;
2645 
2646 	  FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
2647 	    {
2648 	      affine_iv iv;
2649 	      tree res = PHI_RESULT (phi);
2650 	      bool double_reduc;
2651 
2652 	      use_operand_p use_p;
2653 	      gimple *inner_stmt;
2654 	      bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
2655 	      gcc_assert (single_use_p);
2656 	      if (gimple_code (inner_stmt) != GIMPLE_PHI)
2657 		continue;
2658 	      gphi *inner_phi = as_a <gphi *> (inner_stmt);
2659 	      if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2660 			     &iv, true))
2661 		continue;
2662 
2663 	      stmt_vec_info inner_phi_info
2664 		= simple_loop_info->lookup_stmt (inner_phi);
2665 	      stmt_vec_info inner_reduc_stmt_info
2666 		= vect_force_simple_reduction (simple_loop_info,
2667 					       inner_phi_info,
2668 					       &double_reduc, true);
2669 	      gcc_assert (!double_reduc);
2670 	      if (!inner_reduc_stmt_info
2671 		  || !valid_reduction_p (inner_reduc_stmt_info))
2672 		continue;
2673 
2674 	      build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
2675 	    }
2676 	  delete simple_loop_info;
2677 	}
2678     }
2679 
2680  gather_done:
2681   if (reduction_list->elements () == 0)
2682     return;
2683 
2684   /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2685      and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
2686      now.  */
2687   basic_block bb;
2688   FOR_EACH_BB_FN (bb, cfun)
2689     for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2690       gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
2691   reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2692 }
2693 
2694 /* Try to initialize NITER for code generation part.  */
2695 
2696 static bool
2697 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2698 {
2699   edge exit = single_dom_exit (loop);
2700 
2701   gcc_assert (exit);
2702 
2703   /* We need to know # of iterations, and there should be no uses of values
2704      defined inside loop outside of it, unless the values are invariants of
2705      the loop.  */
2706   if (!number_of_iterations_exit (loop, exit, niter, false))
2707     {
2708       if (dump_file && (dump_flags & TDF_DETAILS))
2709 	fprintf (dump_file, "  FAILED: number of iterations not known\n");
2710       return false;
2711     }
2712 
2713   return true;
2714 }
2715 
2716 /* Return the default def of the first function argument.  */
2717 
2718 static tree
2719 get_omp_data_i_param (void)
2720 {
2721   tree decl = DECL_ARGUMENTS (cfun->decl);
2722   gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
2723   return ssa_default_def (cfun, decl);
2724 }
2725 
2726 /* For PHI in loop header of LOOP, look for pattern:
2727 
2728    <bb preheader>
2729    .omp_data_i = &.omp_data_arr;
2730    addr = .omp_data_i->sum;
2731    sum_a = *addr;
2732 
2733    <bb header>:
2734    sum_b = PHI <sum_a (preheader), sum_c (latch)>
2735 
2736    and return addr.  Otherwise, return NULL_TREE.  */
2737 
2738 static tree
2739 find_reduc_addr (struct loop *loop, gphi *phi)
2740 {
2741   edge e = loop_preheader_edge (loop);
2742   tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
2743   gimple *stmt = SSA_NAME_DEF_STMT (arg);
2744   if (!gimple_assign_single_p (stmt))
2745     return NULL_TREE;
2746   tree memref = gimple_assign_rhs1 (stmt);
2747   if (TREE_CODE (memref) != MEM_REF)
2748     return NULL_TREE;
2749   tree addr = TREE_OPERAND (memref, 0);
2750 
2751   gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
2752   if (!gimple_assign_single_p (stmt2))
2753     return NULL_TREE;
2754   tree compref = gimple_assign_rhs1 (stmt2);
2755   if (TREE_CODE (compref) != COMPONENT_REF)
2756     return NULL_TREE;
2757   tree addr2 = TREE_OPERAND (compref, 0);
2758   if (TREE_CODE (addr2) != MEM_REF)
2759     return NULL_TREE;
2760   addr2 = TREE_OPERAND (addr2, 0);
2761   if (TREE_CODE (addr2) != SSA_NAME
2762       || addr2 != get_omp_data_i_param ())
2763     return NULL_TREE;
2764 
2765   return addr;
2766 }
2767 
2768 /* Try to initialize REDUCTION_LIST for code generation part.
2769    REDUCTION_LIST describes the reductions.  */
2770 
2771 static bool
2772 try_create_reduction_list (loop_p loop,
2773 			   reduction_info_table_type *reduction_list,
2774 			   bool oacc_kernels_p)
2775 {
2776   edge exit = single_dom_exit (loop);
2777   gphi_iterator gsi;
2778 
2779   gcc_assert (exit);
2780 
2781   /* Try to get rid of exit phis.  */
2782   final_value_replacement_loop (loop);
2783 
2784   gather_scalar_reductions (loop, reduction_list);
2785 
2786 
2787   for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2788     {
2789       gphi *phi = gsi.phi ();
2790       struct reduction_info *red;
2791       imm_use_iterator imm_iter;
2792       use_operand_p use_p;
2793       gimple *reduc_phi;
2794       tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2795 
2796       if (!virtual_operand_p (val))
2797 	{
2798 	  if (TREE_CODE (val) != SSA_NAME)
2799 	    {
2800 	      if (dump_file && (dump_flags & TDF_DETAILS))
2801 		fprintf (dump_file,
2802 			 "  FAILED: exit PHI argument invariant.\n");
2803 	      return false;
2804 	    }
2805 
2806 	  if (dump_file && (dump_flags & TDF_DETAILS))
2807 	    {
2808 	      fprintf (dump_file, "phi is ");
2809 	      print_gimple_stmt (dump_file, phi, 0);
2810 	      fprintf (dump_file, "arg of phi to exit:   value ");
2811 	      print_generic_expr (dump_file, val);
2812 	      fprintf (dump_file, " used outside loop\n");
2813 	      fprintf (dump_file,
2814 		       "  checking if it is part of reduction pattern:\n");
2815 	    }
2816 	  if (reduction_list->elements () == 0)
2817 	    {
2818 	      if (dump_file && (dump_flags & TDF_DETAILS))
2819 		fprintf (dump_file,
2820 			 "  FAILED: it is not a part of reduction.\n");
2821 	      return false;
2822 	    }
2823 	  reduc_phi = NULL;
2824 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2825 	    {
2826 	      if (!gimple_debug_bind_p (USE_STMT (use_p))
2827 		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2828 		{
2829 		  reduc_phi = USE_STMT (use_p);
2830 		  break;
2831 		}
2832 	    }
2833 	  red = reduction_phi (reduction_list, reduc_phi);
2834 	  if (red == NULL)
2835 	    {
2836 	      if (dump_file && (dump_flags & TDF_DETAILS))
2837 		fprintf (dump_file,
2838 			 "  FAILED: it is not a part of reduction.\n");
2839 	      return false;
2840 	    }
2841 	  if (red->keep_res != NULL)
2842 	    {
2843 	      if (dump_file && (dump_flags & TDF_DETAILS))
2844 		fprintf (dump_file,
2845 			 "  FAILED: reduction has multiple exit phis.\n");
2846 	      return false;
2847 	    }
2848 	  red->keep_res = phi;
2849 	  if (dump_file && (dump_flags & TDF_DETAILS))
2850 	    {
2851 	      fprintf (dump_file, "reduction phi is  ");
2852 	      print_gimple_stmt (dump_file, red->reduc_phi, 0);
2853 	      fprintf (dump_file, "reduction stmt is  ");
2854 	      print_gimple_stmt (dump_file, red->reduc_stmt, 0);
2855 	    }
2856 	}
2857     }
2858 
2859   /* The iterations of the loop may communicate only through bivs whose
2860      iteration space can be distributed efficiently.  */
2861   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2862     {
2863       gphi *phi = gsi.phi ();
2864       tree def = PHI_RESULT (phi);
2865       affine_iv iv;
2866 
2867       if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2868 	{
2869 	  struct reduction_info *red;
2870 
2871 	  red = reduction_phi (reduction_list, phi);
2872 	  if (red == NULL)
2873 	    {
2874 	      if (dump_file && (dump_flags & TDF_DETAILS))
2875 		fprintf (dump_file,
2876 			 "  FAILED: scalar dependency between iterations\n");
2877 	      return false;
2878 	    }
2879 	}
2880     }
2881 
2882   if (oacc_kernels_p)
2883     {
2884       for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
2885 	   gsi_next (&gsi))
2886 	{
2887 	  gphi *phi = gsi.phi ();
2888 	  tree def = PHI_RESULT (phi);
2889 	  affine_iv iv;
2890 
2891 	  if (!virtual_operand_p (def)
2892 	      && !simple_iv (loop, loop, def, &iv, true))
2893 	    {
2894 	      tree addr = find_reduc_addr (loop, phi);
2895 	      if (addr == NULL_TREE)
2896 		return false;
2897 	      struct reduction_info *red = reduction_phi (reduction_list, phi);
2898 	      red->reduc_addr = addr;
2899 	    }
2900 	}
2901     }
2902 
2903   return true;
2904 }
2905 
2906 /* Return true if LOOP contains phis with ADDR_EXPR in args.  */
2907 
2908 static bool
2909 loop_has_phi_with_address_arg (struct loop *loop)
2910 {
2911   basic_block *bbs = get_loop_body (loop);
2912   bool res = false;
2913 
2914   unsigned i, j;
2915   gphi_iterator gsi;
2916   for (i = 0; i < loop->num_nodes; i++)
2917     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2918       {
2919 	gphi *phi = gsi.phi ();
2920 	for (j = 0; j < gimple_phi_num_args (phi); j++)
2921 	  {
2922 	    tree arg = gimple_phi_arg_def (phi, j);
2923 	    if (TREE_CODE (arg) == ADDR_EXPR)
2924 	      {
2925 		/* This should be handled by eliminate_local_variables, but that
2926 		   function currently ignores phis.  */
2927 		res = true;
2928 		goto end;
2929 	      }
2930 	  }
2931       }
2932  end:
2933   free (bbs);
2934 
2935   return res;
2936 }
2937 
2938 /* Return true if memory ref REF (corresponding to the stmt at GSI in
2939    REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
2940    or the statements in REGIONS_BB[I + n].  REF_IS_STORE indicates if REF is a
2941    store.  Ignore conflicts with SKIP_STMT.  */
2942 
2943 static bool
2944 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
2945 			   bool ref_is_store, vec<basic_block> region_bbs,
2946 			   unsigned int i, gimple *skip_stmt)
2947 {
2948   basic_block bb = region_bbs[i];
2949   gsi_next (&gsi);
2950 
2951   while (true)
2952     {
2953       for (; !gsi_end_p (gsi);
2954 	   gsi_next (&gsi))
2955 	{
2956 	  gimple *stmt = gsi_stmt (gsi);
2957 	  if (stmt == skip_stmt)
2958 	    {
2959 	      if (dump_file)
2960 		{
2961 		  fprintf (dump_file, "skipping reduction store: ");
2962 		  print_gimple_stmt (dump_file, stmt, 0);
2963 		}
2964 	      continue;
2965 	    }
2966 
2967 	  if (!gimple_vdef (stmt)
2968 	      && !gimple_vuse (stmt))
2969 	    continue;
2970 
2971 	  if (gimple_code (stmt) == GIMPLE_RETURN)
2972 	    continue;
2973 
2974 	  if (ref_is_store)
2975 	    {
2976 	      if (ref_maybe_used_by_stmt_p (stmt, ref))
2977 		{
2978 		  if (dump_file)
2979 		    {
2980 		      fprintf (dump_file, "Stmt ");
2981 		      print_gimple_stmt (dump_file, stmt, 0);
2982 		    }
2983 		  return true;
2984 		}
2985 	    }
2986 	  else
2987 	    {
2988 	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
2989 		{
2990 		  if (dump_file)
2991 		    {
2992 		      fprintf (dump_file, "Stmt ");
2993 		      print_gimple_stmt (dump_file, stmt, 0);
2994 		    }
2995 		  return true;
2996 		}
2997 	    }
2998 	}
2999       i++;
3000       if (i == region_bbs.length ())
3001 	break;
3002       bb = region_bbs[i];
3003       gsi = gsi_start_bb (bb);
3004     }
3005 
3006   return false;
3007 }
3008 
3009 /* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3010    in parallel with REGION_BBS containing the loop.  Return the stores of
3011    reduction results in REDUCTION_STORES.  */
3012 
3013 static bool
3014 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3015 		      reduction_info_table_type *reduction_list,
3016 		      bitmap reduction_stores)
3017 {
3018   tree omp_data_i = get_omp_data_i_param ();
3019 
3020   unsigned i;
3021   basic_block bb;
3022   FOR_EACH_VEC_ELT (region_bbs, i, bb)
3023     {
3024       if (bitmap_bit_p (in_loop_bbs, bb->index))
3025 	continue;
3026 
3027       gimple_stmt_iterator gsi;
3028       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3029 	   gsi_next (&gsi))
3030 	{
3031 	  gimple *stmt = gsi_stmt (gsi);
3032 	  gimple *skip_stmt = NULL;
3033 
3034 	  if (is_gimple_debug (stmt)
3035 	      || gimple_code (stmt) == GIMPLE_COND)
3036 	    continue;
3037 
3038 	  ao_ref ref;
3039 	  bool ref_is_store = false;
3040 	  if (gimple_assign_load_p (stmt))
3041 	    {
3042 	      tree rhs = gimple_assign_rhs1 (stmt);
3043 	      tree base = get_base_address (rhs);
3044 	      if (TREE_CODE (base) == MEM_REF
3045 		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3046 		continue;
3047 
3048 	      tree lhs = gimple_assign_lhs (stmt);
3049 	      if (TREE_CODE (lhs) == SSA_NAME
3050 		  && has_single_use (lhs))
3051 		{
3052 		  use_operand_p use_p;
3053 		  gimple *use_stmt;
3054 		  single_imm_use (lhs, &use_p, &use_stmt);
3055 		  if (gimple_code (use_stmt) == GIMPLE_PHI)
3056 		    {
3057 		      struct reduction_info *red;
3058 		      red = reduction_phi (reduction_list, use_stmt);
3059 		      tree val = PHI_RESULT (red->keep_res);
3060 		      if (has_single_use (val))
3061 			{
3062 			  single_imm_use (val, &use_p, &use_stmt);
3063 			  if (gimple_store_p (use_stmt))
3064 			    {
3065 			      unsigned int id
3066 				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
3067 			      bitmap_set_bit (reduction_stores, id);
3068 			      skip_stmt = use_stmt;
3069 			      if (dump_file)
3070 				{
3071 				  fprintf (dump_file, "found reduction load: ");
3072 				  print_gimple_stmt (dump_file, stmt, 0);
3073 				}
3074 			    }
3075 			}
3076 		    }
3077 		}
3078 
3079 	      ao_ref_init (&ref, rhs);
3080 	    }
3081 	  else if (gimple_store_p (stmt))
3082 	    {
3083 	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
3084 	      ref_is_store = true;
3085 	    }
3086 	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3087 	    continue;
3088 	  else if (!gimple_has_side_effects (stmt)
3089 		   && !gimple_could_trap_p (stmt)
3090 		   && !stmt_could_throw_p (cfun, stmt)
3091 		   && !gimple_vdef (stmt)
3092 		   && !gimple_vuse (stmt))
3093 	    continue;
3094 	  else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3095 	    continue;
3096 	  else if (gimple_code (stmt) == GIMPLE_RETURN)
3097 	    continue;
3098 	  else
3099 	    {
3100 	      if (dump_file)
3101 		{
3102 		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3103 		  print_gimple_stmt (dump_file, stmt, 0);
3104 		}
3105 	      return false;
3106 	    }
3107 
3108 	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3109 					 i, skip_stmt))
3110 	    {
3111 	      if (dump_file)
3112 		{
3113 		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
3114 		  print_gimple_stmt (dump_file, stmt, 0);
3115 		}
3116 	      return false;
3117 	    }
3118 	}
3119     }
3120 
3121   return true;
3122 }
3123 
3124 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3125    gang_pos == 0, except when the stores are REDUCTION_STORES.  Return true
3126    if any changes were made.  */
3127 
3128 static bool
3129 oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3130 			     bitmap reduction_stores)
3131 {
3132   tree gang_pos = NULL_TREE;
3133   bool changed = false;
3134 
3135   unsigned i;
3136   basic_block bb;
3137   FOR_EACH_VEC_ELT (region_bbs, i, bb)
3138     {
3139       if (bitmap_bit_p (in_loop_bbs, bb->index))
3140 	continue;
3141 
3142       gimple_stmt_iterator gsi;
3143       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3144 	{
3145 	  gimple *stmt = gsi_stmt (gsi);
3146 
3147 	  if (!gimple_store_p (stmt))
3148 	    {
3149 	      /* Update gsi to point to next stmt.  */
3150 	      gsi_next (&gsi);
3151 	      continue;
3152 	    }
3153 
3154 	  if (bitmap_bit_p (reduction_stores,
3155 			    SSA_NAME_VERSION (gimple_vdef (stmt))))
3156 	    {
3157 	      if (dump_file)
3158 		{
3159 		  fprintf (dump_file,
3160 			   "skipped reduction store for single-gang"
3161 			   " neutering: ");
3162 		  print_gimple_stmt (dump_file, stmt, 0);
3163 		}
3164 
3165 	      /* Update gsi to point to next stmt.  */
3166 	      gsi_next (&gsi);
3167 	      continue;
3168 	    }
3169 
3170 	  changed = true;
3171 
3172 	  if (gang_pos == NULL_TREE)
3173 	    {
3174 	      tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3175 	      gcall *gang_single
3176 		= gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3177 	      gang_pos = make_ssa_name (integer_type_node);
3178 	      gimple_call_set_lhs (gang_single, gang_pos);
3179 	      gimple_stmt_iterator start
3180 		= gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3181 	      tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3182 	      gimple_set_vuse (gang_single, vuse);
3183 	      gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3184 	    }
3185 
3186 	  if (dump_file)
3187 	    {
3188 	      fprintf (dump_file,
3189 		       "found store that needs single-gang neutering: ");
3190 	      print_gimple_stmt (dump_file, stmt, 0);
3191 	    }
3192 
3193 	  {
3194 	    /* Split block before store.  */
3195 	    gimple_stmt_iterator gsi2 = gsi;
3196 	    gsi_prev (&gsi2);
3197 	    edge e;
3198 	    if (gsi_end_p (gsi2))
3199 	      {
3200 		e = split_block_after_labels (bb);
3201 		gsi2 = gsi_last_bb (bb);
3202 	      }
3203 	    else
3204 	      e = split_block (bb, gsi_stmt (gsi2));
3205 	    basic_block bb2 = e->dest;
3206 
3207 	    /* Split block after store.  */
3208 	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3209 	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
3210 	    basic_block bb3 = e2->dest;
3211 
3212 	    gimple *cond
3213 	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3214 				   NULL_TREE, NULL_TREE);
3215 	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3216 
3217 	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3218 	    /* FIXME: What is the probability?  */
3219 	    e3->probability = profile_probability::guessed_never ();
3220 	    e->flags = EDGE_TRUE_VALUE;
3221 
3222 	    tree vdef = gimple_vdef (stmt);
3223 	    tree vuse = gimple_vuse (stmt);
3224 
3225 	    tree phi_res = copy_ssa_name (vdef);
3226 	    gphi *new_phi = create_phi_node (phi_res, bb3);
3227 	    replace_uses_by (vdef, phi_res);
3228 	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3229 	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3230 
3231 	    /* Update gsi to point to next stmt.  */
3232 	    bb = bb3;
3233 	    gsi = gsi_start_bb (bb);
3234 	  }
3235 	}
3236     }
3237 
3238   return changed;
3239 }
3240 
3241 /* Return true if the statements before and after the LOOP can be executed in
3242    parallel with the function containing the loop.  Resolve conflicting stores
3243    outside LOOP by guarding them such that only a single gang executes them.  */
3244 
3245 static bool
3246 oacc_entry_exit_ok (struct loop *loop,
3247 		    reduction_info_table_type *reduction_list)
3248 {
3249   basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3250   vec<basic_block> region_bbs
3251     = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3252 
3253   bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3254   bitmap_clear (in_loop_bbs);
3255   for (unsigned int i = 0; i < loop->num_nodes; i++)
3256     bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3257 
3258   bitmap reduction_stores = BITMAP_ALLOC (NULL);
3259   bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3260 				   reduction_stores);
3261 
3262   if (res)
3263     {
3264       bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3265 						  reduction_stores);
3266       if (changed)
3267 	{
3268 	  free_dominance_info (CDI_DOMINATORS);
3269 	  calculate_dominance_info (CDI_DOMINATORS);
3270 	}
3271     }
3272 
3273   region_bbs.release ();
3274   free (loop_bbs);
3275 
3276   BITMAP_FREE (in_loop_bbs);
3277   BITMAP_FREE (reduction_stores);
3278 
3279   return res;
3280 }
3281 
3282 /* Detect parallel loops and generate parallel code using libgomp
3283    primitives.  Returns true if some loop was parallelized, false
3284    otherwise.  */
3285 
3286 static bool
3287 parallelize_loops (bool oacc_kernels_p)
3288 {
3289   unsigned n_threads;
3290   bool changed = false;
3291   struct loop *loop;
3292   struct loop *skip_loop = NULL;
3293   struct tree_niter_desc niter_desc;
3294   struct obstack parloop_obstack;
3295   HOST_WIDE_INT estimated;
3296 
3297   /* Do not parallelize loops in the functions created by parallelization.  */
3298   if (!oacc_kernels_p
3299       && parallelized_function_p (cfun->decl))
3300     return false;
3301 
3302   /* Do not parallelize loops in offloaded functions.  */
3303   if (!oacc_kernels_p
3304       && oacc_get_fn_attrib (cfun->decl) != NULL)
3305      return false;
3306 
3307   if (cfun->has_nonlocal_label)
3308     return false;
3309 
3310   /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
3311      the argument to -ftree-parallelize-loops.  */
3312   if (oacc_kernels_p)
3313     n_threads = 0;
3314   else
3315     n_threads = flag_tree_parallelize_loops;
3316 
3317   gcc_obstack_init (&parloop_obstack);
3318   reduction_info_table_type reduction_list (10);
3319 
3320   calculate_dominance_info (CDI_DOMINATORS);
3321 
3322   FOR_EACH_LOOP (loop, 0)
3323     {
3324       if (loop == skip_loop)
3325 	{
3326 	  if (!loop->in_oacc_kernels_region
3327 	      && dump_file && (dump_flags & TDF_DETAILS))
3328 	    fprintf (dump_file,
3329 		     "Skipping loop %d as inner loop of parallelized loop\n",
3330 		     loop->num);
3331 
3332 	  skip_loop = loop->inner;
3333 	  continue;
3334 	}
3335       else
3336 	skip_loop = NULL;
3337 
3338       reduction_list.empty ();
3339 
3340       if (oacc_kernels_p)
3341 	{
3342 	  if (!loop->in_oacc_kernels_region)
3343 	    continue;
3344 
3345 	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
3346 	  if (loop->inner)
3347 	    skip_loop = loop->inner;
3348 
3349 	  if (dump_file && (dump_flags & TDF_DETAILS))
3350 	    fprintf (dump_file,
3351 		     "Trying loop %d with header bb %d in oacc kernels"
3352 		     " region\n", loop->num, loop->header->index);
3353 	}
3354 
3355       if (dump_file && (dump_flags & TDF_DETAILS))
3356       {
3357         fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
3358 	if (loop->inner)
3359 	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
3360 	else
3361 	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
3362       }
3363 
3364       if (!single_dom_exit (loop))
3365       {
3366 
3367         if (dump_file && (dump_flags & TDF_DETAILS))
3368 	  fprintf (dump_file, "loop is !single_dom_exit\n");
3369 
3370 	continue;
3371       }
3372 
3373       if (/* And of course, the loop must be parallelizable.  */
3374 	  !can_duplicate_loop_p (loop)
3375 	  || loop_has_blocks_with_irreducible_flag (loop)
3376 	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
3377 	  /* FIXME: the check for vector phi nodes could be removed.  */
3378 	  || loop_has_vector_phi_nodes (loop))
3379 	continue;
3380 
3381       estimated = estimated_loop_iterations_int (loop);
3382       if (estimated == -1)
3383 	estimated = get_likely_max_loop_iterations_int (loop);
3384       /* FIXME: Bypass this check as graphite doesn't update the
3385 	 count and frequency correctly now.  */
3386       if (!flag_loop_parallelize_all
3387 	  && !oacc_kernels_p
3388 	  && ((estimated != -1
3389 	       && (estimated
3390 		   < ((HOST_WIDE_INT) n_threads
3391 		      * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
3392 	      /* Do not bother with loops in cold areas.  */
3393 	      || optimize_loop_nest_for_size_p (loop)))
3394 	continue;
3395 
3396       if (!try_get_loop_niter (loop, &niter_desc))
3397 	continue;
3398 
3399       if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
3400 	continue;
3401 
3402       if (loop_has_phi_with_address_arg (loop))
3403 	continue;
3404 
3405       if (!loop->can_be_parallel
3406 	  && !loop_parallel_p (loop, &parloop_obstack))
3407 	continue;
3408 
3409       if (oacc_kernels_p
3410 	&& !oacc_entry_exit_ok (loop, &reduction_list))
3411 	{
3412 	  if (dump_file)
3413 	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
3414 	  continue;
3415 	}
3416 
3417       changed = true;
3418       skip_loop = loop->inner;
3419 
3420       if (dump_enabled_p ())
3421 	{
3422 	  dump_user_location_t loop_loc = find_loop_location (loop);
3423 	  if (loop->inner)
3424 	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3425 			     "parallelizing outer loop %d\n", loop->num);
3426 	  else
3427 	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3428 			     "parallelizing inner loop %d\n", loop->num);
3429 	}
3430 
3431       gen_parallel_loop (loop, &reduction_list,
3432 			 n_threads, &niter_desc, oacc_kernels_p);
3433     }
3434 
3435   obstack_free (&parloop_obstack, NULL);
3436 
3437   /* Parallelization will cause new function calls to be inserted through
3438      which local variables will escape.  Reset the points-to solution
3439      for ESCAPED.  */
3440   if (changed)
3441     pt_solution_reset (&cfun->gimple_df->escaped);
3442 
3443   return changed;
3444 }
3445 
3446 /* Parallelization.  */
3447 
3448 namespace {
3449 
3450 const pass_data pass_data_parallelize_loops =
3451 {
3452   GIMPLE_PASS, /* type */
3453   "parloops", /* name */
3454   OPTGROUP_LOOP, /* optinfo_flags */
3455   TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
3456   ( PROP_cfg | PROP_ssa ), /* properties_required */
3457   0, /* properties_provided */
3458   0, /* properties_destroyed */
3459   0, /* todo_flags_start */
3460   0, /* todo_flags_finish */
3461 };
3462 
3463 class pass_parallelize_loops : public gimple_opt_pass
3464 {
3465 public:
3466   pass_parallelize_loops (gcc::context *ctxt)
3467     : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
3468       oacc_kernels_p (false)
3469   {}
3470 
3471   /* opt_pass methods: */
3472   virtual bool gate (function *)
3473   {
3474     if (oacc_kernels_p)
3475       return flag_openacc;
3476     else
3477       return flag_tree_parallelize_loops > 1;
3478   }
3479   virtual unsigned int execute (function *);
3480   opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
3481   void set_pass_param (unsigned int n, bool param)
3482     {
3483       gcc_assert (n == 0);
3484       oacc_kernels_p = param;
3485     }
3486 
3487  private:
3488   bool oacc_kernels_p;
3489 }; // class pass_parallelize_loops
3490 
3491 unsigned
3492 pass_parallelize_loops::execute (function *fun)
3493 {
3494   tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3495   if (nthreads == NULL_TREE)
3496     return 0;
3497 
3498   bool in_loop_pipeline = scev_initialized_p ();
3499   if (!in_loop_pipeline)
3500     loop_optimizer_init (LOOPS_NORMAL
3501 			 | LOOPS_HAVE_RECORDED_EXITS);
3502 
3503   if (number_of_loops (fun) <= 1)
3504     return 0;
3505 
3506   if (!in_loop_pipeline)
3507     {
3508       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3509       scev_initialize ();
3510     }
3511 
3512   unsigned int todo = 0;
3513   if (parallelize_loops (oacc_kernels_p))
3514     {
3515       fun->curr_properties &= ~(PROP_gimple_eomp);
3516 
3517       checking_verify_loop_structure ();
3518 
3519       todo |= TODO_update_ssa;
3520     }
3521 
3522   if (!in_loop_pipeline)
3523     {
3524       scev_finalize ();
3525       loop_optimizer_finalize ();
3526     }
3527 
3528   return todo;
3529 }
3530 
3531 } // anon namespace
3532 
3533 gimple_opt_pass *
3534 make_pass_parallelize_loops (gcc::context *ctxt)
3535 {
3536   return new pass_parallelize_loops (ctxt);
3537 }
3538