xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/omp-expand.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp), and so forth.
4 
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 
62 /* OMP region information.  Every parallel and workshare
63    directive is enclosed between two markers, the OMP_* directive
64    and a corresponding GIMPLE_OMP_RETURN statement.  */
65 
66 struct omp_region
67 {
68   /* The enclosing region.  */
69   struct omp_region *outer;
70 
71   /* First child region.  */
72   struct omp_region *inner;
73 
74   /* Next peer region.  */
75   struct omp_region *next;
76 
77   /* Block containing the omp directive as its last stmt.  */
78   basic_block entry;
79 
80   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
81   basic_block exit;
82 
83   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
84   basic_block cont;
85 
86   /* If this is a combined parallel+workshare region, this is a list
87      of additional arguments needed by the combined parallel+workshare
88      library call.  */
89   vec<tree, va_gc> *ws_args;
90 
91   /* The code for the omp directive of this region.  */
92   enum gimple_code type;
93 
94   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
95   enum omp_clause_schedule_kind sched_kind;
96 
97   /* Schedule modifiers.  */
98   unsigned char sched_modifiers;
99 
100   /* True if this is a combined parallel+workshare region.  */
101   bool is_combined_parallel;
102 
103   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104      a depend clause.  */
105   gomp_ordered *ord_stmt;
106 };
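
/* For example, a construct such as

	#pragma omp parallel
	#pragma omp for schedule (dynamic)
	for (i = 0; i < n; i++) ...

   produces a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region.  determine_parallel_type below checks whether such
   a pair may be expanded with a single combined parallel+workshare library
   call and records the result in IS_COMBINED_PARALLEL.  */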
107 
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
110 
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 				     bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
115 
116 /* Return true if REGION is a combined parallel+workshare region.  */
117 
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
120 {
121   return region->is_combined_parallel;
122 }
123 
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125    is the immediate dominator of PAR_ENTRY_BB, return true if there
126    are no data dependencies that would prevent expanding the parallel
127    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128 
129    When expanding a combined parallel+workshare region, the call to
130    the child function may need additional arguments in the case of
131    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
132    computed out of variables passed in from the parent to the child
133    via 'struct .omp_data_s'.  For instance:
134 
135 	#pragma omp parallel for schedule (guided, i * 4)
136 	for (j ...)
137 
138    Is lowered into:
139 
140 	# BLOCK 2 (PAR_ENTRY_BB)
141 	.omp_data_o.i = i;
142 	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
143 
144 	# BLOCK 3 (WS_ENTRY_BB)
145 	.omp_data_i = &.omp_data_o;
146 	D.1667 = .omp_data_i->i;
147 	D.1598 = D.1667 * 4;
148 	#pragma omp for schedule (guided, D.1598)
149 
150    When we outline the parallel region, the call to the child function
151    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152    that value is computed *after* the call site.  So, in principle we
153    cannot do the transformation.
154 
155    To see whether the code in WS_ENTRY_BB blocks the combined
156    parallel+workshare call, we collect all the variables used in the
157    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
159    call.
160 
161    FIXME.  If we had the SSA form built at this point, we could merely
162    hoist the code in block 3 into block 2 and be done with it.  But at
163    this point we don't have dataflow information and though we could
164    hack something up here, it is really not worth the aggravation.  */
165 
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
168 {
169   struct omp_for_data fd;
170   gimple *ws_stmt = last_stmt (ws_entry_bb);
171 
172   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173     return true;
174 
175   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176   if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177     return false;
178 
179   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180 
181   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182     return false;
183   if (fd.iter_type != long_integer_type_node)
184     return false;
185 
186   /* FIXME.  We give up too easily here.  If any of these arguments
187      are not constants, they will likely involve variables that have
188      been mapped into fields of .omp_data_s for sharing with the child
189      function.  With appropriate data flow, it would be possible to
190      see through this.  */
191   if (!is_gimple_min_invariant (fd.loop.n1)
192       || !is_gimple_min_invariant (fd.loop.n2)
193       || !is_gimple_min_invariant (fd.loop.step)
194       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195     return false;
196 
197   return true;
198 }
199 
200 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
201    of the simd modifier (SIMD_SCHEDULE).  */
202 
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 {
206   if (!simd_schedule || integer_zerop (chunk_size))
207     return chunk_size;
208 
209   poly_uint64 vf = omp_max_vf ();
210   if (known_eq (vf, 1U))
211     return chunk_size;
212 
213   tree type = TREE_TYPE (chunk_size);
214   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 			    build_int_cst (type, vf - 1));
216   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 		      build_int_cst (type, -vf));
218 }
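
/* For instance, with a vectorization factor of 8 a chunk size of 10 becomes
   (10 + 7) & -8 == 16; i.e. the chunk size is rounded up to a multiple of
   the vectorization factor so that each chunk covers whole SIMD-sized
   blocks of iterations.  */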
219 
220 /* Collect additional arguments needed to emit a combined
221    parallel+workshare call.  WS_STMT is the workshare directive being
222    expanded.  */
223 
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 {
227   tree t;
228   location_t loc = gimple_location (ws_stmt);
229   vec<tree, va_gc> *ws_args;
230 
231   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232     {
233       struct omp_for_data fd;
234       tree n1, n2;
235 
236       omp_extract_for_data (for_stmt, &fd, NULL);
237       n1 = fd.loop.n1;
238       n2 = fd.loop.n2;
239 
240       if (gimple_omp_for_combined_into_p (for_stmt))
241 	{
242 	  tree innerc
243 	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 			       OMP_CLAUSE__LOOPTEMP_);
245 	  gcc_assert (innerc);
246 	  n1 = OMP_CLAUSE_DECL (innerc);
247 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 				    OMP_CLAUSE__LOOPTEMP_);
249 	  gcc_assert (innerc);
250 	  n2 = OMP_CLAUSE_DECL (innerc);
251 	}
252 
253       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254 
255       t = fold_convert_loc (loc, long_integer_type_node, n1);
256       ws_args->quick_push (t);
257 
258       t = fold_convert_loc (loc, long_integer_type_node, n2);
259       ws_args->quick_push (t);
260 
261       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262       ws_args->quick_push (t);
263 
264       if (fd.chunk_size)
265 	{
266 	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 	  ws_args->quick_push (t);
269 	}
270 
271       return ws_args;
272     }
273   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274     {
275       /* Number of sections is equal to the number of edges from the
276 	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 	 the exit of the sections region.  */
278       basic_block bb = single_succ (gimple_bb (ws_stmt));
279       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280       vec_alloc (ws_args, 1);
281       ws_args->quick_push (t);
282       return ws_args;
283     }
284 
285   gcc_unreachable ();
286 }
287 
288 /* Discover whether REGION is a combined parallel+workshare region.  */
289 
290 static void
291 determine_parallel_type (struct omp_region *region)
292 {
293   basic_block par_entry_bb, par_exit_bb;
294   basic_block ws_entry_bb, ws_exit_bb;
295 
296   if (region == NULL || region->inner == NULL
297       || region->exit == NULL || region->inner->exit == NULL
298       || region->inner->cont == NULL)
299     return;
300 
301   /* We only support parallel+for and parallel+sections.  */
302   if (region->type != GIMPLE_OMP_PARALLEL
303       || (region->inner->type != GIMPLE_OMP_FOR
304 	  && region->inner->type != GIMPLE_OMP_SECTIONS))
305     return;
306 
307   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308      WS_EXIT_BB -> PAR_EXIT_BB.  */
309   par_entry_bb = region->entry;
310   par_exit_bb = region->exit;
311   ws_entry_bb = region->inner->entry;
312   ws_exit_bb = region->inner->exit;
313 
314   /* Give up for task reductions on the parallel; while it is implementable,
315      adding another big set of APIs or slowing down the normal paths is
316      not acceptable.  */
317   tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318   if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319     return;
320 
321   if (single_succ (par_entry_bb) == ws_entry_bb
322       && single_succ (ws_exit_bb) == par_exit_bb
323       && workshare_safe_to_combine_p (ws_entry_bb)
324       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 	  || (last_and_only_stmt (ws_entry_bb)
326 	      && last_and_only_stmt (par_exit_bb))))
327     {
328       gimple *par_stmt = last_stmt (par_entry_bb);
329       gimple *ws_stmt = last_stmt (ws_entry_bb);
330 
331       if (region->inner->type == GIMPLE_OMP_FOR)
332 	{
333 	  /* If this is a combined parallel loop, we need to determine
334 	     whether or not to use the combined library calls.  There
335 	     are two cases where we do not apply the transformation:
336 	     static loops and any kind of ordered loop.  In the first
337 	     case, we already open code the loop so there is no need
338 	     to do anything else.  In the latter case, the combined
339 	     parallel loop call would still need extra synchronization
340 	     to implement ordered semantics, so there would not be any
341 	     gain in using the combined call.  */
342 	  tree clauses = gimple_omp_for_clauses (ws_stmt);
343 	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 	  if (c == NULL
345 	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 		  == OMP_CLAUSE_SCHEDULE_STATIC)
347 	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
349 	    return;
350 	}
351       else if (region->inner->type == GIMPLE_OMP_SECTIONS
352 	       && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
353 				   OMP_CLAUSE__REDUCTEMP_))
354 	return;
355 
356       region->is_combined_parallel = true;
357       region->inner->is_combined_parallel = true;
358       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
359     }
360 }
361 
362 /* Debugging dumps for parallel regions.  */
363 void dump_omp_region (FILE *, struct omp_region *, int);
364 void debug_omp_region (struct omp_region *);
365 void debug_all_omp_regions (void);
366 
367 /* Dump the parallel region tree rooted at REGION.  */
368 
369 void
370 dump_omp_region (FILE *file, struct omp_region *region, int indent)
371 {
372   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
373 	   gimple_code_name[region->type]);
374 
375   if (region->inner)
376     dump_omp_region (file, region->inner, indent + 4);
377 
378   if (region->cont)
379     {
380       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
381 	       region->cont->index);
382     }
383 
384   if (region->exit)
385     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
386 	     region->exit->index);
387   else
388     fprintf (file, "%*s[no exit marker]\n", indent, "");
389 
390   if (region->next)
391     dump_omp_region (file, region->next, indent);
392 }
393 
394 DEBUG_FUNCTION void
395 debug_omp_region (struct omp_region *region)
396 {
397   dump_omp_region (stderr, region, 0);
398 }
399 
400 DEBUG_FUNCTION void
401 debug_all_omp_regions (void)
402 {
403   dump_omp_region (stderr, root_omp_region, 0);
404 }
405 
406 /* Create a new parallel region starting at BB inside region PARENT.  */
407 
408 static struct omp_region *
409 new_omp_region (basic_block bb, enum gimple_code type,
410 		struct omp_region *parent)
411 {
412   struct omp_region *region = XCNEW (struct omp_region);
413 
414   region->outer = parent;
415   region->entry = bb;
416   region->type = type;
417 
418   if (parent)
419     {
420       /* This is a nested region.  Add it to the list of inner
421 	 regions in PARENT.  */
422       region->next = parent->inner;
423       parent->inner = region;
424     }
425   else
426     {
427       /* This is a toplevel region.  Add it to the list of toplevel
428 	 regions in ROOT_OMP_REGION.  */
429       region->next = root_omp_region;
430       root_omp_region = region;
431     }
432 
433   return region;
434 }
435 
436 /* Release the memory associated with the region tree rooted at REGION.  */
437 
438 static void
439 free_omp_region_1 (struct omp_region *region)
440 {
441   struct omp_region *i, *n;
442 
443   for (i = region->inner; i ; i = n)
444     {
445       n = i->next;
446       free_omp_region_1 (i);
447     }
448 
449   free (region);
450 }
451 
452 /* Release the memory for the entire omp region tree.  */
453 
454 void
455 omp_free_regions (void)
456 {
457   struct omp_region *r, *n;
458   for (r = root_omp_region; r ; r = n)
459     {
460       n = r->next;
461       free_omp_region_1 (r);
462     }
463   root_omp_region = NULL;
464 }
465 
466 /* A convenience function to build an empty GIMPLE_COND with just the
467    condition.  */
468 
469 static gcond *
470 gimple_build_cond_empty (tree cond)
471 {
472   enum tree_code pred_code;
473   tree lhs, rhs;
474 
475   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
476   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
477 }
478 
479 /* Return true if a parallel REGION is within a declare target function or
480    within a target region and is not a part of a gridified target.  */
481 
482 static bool
483 parallel_needs_hsa_kernel_p (struct omp_region *region)
484 {
485   bool indirect = false;
486   for (region = region->outer; region; region = region->outer)
487     {
488       if (region->type == GIMPLE_OMP_PARALLEL)
489 	indirect = true;
490       else if (region->type == GIMPLE_OMP_TARGET)
491 	{
492 	  gomp_target *tgt_stmt
493 	    = as_a <gomp_target *> (last_stmt (region->entry));
494 
495 	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
496 			       OMP_CLAUSE__GRIDDIM_))
497 	    return indirect;
498 	  else
499 	    return true;
500 	}
501     }
502 
503   if (lookup_attribute ("omp declare target",
504 			DECL_ATTRIBUTES (current_function_decl)))
505     return true;
506 
507   return false;
508 }
509 
510 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
511    Add CHILD_FNDECL to the decl chain of the supercontext of the block
512    ENTRY_BLOCK - this is the block which originally contained the
513    code from which CHILD_FNDECL was created.
514 
515    Together, these actions ensure that the debug info for the outlined
516    function will be emitted with the correct lexical scope.  */
517 
518 static void
519 adjust_context_and_scope (struct omp_region *region, tree entry_block,
520 			  tree child_fndecl)
521 {
522   tree parent_fndecl = NULL_TREE;
523   gimple *entry_stmt;
524   /* OMP expansion expands inner regions before outer ones, so if
525      we e.g. have an explicit task region nested in a parallel region, when
526      expanding the task region current_function_decl will be the original
527      source function, but we actually want to use as context the child
528      function of the parallel.  */
529   for (region = region->outer;
530        region && parent_fndecl == NULL_TREE; region = region->outer)
531     switch (region->type)
532       {
533       case GIMPLE_OMP_PARALLEL:
534       case GIMPLE_OMP_TASK:
535       case GIMPLE_OMP_TEAMS:
536 	entry_stmt = last_stmt (region->entry);
537 	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
538 	break;
539       case GIMPLE_OMP_TARGET:
540 	entry_stmt = last_stmt (region->entry);
541 	parent_fndecl
542 	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
543 	break;
544       default:
545 	break;
546       }
547 
548   if (parent_fndecl == NULL_TREE)
549     parent_fndecl = current_function_decl;
550   DECL_CONTEXT (child_fndecl) = parent_fndecl;
551 
552   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
553     {
554       tree b = BLOCK_SUPERCONTEXT (entry_block);
555       if (TREE_CODE (b) == BLOCK)
556         {
557 	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
558 	  BLOCK_VARS (b) = child_fndecl;
559 	}
560     }
561 }
562 
563 /* Build the function calls to GOMP_parallel etc to actually
564    generate the parallel operation.  REGION is the parallel region
565    being expanded.  BB is the block where to insert the code.  WS_ARGS
566    will be set if this is a call to a combined parallel+workshare
567    construct; it contains the list of additional arguments needed by
568    the workshare construct.  */
569 
570 static void
571 expand_parallel_call (struct omp_region *region, basic_block bb,
572 		      gomp_parallel *entry_stmt,
573 		      vec<tree, va_gc> *ws_args)
574 {
575   tree t, t1, t2, val, cond, c, clauses, flags;
576   gimple_stmt_iterator gsi;
577   gimple *stmt;
578   enum built_in_function start_ix;
579   int start_ix2;
580   location_t clause_loc;
581   vec<tree, va_gc> *args;
582 
583   clauses = gimple_omp_parallel_clauses (entry_stmt);
584 
585   /* Determine what flavor of GOMP_parallel we will be
586      emitting.  */
587   start_ix = BUILT_IN_GOMP_PARALLEL;
588   tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
589   if (rtmp)
590     start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
591   else if (is_combined_parallel (region))
592     {
593       switch (region->inner->type)
594 	{
595 	case GIMPLE_OMP_FOR:
596 	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
597 	  switch (region->inner->sched_kind)
598 	    {
599 	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
600 	      if ((region->inner->sched_modifiers
601 		   & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
602 		start_ix2 = 6;
603 	      else if ((region->inner->sched_modifiers
604 			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
605 		start_ix2 = 7;
606 	      else
607 		start_ix2 = 3;
608 	      break;
609 	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
610 	    case OMP_CLAUSE_SCHEDULE_GUIDED:
611 	      if ((region->inner->sched_modifiers
612 		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
613 		{
614 		  start_ix2 = 3 + region->inner->sched_kind;
615 		  break;
616 		}
617 	      /* FALLTHRU */
618 	    default:
619 	      start_ix2 = region->inner->sched_kind;
620 	      break;
621 	    }
622 	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
623 	  start_ix = (enum built_in_function) start_ix2;
624 	  break;
625 	case GIMPLE_OMP_SECTIONS:
626 	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
627 	  break;
628 	default:
629 	  gcc_unreachable ();
630 	}
631     }
632 
633   /* By default, the value of NUM_THREADS is zero (selected at run time)
634      and there is no conditional.  */
635   cond = NULL_TREE;
636   val = build_int_cst (unsigned_type_node, 0);
637   flags = build_int_cst (unsigned_type_node, 0);
638 
639   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
640   if (c)
641     cond = OMP_CLAUSE_IF_EXPR (c);
642 
643   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
644   if (c)
645     {
646       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
647       clause_loc = OMP_CLAUSE_LOCATION (c);
648     }
649   else
650     clause_loc = gimple_location (entry_stmt);
651 
652   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
653   if (c)
654     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
655 
656   /* Ensure 'val' is of the correct type.  */
657   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
658 
659   /* If we found the clause 'if (cond)', build either
660      (cond != 0) or (cond ? val : 1u).  */
661   if (cond)
662     {
663       cond = gimple_boolify (cond);
664 
665       if (integer_zerop (val))
666 	val = fold_build2_loc (clause_loc,
667 			   EQ_EXPR, unsigned_type_node, cond,
668 			   build_int_cst (TREE_TYPE (cond), 0));
669       else
670 	{
671 	  basic_block cond_bb, then_bb, else_bb;
672 	  edge e, e_then, e_else;
673 	  tree tmp_then, tmp_else, tmp_join, tmp_var;
674 
675 	  tmp_var = create_tmp_var (TREE_TYPE (val));
676 	  if (gimple_in_ssa_p (cfun))
677 	    {
678 	      tmp_then = make_ssa_name (tmp_var);
679 	      tmp_else = make_ssa_name (tmp_var);
680 	      tmp_join = make_ssa_name (tmp_var);
681 	    }
682 	  else
683 	    {
684 	      tmp_then = tmp_var;
685 	      tmp_else = tmp_var;
686 	      tmp_join = tmp_var;
687 	    }
688 
689 	  e = split_block_after_labels (bb);
690 	  cond_bb = e->src;
691 	  bb = e->dest;
692 	  remove_edge (e);
693 
694 	  then_bb = create_empty_bb (cond_bb);
695 	  else_bb = create_empty_bb (then_bb);
696 	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
697 	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
698 
699 	  stmt = gimple_build_cond_empty (cond);
700 	  gsi = gsi_start_bb (cond_bb);
701 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
702 
703 	  gsi = gsi_start_bb (then_bb);
704 	  expand_omp_build_assign (&gsi, tmp_then, val, true);
705 
706 	  gsi = gsi_start_bb (else_bb);
707 	  expand_omp_build_assign (&gsi, tmp_else,
708 				   build_int_cst (unsigned_type_node, 1),
709 				   true);
710 
711 	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
712 	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
713 	  add_bb_to_loop (then_bb, cond_bb->loop_father);
714 	  add_bb_to_loop (else_bb, cond_bb->loop_father);
715 	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
716 	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
717 
718 	  if (gimple_in_ssa_p (cfun))
719 	    {
720 	      gphi *phi = create_phi_node (tmp_join, bb);
721 	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
722 	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
723 	    }
724 
725 	  val = tmp_join;
726 	}
727 
728       gsi = gsi_start_bb (bb);
729       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
730 				      false, GSI_CONTINUE_LINKING);
731     }
732 
733   gsi = gsi_last_nondebug_bb (bb);
734   t = gimple_omp_parallel_data_arg (entry_stmt);
735   if (t == NULL)
736     t1 = null_pointer_node;
737   else
738     t1 = build_fold_addr_expr (t);
739   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
740   t2 = build_fold_addr_expr (child_fndecl);
741 
742   vec_alloc (args, 4 + vec_safe_length (ws_args));
743   args->quick_push (t2);
744   args->quick_push (t1);
745   args->quick_push (val);
746   if (ws_args)
747     args->splice (*ws_args);
748   args->quick_push (flags);
749 
750   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
751 			       builtin_decl_explicit (start_ix), args);
752 
753   if (rtmp)
754     {
755       tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
756       t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
757 		  fold_convert (type,
758 				fold_convert (pointer_sized_int_node, t)));
759     }
760   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
761 			    false, GSI_CONTINUE_LINKING);
762 
763   if (hsa_gen_requested_p ()
764       && parallel_needs_hsa_kernel_p (region))
765     {
766       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
767       hsa_register_kernel (child_cnode);
768     }
769 }
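
/* Roughly, a plain parallel region therefore ends up as a call such as

	GOMP_parallel (bar.omp_fn.0, &.omp_data_o, 0, 0);

   whereas a combined parallel+for with, say, a dynamic schedule calls the
   corresponding GOMP_parallel_loop_* entry point, with the loop bounds,
   step and optional chunk size from WS_ARGS spliced in between the number
   of threads and the flags argument.  */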
770 
771 /* Build the function call to GOMP_task to actually
772    generate the task operation.  BB is the block where to insert the code.  */
773 
774 static void
775 expand_task_call (struct omp_region *region, basic_block bb,
776 		  gomp_task *entry_stmt)
777 {
778   tree t1, t2, t3;
779   gimple_stmt_iterator gsi;
780   location_t loc = gimple_location (entry_stmt);
781 
782   tree clauses = gimple_omp_task_clauses (entry_stmt);
783 
784   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
785   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
786   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
787   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
788   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
789   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
790 
791   unsigned int iflags
792     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
793       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
794       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
795 
796   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
797   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
798   tree num_tasks = NULL_TREE;
799   bool ull = false;
800   if (taskloop_p)
801     {
802       gimple *g = last_stmt (region->outer->entry);
803       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
804 		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
805       struct omp_for_data fd;
806       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
807       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
808       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
809 				OMP_CLAUSE__LOOPTEMP_);
810       startvar = OMP_CLAUSE_DECL (startvar);
811       endvar = OMP_CLAUSE_DECL (endvar);
812       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
813       if (fd.loop.cond_code == LT_EXPR)
814 	iflags |= GOMP_TASK_FLAG_UP;
815       tree tclauses = gimple_omp_for_clauses (g);
816       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
817       if (num_tasks)
818 	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
819       else
820 	{
821 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
822 	  if (num_tasks)
823 	    {
824 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
825 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
826 	    }
827 	  else
828 	    num_tasks = integer_zero_node;
829 	}
830       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
831       if (ifc == NULL_TREE)
832 	iflags |= GOMP_TASK_FLAG_IF;
833       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
834 	iflags |= GOMP_TASK_FLAG_NOGROUP;
835       ull = fd.iter_type == long_long_unsigned_type_node;
836       if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
837 	iflags |= GOMP_TASK_FLAG_REDUCTION;
838     }
839   else if (priority)
840     iflags |= GOMP_TASK_FLAG_PRIORITY;
841 
842   tree flags = build_int_cst (unsigned_type_node, iflags);
843 
844   tree cond = boolean_true_node;
845   if (ifc)
846     {
847       if (taskloop_p)
848 	{
849 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
850 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
851 			       build_int_cst (unsigned_type_node,
852 					      GOMP_TASK_FLAG_IF),
853 			       build_int_cst (unsigned_type_node, 0));
854 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
855 				   flags, t);
856 	}
857       else
858 	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
859     }
860 
861   if (finalc)
862     {
863       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
864       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
865 			   build_int_cst (unsigned_type_node,
866 					  GOMP_TASK_FLAG_FINAL),
867 			   build_int_cst (unsigned_type_node, 0));
868       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
869     }
870   if (depend)
871     depend = OMP_CLAUSE_DECL (depend);
872   else
873     depend = build_int_cst (ptr_type_node, 0);
874   if (priority)
875     priority = fold_convert (integer_type_node,
876 			     OMP_CLAUSE_PRIORITY_EXPR (priority));
877   else
878     priority = integer_zero_node;
879 
880   gsi = gsi_last_nondebug_bb (bb);
881   tree t = gimple_omp_task_data_arg (entry_stmt);
882   if (t == NULL)
883     t2 = null_pointer_node;
884   else
885     t2 = build_fold_addr_expr_loc (loc, t);
886   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
887   t = gimple_omp_task_copy_fn (entry_stmt);
888   if (t == NULL)
889     t3 = null_pointer_node;
890   else
891     t3 = build_fold_addr_expr_loc (loc, t);
892 
893   if (taskloop_p)
894     t = build_call_expr (ull
895 			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
896 			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
897 			 11, t1, t2, t3,
898 			 gimple_omp_task_arg_size (entry_stmt),
899 			 gimple_omp_task_arg_align (entry_stmt), flags,
900 			 num_tasks, priority, startvar, endvar, step);
901   else
902     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
903 			 9, t1, t2, t3,
904 			 gimple_omp_task_arg_size (entry_stmt),
905 			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
906 			 depend, priority);
907 
908   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
909 			    false, GSI_CONTINUE_LINKING);
910 }
911 
912 /* Build the function call to GOMP_taskwait_depend to actually
913    generate the taskwait operation.  BB is the block where to insert the
914    code.  */
915 
916 static void
917 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
918 {
919   tree clauses = gimple_omp_task_clauses (entry_stmt);
920   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
921   if (depend == NULL_TREE)
922     return;
923 
924   depend = OMP_CLAUSE_DECL (depend);
925 
926   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
927   tree t
928     = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
929 		       1, depend);
930 
931   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
932 			    false, GSI_CONTINUE_LINKING);
933 }
934 
935 /* Build the function call to GOMP_teams_reg to actually
936    generate the host teams operation.  BB is the block where to insert
937    the code.  */
938 
939 static void
940 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
941 {
942   tree clauses = gimple_omp_teams_clauses (entry_stmt);
943   tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
944   if (num_teams == NULL_TREE)
945     num_teams = build_int_cst (unsigned_type_node, 0);
946   else
947     {
948       num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
949       num_teams = fold_convert (unsigned_type_node, num_teams);
950     }
951   tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
952   if (thread_limit == NULL_TREE)
953     thread_limit = build_int_cst (unsigned_type_node, 0);
954   else
955     {
956       thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
957       thread_limit = fold_convert (unsigned_type_node, thread_limit);
958     }
959 
960   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
961   tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
962   if (t == NULL)
963     t1 = null_pointer_node;
964   else
965     t1 = build_fold_addr_expr (t);
966   tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
967   tree t2 = build_fold_addr_expr (child_fndecl);
968 
969   vec<tree, va_gc> *args;
970   vec_alloc (args, 5);
971   args->quick_push (t2);
972   args->quick_push (t1);
973   args->quick_push (num_teams);
974   args->quick_push (thread_limit);
975   /* For future extensibility.  */
976   args->quick_push (build_zero_cst (unsigned_type_node));
977 
978   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
979 			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
980 			       args);
981 
982   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
983 			    false, GSI_CONTINUE_LINKING);
984 }
985 
986 /* Chain all the DECLs in vector V by their DECL_CHAIN fields.  */
987 
988 static tree
989 vec2chain (vec<tree, va_gc> *v)
990 {
991   tree chain = NULL_TREE, t;
992   unsigned ix;
993 
994   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
995     {
996       DECL_CHAIN (t) = chain;
997       chain = t;
998     }
999 
1000   return chain;
1001 }
1002 
1003 /* Remove barriers in REGION->EXIT's block.  Note that this is only
1004    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
1005    is an implicit barrier, any barrier that a workshare inside the
1006    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region can
1007    now be removed.  */
1008 
1009 static void
1010 remove_exit_barrier (struct omp_region *region)
1011 {
1012   gimple_stmt_iterator gsi;
1013   basic_block exit_bb;
1014   edge_iterator ei;
1015   edge e;
1016   gimple *stmt;
1017   int any_addressable_vars = -1;
1018 
1019   exit_bb = region->exit;
1020 
1021   /* If the parallel region doesn't return, we don't have REGION->EXIT
1022      block at all.  */
1023   if (! exit_bb)
1024     return;
1025 
1026   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
1027      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
1028      statements that can appear in between are extremely limited -- no
1029      memory operations at all.  Here, we allow nothing at all, so the
1030      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
1031   gsi = gsi_last_nondebug_bb (exit_bb);
1032   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1033   gsi_prev_nondebug (&gsi);
1034   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1035     return;
1036 
1037   FOR_EACH_EDGE (e, ei, exit_bb->preds)
1038     {
1039       gsi = gsi_last_nondebug_bb (e->src);
1040       if (gsi_end_p (gsi))
1041 	continue;
1042       stmt = gsi_stmt (gsi);
1043       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1044 	  && !gimple_omp_return_nowait_p (stmt))
1045 	{
1046 	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
1047 	     in many cases.  If there could be tasks queued, the barrier
1048 	     might be needed to let the tasks run before some local
1049 	     variable of the parallel that the task uses as shared
1050 	     runs out of scope.  The task can be spawned either
1051 	     from within the current function (this would be easy to check)
1052 	     or from some function it calls and gets passed an address
1053 	     of such a variable.  */
1054 	  if (any_addressable_vars < 0)
1055 	    {
1056 	      gomp_parallel *parallel_stmt
1057 		= as_a <gomp_parallel *> (last_stmt (region->entry));
1058 	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1059 	      tree local_decls, block, decl;
1060 	      unsigned ix;
1061 
1062 	      any_addressable_vars = 0;
1063 	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1064 		if (TREE_ADDRESSABLE (decl))
1065 		  {
1066 		    any_addressable_vars = 1;
1067 		    break;
1068 		  }
1069 	      for (block = gimple_block (stmt);
1070 		   !any_addressable_vars
1071 		   && block
1072 		   && TREE_CODE (block) == BLOCK;
1073 		   block = BLOCK_SUPERCONTEXT (block))
1074 		{
1075 		  for (local_decls = BLOCK_VARS (block);
1076 		       local_decls;
1077 		       local_decls = DECL_CHAIN (local_decls))
1078 		    if (TREE_ADDRESSABLE (local_decls))
1079 		      {
1080 			any_addressable_vars = 1;
1081 			break;
1082 		      }
1083 		  if (block == gimple_block (parallel_stmt))
1084 		    break;
1085 		}
1086 	    }
1087 	  if (!any_addressable_vars)
1088 	    gimple_omp_return_set_nowait (stmt);
1089 	}
1090     }
1091 }
1092 
1093 static void
1094 remove_exit_barriers (struct omp_region *region)
1095 {
1096   if (region->type == GIMPLE_OMP_PARALLEL)
1097     remove_exit_barrier (region);
1098 
1099   if (region->inner)
1100     {
1101       region = region->inner;
1102       remove_exit_barriers (region);
1103       while (region->next)
1104 	{
1105 	  region = region->next;
1106 	  remove_exit_barriers (region);
1107 	}
1108     }
1109 }
1110 
1111 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1112    calls.  These can't be declared as const functions, but
1113    within one parallel body they are constant, so they can be
1114    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1115    which are declared const.  Similarly for a task body, except
1116    that in an untied task omp_get_thread_num () can change at any task
1117    scheduling point.  */
1118 
1119 static void
1120 optimize_omp_library_calls (gimple *entry_stmt)
1121 {
1122   basic_block bb;
1123   gimple_stmt_iterator gsi;
1124   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1125   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1126   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1127   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1128   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1129 		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1130 					  OMP_CLAUSE_UNTIED) != NULL);
1131 
1132   FOR_EACH_BB_FN (bb, cfun)
1133     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1134       {
1135 	gimple *call = gsi_stmt (gsi);
1136 	tree decl;
1137 
1138 	if (is_gimple_call (call)
1139 	    && (decl = gimple_call_fndecl (call))
1140 	    && DECL_EXTERNAL (decl)
1141 	    && TREE_PUBLIC (decl)
1142 	    && DECL_INITIAL (decl) == NULL)
1143 	  {
1144 	    tree built_in;
1145 
1146 	    if (DECL_NAME (decl) == thr_num_id)
1147 	      {
1148 		/* In #pragma omp task untied omp_get_thread_num () can change
1149 		   during the execution of the task region.  */
1150 		if (untied_task)
1151 		  continue;
1152 		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1153 	      }
1154 	    else if (DECL_NAME (decl) == num_thr_id)
1155 	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1156 	    else
1157 	      continue;
1158 
1159 	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1160 		|| gimple_call_num_args (call) != 0)
1161 	      continue;
1162 
1163 	    if (flag_exceptions && !TREE_NOTHROW (decl))
1164 	      continue;
1165 
1166 	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1167 		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1168 					TREE_TYPE (TREE_TYPE (built_in))))
1169 	      continue;
1170 
1171 	    gimple_call_set_fndecl (call, built_in);
1172 	  }
1173       }
1174 }
1175 
1176 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1177    regimplified.  */
1178 
1179 static tree
1180 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1181 {
1182   tree t = *tp;
1183 
1184   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1185   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1186     return t;
1187 
1188   if (TREE_CODE (t) == ADDR_EXPR)
1189     recompute_tree_invariant_for_addr_expr (t);
1190 
1191   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1192   return NULL_TREE;
1193 }
1194 
1195 /* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1196 
1197 static void
1198 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1199 			 bool after)
1200 {
1201   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1202   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1203 				   !after, after ? GSI_CONTINUE_LINKING
1204 						 : GSI_SAME_STMT);
1205   gimple *stmt = gimple_build_assign (to, from);
1206   if (after)
1207     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1208   else
1209     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1210   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1211       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1212     {
1213       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1214       gimple_regimplify_operands (stmt, &gsi);
1215     }
1216 }
1217 
1218 /* Expand the OpenMP parallel, teams or task directive starting at REGION.  */
1219 
1220 static void
1221 expand_omp_taskreg (struct omp_region *region)
1222 {
1223   basic_block entry_bb, exit_bb, new_bb;
1224   struct function *child_cfun;
1225   tree child_fn, block, t;
1226   gimple_stmt_iterator gsi;
1227   gimple *entry_stmt, *stmt;
1228   edge e;
1229   vec<tree, va_gc> *ws_args;
1230 
1231   entry_stmt = last_stmt (region->entry);
1232   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1233       && gimple_omp_task_taskwait_p (entry_stmt))
1234     {
1235       new_bb = region->entry;
1236       gsi = gsi_last_nondebug_bb (region->entry);
1237       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1238       gsi_remove (&gsi, true);
1239       expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1240       return;
1241     }
1242 
1243   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1244   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1245 
1246   entry_bb = region->entry;
1247   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1248     exit_bb = region->cont;
1249   else
1250     exit_bb = region->exit;
1251 
1252   if (is_combined_parallel (region))
1253     ws_args = region->ws_args;
1254   else
1255     ws_args = NULL;
1256 
1257   if (child_cfun->cfg)
1258     {
1259       /* Due to inlining, it may happen that we have already outlined
1260 	 the region, in which case all we need to do is make the
1261 	 sub-graph unreachable and emit the parallel call.  */
1262       edge entry_succ_e, exit_succ_e;
1263 
1264       entry_succ_e = single_succ_edge (entry_bb);
1265 
1266       gsi = gsi_last_nondebug_bb (entry_bb);
1267       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1268 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1269 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1270       gsi_remove (&gsi, true);
1271 
1272       new_bb = entry_bb;
1273       if (exit_bb)
1274 	{
1275 	  exit_succ_e = single_succ_edge (exit_bb);
1276 	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1277 	}
1278       remove_edge_and_dominated_blocks (entry_succ_e);
1279     }
1280   else
1281     {
1282       unsigned srcidx, dstidx, num;
1283 
1284       /* If the parallel region needs data sent from the parent
1285 	 function, then the very first statement (except possible
1286 	 tree profile counter updates) of the parallel body
1287 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1288 	 &.OMP_DATA_O is passed as an argument to the child function,
1289 	 we need to replace it with the argument as seen by the child
1290 	 function.
1291 
1292 	 In most cases, this will end up being the identity assignment
1293 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1294 	 a function call that has been inlined, the original PARM_DECL
1295 	 .OMP_DATA_I may have been converted into a different local
1296 	 variable, in which case we need to keep the assignment.  */
1297       if (gimple_omp_taskreg_data_arg (entry_stmt))
1298 	{
1299 	  basic_block entry_succ_bb
1300 	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1301 				       : FALLTHRU_EDGE (entry_bb)->dest;
1302 	  tree arg;
1303 	  gimple *parcopy_stmt = NULL;
1304 
1305 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1306 	    {
1307 	      gimple *stmt;
1308 
1309 	      gcc_assert (!gsi_end_p (gsi));
1310 	      stmt = gsi_stmt (gsi);
1311 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1312 		continue;
1313 
1314 	      if (gimple_num_ops (stmt) == 2)
1315 		{
1316 		  tree arg = gimple_assign_rhs1 (stmt);
1317 
1318 		  /* We're ignoring the subcode because we're
1319 		     effectively doing a STRIP_NOPS.  */
1320 
1321 		  if (TREE_CODE (arg) == ADDR_EXPR
1322 		      && (TREE_OPERAND (arg, 0)
1323 			  == gimple_omp_taskreg_data_arg (entry_stmt)))
1324 		    {
1325 		      parcopy_stmt = stmt;
1326 		      break;
1327 		    }
1328 		}
1329 	    }
1330 
1331 	  gcc_assert (parcopy_stmt != NULL);
1332 	  arg = DECL_ARGUMENTS (child_fn);
1333 
1334 	  if (!gimple_in_ssa_p (cfun))
1335 	    {
1336 	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1337 		gsi_remove (&gsi, true);
1338 	      else
1339 		{
1340 		  /* ?? Is setting the subcode really necessary ??  */
1341 		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1342 		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1343 		}
1344 	    }
1345 	  else
1346 	    {
1347 	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1348 	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1349 	      /* We'd like to set the rhs to the default def in the child_fn,
1350 		 but it's too early to create ssa names in the child_fn.
1351 		 Instead, we set the rhs to the parm.  In
1352 		 move_sese_region_to_fn, we introduce a default def for the
1353 		 parm, map the parm to its default def, and once we encounter
1354 		 this stmt, replace the parm with the default def.  */
1355 	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 	      update_stmt (parcopy_stmt);
1357 	    }
1358 	}
1359 
1360       /* Declare local variables needed in CHILD_CFUN.  */
1361       block = DECL_INITIAL (child_fn);
1362       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1363       /* The gimplifier could record temporaries in the parallel/task block
1364 	 rather than in the containing function's local_decls chain,
1365 	 which would mean cgraph missed finalizing them.  Do it now.  */
1366       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1367 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1368 	  varpool_node::finalize_decl (t);
1369       DECL_SAVED_TREE (child_fn) = NULL;
1370       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1371       gimple_set_body (child_fn, NULL);
1372       TREE_USED (block) = 1;
1373 
1374       /* Reset DECL_CONTEXT on function arguments.  */
1375       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1376 	DECL_CONTEXT (t) = child_fn;
1377 
1378       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1379 	 so that it can be moved to the child function.  */
1380       gsi = gsi_last_nondebug_bb (entry_bb);
1381       stmt = gsi_stmt (gsi);
1382       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1383 			   || gimple_code (stmt) == GIMPLE_OMP_TASK
1384 			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1385       e = split_block (entry_bb, stmt);
1386       gsi_remove (&gsi, true);
1387       entry_bb = e->dest;
1388       edge e2 = NULL;
1389       if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1390 	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1391       else
1392 	{
1393 	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1394 	  gcc_assert (e2->dest == region->exit);
1395 	  remove_edge (BRANCH_EDGE (entry_bb));
1396 	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1397 	  gsi = gsi_last_nondebug_bb (region->exit);
1398 	  gcc_assert (!gsi_end_p (gsi)
1399 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1400 	  gsi_remove (&gsi, true);
1401 	}
1402 
1403       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1404       if (exit_bb)
1405 	{
1406 	  gsi = gsi_last_nondebug_bb (exit_bb);
1407 	  gcc_assert (!gsi_end_p (gsi)
1408 		      && (gimple_code (gsi_stmt (gsi))
1409 			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1410 	  stmt = gimple_build_return (NULL);
1411 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1412 	  gsi_remove (&gsi, true);
1413 	}
1414 
1415       /* Move the parallel region into CHILD_CFUN.  */
1416 
1417       if (gimple_in_ssa_p (cfun))
1418 	{
1419 	  init_tree_ssa (child_cfun);
1420 	  init_ssa_operands (child_cfun);
1421 	  child_cfun->gimple_df->in_ssa_p = true;
1422 	  block = NULL_TREE;
1423 	}
1424       else
1425 	block = gimple_block (entry_stmt);
1426 
1427       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1428       if (exit_bb)
1429 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1430       if (e2)
1431 	{
1432 	  basic_block dest_bb = e2->dest;
1433 	  if (!exit_bb)
1434 	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1435 	  remove_edge (e2);
1436 	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1437 	}
1438       /* When the OMP expansion process cannot guarantee an up-to-date
1439 	 loop tree, arrange for the child function to fix up loops.  */
1440       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1441 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1442 
1443       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1444       num = vec_safe_length (child_cfun->local_decls);
1445       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1446 	{
1447 	  t = (*child_cfun->local_decls)[srcidx];
1448 	  if (DECL_CONTEXT (t) == cfun->decl)
1449 	    continue;
1450 	  if (srcidx != dstidx)
1451 	    (*child_cfun->local_decls)[dstidx] = t;
1452 	  dstidx++;
1453 	}
1454       if (dstidx != num)
1455 	vec_safe_truncate (child_cfun->local_decls, dstidx);
1456 
1457       /* Inform the callgraph about the new function.  */
1458       child_cfun->curr_properties = cfun->curr_properties;
1459       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1460       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1461       cgraph_node *node = cgraph_node::get_create (child_fn);
1462       node->parallelized_function = 1;
1463       cgraph_node::add_new_function (child_fn, true);
1464 
1465       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1466 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1467 
1468       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1469 	 fixed in a following pass.  */
1470       push_cfun (child_cfun);
1471       if (need_asm)
1472 	assign_assembler_name_if_needed (child_fn);
1473 
1474       if (optimize)
1475 	optimize_omp_library_calls (entry_stmt);
1476       update_max_bb_count ();
1477       cgraph_edge::rebuild_edges ();
1478 
1479       /* Some EH regions might become dead, see PR34608.  If
1480 	 pass_cleanup_cfg isn't the first pass to happen with the
1481 	 new child, these dead EH edges might cause problems.
1482 	 Clean them up now.  */
1483       if (flag_exceptions)
1484 	{
1485 	  basic_block bb;
1486 	  bool changed = false;
1487 
1488 	  FOR_EACH_BB_FN (bb, cfun)
1489 	    changed |= gimple_purge_dead_eh_edges (bb);
1490 	  if (changed)
1491 	    cleanup_tree_cfg ();
1492 	}
1493       if (gimple_in_ssa_p (cfun))
1494 	update_ssa (TODO_update_ssa);
1495       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1496 	verify_loop_structure ();
1497       pop_cfun ();
1498 
1499       if (dump_file && !gimple_in_ssa_p (cfun))
1500 	{
1501 	  omp_any_child_fn_dumped = true;
1502 	  dump_function_header (dump_file, child_fn, dump_flags);
1503 	  dump_function_to_file (child_fn, dump_file, dump_flags);
1504 	}
1505     }
1506 
1507   adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1508 
1509   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1510     expand_parallel_call (region, new_bb,
1511 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1512   else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1513     expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1514   else
1515     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1516   if (gimple_in_ssa_p (cfun))
1517     update_ssa (TODO_update_ssa_only_virtuals);
1518 }
1519 
1520 /* Information about members of an OpenACC collapsed loop nest.  */
1521 
1522 struct oacc_collapse
1523 {
1524   tree base;  /* Base value.  */
1525   tree iters; /* Number of steps.  */
1526   tree step;  /* Step size.  */
1527   tree tile;  /* Tile increment (if tiled).  */
1528   tree outer; /* Tile iterator var. */
1529 };
1530 
1531 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1532    Fill in the COUNTS array.  Emit any initialization code before GSI.
1533    Return the calculated outer loop bound of BOUND_TYPE.  */
1534 
1535 static tree
1536 expand_oacc_collapse_init (const struct omp_for_data *fd,
1537 			   gimple_stmt_iterator *gsi,
1538 			   oacc_collapse *counts, tree bound_type,
1539 			   location_t loc)
1540 {
1541   tree tiling = fd->tiling;
1542   tree total = build_int_cst (bound_type, 1);
1543   int ix;
1544 
1545   gcc_assert (integer_onep (fd->loop.step));
1546   gcc_assert (integer_zerop (fd->loop.n1));
1547 
1548   /* When tiling, the first operand of the tile clause applies to the
1549      innermost loop, and we work outwards from there.  Seems
1550      backwards, but whatever.  */
1551   for (ix = fd->collapse; ix--;)
1552     {
1553       const omp_for_data_loop *loop = &fd->loops[ix];
1554 
1555       tree iter_type = TREE_TYPE (loop->v);
1556       tree diff_type = iter_type;
1557       tree plus_type = iter_type;
1558 
1559       gcc_assert (loop->cond_code == fd->loop.cond_code);
1560 
1561       if (POINTER_TYPE_P (iter_type))
1562 	plus_type = sizetype;
1563       if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1564 	diff_type = signed_type_for (diff_type);
1565       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1566 	diff_type = integer_type_node;
1567 
1568       if (tiling)
1569 	{
1570 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1571 	  tree loop_no = build_int_cst (integer_type_node, ix);
1572 	  tree tile = TREE_VALUE (tiling);
1573 	  gcall *call
1574 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1575 					  /* gwv-outer=*/integer_zero_node,
1576 					  /* gwv-inner=*/integer_zero_node);
1577 
1578 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1579 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1580 	  gimple_call_set_lhs (call, counts[ix].tile);
1581 	  gimple_set_location (call, loc);
1582 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1583 
1584 	  tiling = TREE_CHAIN (tiling);
1585 	}
1586       else
1587 	{
1588 	  counts[ix].tile = NULL;
1589 	  counts[ix].outer = loop->v;
1590 	}
1591 
1592       tree b = loop->n1;
1593       tree e = loop->n2;
1594       tree s = loop->step;
1595       bool up = loop->cond_code == LT_EXPR;
1596       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1597       bool negating;
1598       tree expr;
1599 
1600       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1601 				    true, GSI_SAME_STMT);
1602       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1603 				    true, GSI_SAME_STMT);
1604 
1605       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1606       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1607       if (negating)
1608 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1609       s = fold_convert (diff_type, s);
1610       if (negating)
1611 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1612       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1613 				    true, GSI_SAME_STMT);
1614 
1615       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1616       negating = !up && TYPE_UNSIGNED (iter_type);
1617       expr = fold_build2 (MINUS_EXPR, plus_type,
1618 			  fold_convert (plus_type, negating ? b : e),
1619 			  fold_convert (plus_type, negating ? e : b));
1620       expr = fold_convert (diff_type, expr);
1621       if (negating)
1622 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1623       tree range = force_gimple_operand_gsi
1624 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1625 
1626       /* Determine number of iterations.  */
1627       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1628       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1629       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1630 
1631       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1632 					     true, GSI_SAME_STMT);
1633 
1634       counts[ix].base = b;
1635       counts[ix].iters = iters;
1636       counts[ix].step = s;
1637 
1638       total = fold_build2 (MULT_EXPR, bound_type, total,
1639 			   fold_convert (bound_type, iters));
1640     }
1641 
1642   return total;
1643 }
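
/* For illustration (not part of the original sources): a sketch of the
   values computed above for a simple collapsed OpenACC nest, assuming no
   tiling.  Given

	#pragma acc loop collapse(2)
	for (i = 0; i < 10; i++)
	  for (j = 20; j > 0; j -= 2)
	    body (i, j);

   the inner loop gets base = 20, step = -2 and
	iters = (range - dir + step) / step
	      = ((0 - 20) - (-1) + (-2)) / (-2) = (-21) / (-2) = 10
   (TRUNC_DIV_EXPR truncates towards zero), and the outer loop gets
   base = 0, step = 1 and iters = (10 - 1 + 1) / 1 = 10.  The returned
   outer bound is total = 10 * 10 = 100, i.e. the single linearized loop
   later iterates over [0, total).  */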
1644 
1645 /* Emit initializers for collapsed loop members.  INNER is true if
1646    this is for the element loop of a TILE.  IVAR is the outer
1647    loop iteration variable, from which collapsed loop iteration values
1648    are calculated.  COUNTS array has been initialized by
1649    expand_oacc_collapse_init.  */
1650 
1651 static void
1652 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1653 			   gimple_stmt_iterator *gsi,
1654 			   const oacc_collapse *counts, tree ivar)
1655 {
1656   tree ivar_type = TREE_TYPE (ivar);
1657 
1658   /*  The most rapidly changing iteration variable is the innermost
1659       one.  */
1660   for (int ix = fd->collapse; ix--;)
1661     {
1662       const omp_for_data_loop *loop = &fd->loops[ix];
1663       const oacc_collapse *collapse = &counts[ix];
1664       tree v = inner ? loop->v : collapse->outer;
1665       tree iter_type = TREE_TYPE (v);
1666       tree diff_type = TREE_TYPE (collapse->step);
1667       tree plus_type = iter_type;
1668       enum tree_code plus_code = PLUS_EXPR;
1669       tree expr;
1670 
1671       if (POINTER_TYPE_P (iter_type))
1672 	{
1673 	  plus_code = POINTER_PLUS_EXPR;
1674 	  plus_type = sizetype;
1675 	}
1676 
1677       expr = ivar;
1678       if (ix)
1679 	{
1680 	  tree mod = fold_convert (ivar_type, collapse->iters);
1681 	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1682 	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1683 	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1684 					   true, GSI_SAME_STMT);
1685 	}
1686 
1687       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1688 			  collapse->step);
1689       expr = fold_build2 (plus_code, iter_type,
1690 			  inner ? collapse->outer : collapse->base,
1691 			  fold_convert (plus_type, expr));
1692       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1693 				       true, GSI_SAME_STMT);
1694       gassign *ass = gimple_build_assign (v, expr);
1695       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1696     }
1697 }
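
/* For illustration (not part of the original sources): continuing the
   collapse(2) example above with inner iters = 10 and outer iters = 10,
   a linear iteration value IVAR = 37 is decomposed innermost-first as

	j = 20 + (37 % 10) * -2 = 20 + 7 * -2 = 6;	ivar = 37 / 10 = 3;
	i =  0 + 3 * 1 = 3;

   i.e. each collapsed variable is recovered with a TRUNC_MOD_EXPR by the
   inner iteration count followed by a TRUNC_DIV_EXPR, exactly as emitted
   above.  */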
1698 
1699 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1700    of the combined collapse > 1 loop constructs, generate code like:
1701 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1702 	if (cond3 is <)
1703 	  adj = STEP3 - 1;
1704 	else
1705 	  adj = STEP3 + 1;
1706 	count3 = (adj + N32 - N31) / STEP3;
1707 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1708 	if (cond2 is <)
1709 	  adj = STEP2 - 1;
1710 	else
1711 	  adj = STEP2 + 1;
1712 	count2 = (adj + N22 - N21) / STEP2;
1713 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1714 	if (cond1 is <)
1715 	  adj = STEP1 - 1;
1716 	else
1717 	  adj = STEP1 + 1;
1718 	count1 = (adj + N12 - N11) / STEP1;
1719 	count = count1 * count2 * count3;
1720    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1721 	count = 0;
1722    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1723    of the combined loop constructs, just initialize COUNTS array
1724    from the _looptemp_ clauses.  */
1725 
1726 /* NOTE: It *could* be better to moosh all of the BBs together,
1727    creating one larger BB with all the computation and the unexpected
1728    jump at the end.  I.e.
1729 
1730    bool zero3, zero2, zero1, zero;
1731 
1732    zero3 = N32 c3 N31;
1733    count3 = (N32 - N31) /[cl] STEP3;
1734    zero2 = N22 c2 N21;
1735    count2 = (N22 - N21) /[cl] STEP2;
1736    zero1 = N12 c1 N11;
1737    count1 = (N12 - N11) /[cl] STEP1;
1738    zero = zero3 || zero2 || zero1;
1739    count = count1 * count2 * count3;
1740    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1741 
1742    After all, we expect zero to be false, and thus we expect to have to
1743    evaluate all of the comparison expressions, so short-circuiting
1744    oughtn't be a win.  Since the condition isn't protecting a
1745    denominator, we're not concerned about divide-by-zero, so we can
1746    fully evaluate count even if a numerator turned out to be wrong.
1747 
1748    It seems like putting this all together would create much better
1749    scheduling opportunities, and less pressure on the chip's branch
1750    predictor.  */
1751 
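/* For illustration (not part of the original sources): for a construct
   such as

	#pragma omp for collapse(2)
	for (i = 0; i < 10; i++)
	  for (j = 0; j < 7; j += 2)
	    body (i, j);

   the code described above computes, with both conditions being <,
	adj = STEP2 - 1 = 1;  count2 = (1 + 7 - 0) / 2 = 4;
	adj = STEP1 - 1 = 0;  count1 = (0 + 10 - 0) / 1 = 10;
	count = count1 * count2 = 40;
   while the __builtin_expect guards branch to ZERO_ITER_BB as soon as
   some pair of bounds is already crossed at run time.  */
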
1752 static void
1753 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1754 			    basic_block &entry_bb, tree *counts,
1755 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1756 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1757 			    basic_block &l2_dom_bb)
1758 {
1759   tree t, type = TREE_TYPE (fd->loop.v);
1760   edge e, ne;
1761   int i;
1762 
1763   /* Collapsed loops need work for expansion into SSA form.  */
1764   gcc_assert (!gimple_in_ssa_p (cfun));
1765 
1766   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1767       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1768     {
1769       gcc_assert (fd->ordered == 0);
1770       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1771 	 isn't supposed to be handled, as the inner loop doesn't
1772 	 use it.  */
1773       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1774 				     OMP_CLAUSE__LOOPTEMP_);
1775       gcc_assert (innerc);
1776       for (i = 0; i < fd->collapse; i++)
1777 	{
1778 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1779 				    OMP_CLAUSE__LOOPTEMP_);
1780 	  gcc_assert (innerc);
1781 	  if (i)
1782 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1783 	  else
1784 	    counts[0] = NULL_TREE;
1785 	}
1786       return;
1787     }
1788 
1789   for (i = fd->collapse; i < fd->ordered; i++)
1790     {
1791       tree itype = TREE_TYPE (fd->loops[i].v);
1792       counts[i] = NULL_TREE;
1793       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1794 		       fold_convert (itype, fd->loops[i].n1),
1795 		       fold_convert (itype, fd->loops[i].n2));
1796       if (t && integer_zerop (t))
1797 	{
1798 	  for (i = fd->collapse; i < fd->ordered; i++)
1799 	    counts[i] = build_int_cst (type, 0);
1800 	  break;
1801 	}
1802     }
1803   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1804     {
1805       tree itype = TREE_TYPE (fd->loops[i].v);
1806 
1807       if (i >= fd->collapse && counts[i])
1808 	continue;
1809       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1810 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1811 				fold_convert (itype, fd->loops[i].n1),
1812 				fold_convert (itype, fd->loops[i].n2)))
1813 	      == NULL_TREE || !integer_onep (t)))
1814 	{
1815 	  gcond *cond_stmt;
1816 	  tree n1, n2;
1817 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1818 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1819 					 true, GSI_SAME_STMT);
1820 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1821 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1822 					 true, GSI_SAME_STMT);
1823 	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1824 					 NULL_TREE, NULL_TREE);
1825 	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1826 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1827 			 expand_omp_regimplify_p, NULL, NULL)
1828 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1829 			    expand_omp_regimplify_p, NULL, NULL))
1830 	    {
1831 	      *gsi = gsi_for_stmt (cond_stmt);
1832 	      gimple_regimplify_operands (cond_stmt, gsi);
1833 	    }
1834 	  e = split_block (entry_bb, cond_stmt);
1835 	  basic_block &zero_iter_bb
1836 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1837 	  int &first_zero_iter
1838 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1839 	  if (zero_iter_bb == NULL)
1840 	    {
1841 	      gassign *assign_stmt;
1842 	      first_zero_iter = i;
1843 	      zero_iter_bb = create_empty_bb (entry_bb);
1844 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1845 	      *gsi = gsi_after_labels (zero_iter_bb);
1846 	      if (i < fd->collapse)
1847 		assign_stmt = gimple_build_assign (fd->loop.n2,
1848 						   build_zero_cst (type));
1849 	      else
1850 		{
1851 		  counts[i] = create_tmp_reg (type, ".count");
1852 		  assign_stmt
1853 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1854 		}
1855 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1856 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1857 				       entry_bb);
1858 	    }
1859 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1860 	  ne->probability = profile_probability::very_unlikely ();
1861 	  e->flags = EDGE_TRUE_VALUE;
1862 	  e->probability = ne->probability.invert ();
1863 	  if (l2_dom_bb == NULL)
1864 	    l2_dom_bb = entry_bb;
1865 	  entry_bb = e->dest;
1866 	  *gsi = gsi_last_nondebug_bb (entry_bb);
1867 	}
1868 
1869       if (POINTER_TYPE_P (itype))
1870 	itype = signed_type_for (itype);
1871       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1872 				 ? -1 : 1));
1873       t = fold_build2 (PLUS_EXPR, itype,
1874 		       fold_convert (itype, fd->loops[i].step), t);
1875       t = fold_build2 (PLUS_EXPR, itype, t,
1876 		       fold_convert (itype, fd->loops[i].n2));
1877       t = fold_build2 (MINUS_EXPR, itype, t,
1878 		       fold_convert (itype, fd->loops[i].n1));
1879       /* ?? We could probably use CEIL_DIV_EXPR instead of
1880 	 TRUNC_DIV_EXPR with the adjustment done by hand, unless we can't
1881 	 generate the same code in the end because generically we
1882 	 don't know that the values involved must be negative for
1883 	 GT.  */
1884       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1885 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1886 			 fold_build1 (NEGATE_EXPR, itype, t),
1887 			 fold_build1 (NEGATE_EXPR, itype,
1888 				      fold_convert (itype,
1889 						    fd->loops[i].step)));
1890       else
1891 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1892 			 fold_convert (itype, fd->loops[i].step));
1893       t = fold_convert (type, t);
1894       if (TREE_CODE (t) == INTEGER_CST)
1895 	counts[i] = t;
1896       else
1897 	{
1898 	  if (i < fd->collapse || i != first_zero_iter2)
1899 	    counts[i] = create_tmp_reg (type, ".count");
1900 	  expand_omp_build_assign (gsi, counts[i], t);
1901 	}
1902       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1903 	{
1904 	  if (i == 0)
1905 	    t = counts[0];
1906 	  else
1907 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1908 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1909 	}
1910     }
1911 }
1912 
1913 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1914 	T = V;
1915 	V3 = N31 + (T % count3) * STEP3;
1916 	T = T / count3;
1917 	V2 = N21 + (T % count2) * STEP2;
1918 	T = T / count2;
1919 	V1 = N11 + T * STEP1;
1920    if this loop doesn't have an inner loop construct combined with it.
1921    If it does have an inner loop construct combined with it and the
1922    iteration count isn't known constant, store values from counts array
1923    into its _looptemp_ temporaries instead.  */
1924 
1925 static void
1926 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1927 			  tree *counts, gimple *inner_stmt, tree startvar)
1928 {
1929   int i;
1930   if (gimple_omp_for_combined_p (fd->for_stmt))
1931     {
1932       /* If fd->loop.n2 is constant, then no propagation of the counts
1933 	 is needed, they are constant.  */
1934       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1935 	return;
1936 
1937       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1938 		     ? gimple_omp_taskreg_clauses (inner_stmt)
1939 		     : gimple_omp_for_clauses (inner_stmt);
1940       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1941 	 isn't supposed to be handled, as the inner loop doesn't
1942 	 use it.  */
1943       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1944       gcc_assert (innerc);
1945       for (i = 0; i < fd->collapse; i++)
1946 	{
1947 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1948 				    OMP_CLAUSE__LOOPTEMP_);
1949 	  gcc_assert (innerc);
1950 	  if (i)
1951 	    {
1952 	      tree tem = OMP_CLAUSE_DECL (innerc);
1953 	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1954 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1955 					    false, GSI_CONTINUE_LINKING);
1956 	      gassign *stmt = gimple_build_assign (tem, t);
1957 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1958 	    }
1959 	}
1960       return;
1961     }
1962 
1963   tree type = TREE_TYPE (fd->loop.v);
1964   tree tem = create_tmp_reg (type, ".tem");
1965   gassign *stmt = gimple_build_assign (tem, startvar);
1966   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1967 
1968   for (i = fd->collapse - 1; i >= 0; i--)
1969     {
1970       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1971       itype = vtype;
1972       if (POINTER_TYPE_P (vtype))
1973 	itype = signed_type_for (vtype);
1974       if (i != 0)
1975 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1976       else
1977 	t = tem;
1978       t = fold_convert (itype, t);
1979       t = fold_build2 (MULT_EXPR, itype, t,
1980 		       fold_convert (itype, fd->loops[i].step));
1981       if (POINTER_TYPE_P (vtype))
1982 	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1983       else
1984 	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1985       t = force_gimple_operand_gsi (gsi, t,
1986 				    DECL_P (fd->loops[i].v)
1987 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1988 				    NULL_TREE, false,
1989 				    GSI_CONTINUE_LINKING);
1990       stmt = gimple_build_assign (fd->loops[i].v, t);
1991       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1992       if (i != 0)
1993 	{
1994 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1995 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1996 					false, GSI_CONTINUE_LINKING);
1997 	  stmt = gimple_build_assign (tem, t);
1998 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1999 	}
2000     }
2001 }
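
/* For illustration (not part of the original sources): with the counts
   from the collapse(2) example above (count2 = 4, count1 = 10), a thread
   that receives T = V = 17 from the runtime reconstructs its starting
   indices as

	V2 = N21 + (17 % 4) * STEP2 = 0 + 1 * 2 = 2;
	T  = 17 / 4 = 4;
	V1 = N11 + 4 * STEP1 = 4;

   which is iteration (i = 4, j = 2) of the original nest.  */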
2002 
2003 /* Helper function for expand_omp_for_*.  Generate code like:
2004     L10:
2005 	V3 += STEP3;
2006 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
2007     L11:
2008 	V3 = N31;
2009 	V2 += STEP2;
2010 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
2011     L12:
2012 	V2 = N21;
2013 	V1 += STEP1;
2014 	goto BODY_BB;  */
2015 
2016 static basic_block
2017 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2018 			     basic_block body_bb)
2019 {
2020   basic_block last_bb, bb, collapse_bb = NULL;
2021   int i;
2022   gimple_stmt_iterator gsi;
2023   edge e;
2024   tree t;
2025   gimple *stmt;
2026 
2027   last_bb = cont_bb;
2028   for (i = fd->collapse - 1; i >= 0; i--)
2029     {
2030       tree vtype = TREE_TYPE (fd->loops[i].v);
2031 
2032       bb = create_empty_bb (last_bb);
2033       add_bb_to_loop (bb, last_bb->loop_father);
2034       gsi = gsi_start_bb (bb);
2035 
2036       if (i < fd->collapse - 1)
2037 	{
2038 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2039 	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2040 
2041 	  t = fd->loops[i + 1].n1;
2042 	  t = force_gimple_operand_gsi (&gsi, t,
2043 					DECL_P (fd->loops[i + 1].v)
2044 					&& TREE_ADDRESSABLE (fd->loops[i
2045 								       + 1].v),
2046 					NULL_TREE, false,
2047 					GSI_CONTINUE_LINKING);
2048 	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2049 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2050 	}
2051       else
2052 	collapse_bb = bb;
2053 
2054       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2055 
2056       if (POINTER_TYPE_P (vtype))
2057 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2058       else
2059 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2060       t = force_gimple_operand_gsi (&gsi, t,
2061 				    DECL_P (fd->loops[i].v)
2062 				    && TREE_ADDRESSABLE (fd->loops[i].v),
2063 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
2064       stmt = gimple_build_assign (fd->loops[i].v, t);
2065       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2066 
2067       if (i > 0)
2068 	{
2069 	  t = fd->loops[i].n2;
2070 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2071 					false, GSI_CONTINUE_LINKING);
2072 	  tree v = fd->loops[i].v;
2073 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
2074 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2075 					  false, GSI_CONTINUE_LINKING);
2076 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2077 	  stmt = gimple_build_cond_empty (t);
2078 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079 	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2080 			 expand_omp_regimplify_p, NULL, NULL)
2081 	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2082 			    expand_omp_regimplify_p, NULL, NULL))
2083 	    gimple_regimplify_operands (stmt, &gsi);
2084 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2085 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2086 	}
2087       else
2088 	make_edge (bb, body_bb, EDGE_FALLTHRU);
2089       last_bb = bb;
2090     }
2091 
2092   return collapse_bb;
2093 }
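
/* For illustration (not part of the original sources): for the collapse(2)
   example used earlier, the generated update chain is simply

	L10:	V2 += STEP2;
		if (V2 cond2 N22) goto BODY_BB; else goto L11;
	L11:	V2 = N21;
		V1 += STEP1;
		goto BODY_BB;

   i.e. the innermost variable is advanced first and each outer variable is
   advanced only when the one inside it has run off its bound; the returned
   COLLAPSE_BB is the block holding the innermost increment (L10).  */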
2094 
2095 /* Expand #pragma omp ordered depend(source).  */
2096 
2097 static void
2098 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2099 			   tree *counts, location_t loc)
2100 {
2101   enum built_in_function source_ix
2102     = fd->iter_type == long_integer_type_node
2103       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2104   gimple *g
2105     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2106 			 build_fold_addr_expr (counts[fd->ordered]));
2107   gimple_set_location (g, loc);
2108   gsi_insert_before (gsi, g, GSI_SAME_STMT);
2109 }
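
/* For illustration (not part of the original sources): a minimal sketch of
   what the call emitted above corresponds to at the source level.  For

	#pragma omp for ordered(1)
	for (i = 0; i < n; i++)
	  {
	    #pragma omp ordered depend(source)
	    body (i);
	  }

   the depend(source) directive is replaced by a call along the lines of
   GOMP_doacross_post (&.orditera[0]) (or GOMP_doacross_ull_post for
   unsigned long long iterators), where .orditera is the array created in
   expand_omp_ordered_source_sink below holding the current iteration
   numbers.  */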
2110 
2111 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
2112 
2113 static void
2114 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2115 			 tree *counts, tree c, location_t loc)
2116 {
2117   auto_vec<tree, 10> args;
2118   enum built_in_function sink_ix
2119     = fd->iter_type == long_integer_type_node
2120       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2121   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2122   int i;
2123   gimple_stmt_iterator gsi2 = *gsi;
2124   bool warned_step = false;
2125 
2126   for (i = 0; i < fd->ordered; i++)
2127     {
2128       tree step = NULL_TREE;
2129       off = TREE_PURPOSE (deps);
2130       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2131 	{
2132 	  step = TREE_OPERAND (off, 1);
2133 	  off = TREE_OPERAND (off, 0);
2134 	}
2135       if (!integer_zerop (off))
2136 	{
2137 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2138 		      || fd->loops[i].cond_code == GT_EXPR);
2139 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2140 	  if (step)
2141 	    {
2142 	      /* Non-simple Fortran DO loops.  If step is variable,
2143 		 we don't even know the direction at compile time, so we
2144 		 can't warn.  */
2145 	      if (TREE_CODE (step) != INTEGER_CST)
2146 		break;
2147 	      forward = tree_int_cst_sgn (step) != -1;
2148 	    }
2149 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2150 	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2151 				"waiting for lexically later iteration");
2152 	  break;
2153 	}
2154       deps = TREE_CHAIN (deps);
2155     }
2156   /* If all offsets corresponding to the collapsed loops are zero,
2157      this depend clause can be ignored.  FIXME: but there is still a
2158      flush needed.  We need to emit one __sync_synchronize () for it
2159      though (perhaps conditionally)?  Solve this together with the
2160      conservative dependence folding optimization.
2161   if (i >= fd->collapse)
2162     return;  */
2163 
2164   deps = OMP_CLAUSE_DECL (c);
2165   gsi_prev (&gsi2);
2166   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2167   edge e2 = split_block_after_labels (e1->dest);
2168 
2169   gsi2 = gsi_after_labels (e1->dest);
2170   *gsi = gsi_last_bb (e1->src);
2171   for (i = 0; i < fd->ordered; i++)
2172     {
2173       tree itype = TREE_TYPE (fd->loops[i].v);
2174       tree step = NULL_TREE;
2175       tree orig_off = NULL_TREE;
2176       if (POINTER_TYPE_P (itype))
2177 	itype = sizetype;
2178       if (i)
2179 	deps = TREE_CHAIN (deps);
2180       off = TREE_PURPOSE (deps);
2181       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2182 	{
2183 	  step = TREE_OPERAND (off, 1);
2184 	  off = TREE_OPERAND (off, 0);
2185 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2186 		      && integer_onep (fd->loops[i].step)
2187 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2188 	}
2189       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2190       if (step)
2191 	{
2192 	  off = fold_convert_loc (loc, itype, off);
2193 	  orig_off = off;
2194 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2195 	}
2196 
2197       if (integer_zerop (off))
2198 	t = boolean_true_node;
2199       else
2200 	{
2201 	  tree a;
2202 	  tree co = fold_convert_loc (loc, itype, off);
2203 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2204 	    {
2205 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2206 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2207 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2208 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2209 				   co);
2210 	    }
2211 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2212 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2213 				 fd->loops[i].v, co);
2214 	  else
2215 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2216 				 fd->loops[i].v, co);
2217 	  if (step)
2218 	    {
2219 	      tree t1, t2;
2220 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2221 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2222 				      fd->loops[i].n1);
2223 	      else
2224 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2225 				      fd->loops[i].n2);
2226 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2227 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2228 				      fd->loops[i].n2);
2229 	      else
2230 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2231 				      fd->loops[i].n1);
2232 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2233 				   step, build_int_cst (TREE_TYPE (step), 0));
2234 	      if (TREE_CODE (step) != INTEGER_CST)
2235 		{
2236 		  t1 = unshare_expr (t1);
2237 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2238 						 false, GSI_CONTINUE_LINKING);
2239 		  t2 = unshare_expr (t2);
2240 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2241 						 false, GSI_CONTINUE_LINKING);
2242 		}
2243 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2244 				   t, t2, t1);
2245 	    }
2246 	  else if (fd->loops[i].cond_code == LT_EXPR)
2247 	    {
2248 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2249 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2250 				     fd->loops[i].n1);
2251 	      else
2252 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2253 				     fd->loops[i].n2);
2254 	    }
2255 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2256 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2257 				 fd->loops[i].n2);
2258 	  else
2259 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2260 				 fd->loops[i].n1);
2261 	}
2262       if (cond)
2263 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2264       else
2265 	cond = t;
2266 
2267       off = fold_convert_loc (loc, itype, off);
2268 
2269       if (step
2270 	  || (fd->loops[i].cond_code == LT_EXPR
2271 	      ? !integer_onep (fd->loops[i].step)
2272 	      : !integer_minus_onep (fd->loops[i].step)))
2273 	{
2274 	  if (step == NULL_TREE
2275 	      && TYPE_UNSIGNED (itype)
2276 	      && fd->loops[i].cond_code == GT_EXPR)
2277 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2278 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2279 						  s));
2280 	  else
2281 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2282 				 orig_off ? orig_off : off, s);
2283 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2284 			       build_int_cst (itype, 0));
2285 	  if (integer_zerop (t) && !warned_step)
2286 	    {
2287 	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2288 				  "refers to iteration never in the iteration "
2289 				  "space");
2290 	      warned_step = true;
2291 	    }
2292 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2293 				  cond, t);
2294 	}
2295 
2296       if (i <= fd->collapse - 1 && fd->collapse > 1)
2297 	t = fd->loop.v;
2298       else if (counts[i])
2299 	t = counts[i];
2300       else
2301 	{
2302 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2303 			       fd->loops[i].v, fd->loops[i].n1);
2304 	  t = fold_convert_loc (loc, fd->iter_type, t);
2305 	}
2306       if (step)
2307 	/* We have divided off by step already earlier.  */;
2308       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2309 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2310 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2311 						s));
2312       else
2313 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2314       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2315 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2316       off = fold_convert_loc (loc, fd->iter_type, off);
2317       if (i <= fd->collapse - 1 && fd->collapse > 1)
2318 	{
2319 	  if (i)
2320 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2321 				   off);
2322 	  if (i < fd->collapse - 1)
2323 	    {
2324 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2325 				      counts[i]);
2326 	      continue;
2327 	    }
2328 	}
2329       off = unshare_expr (off);
2330       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2331       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2332 				    true, GSI_SAME_STMT);
2333       args.safe_push (t);
2334     }
2335   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2336   gimple_set_location (g, loc);
2337   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2338 
2339   cond = unshare_expr (cond);
2340   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2341 				   GSI_CONTINUE_LINKING);
2342   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2343   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2344   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2345   e1->probability = e3->probability.invert ();
2346   e1->flags = EDGE_TRUE_VALUE;
2347   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2348 
2349   *gsi = gsi_after_labels (e2->dest);
2350 }
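
/* For illustration (not part of the original sources): a rough sketch of
   the expansion of a single sink clause.  For

	#pragma omp for ordered(2)
	for (i = 0; i < n; i++)
	  for (j = 0; j < m; j++)
	    {
	      #pragma omp ordered depend(sink: i - 1, j)
	      body (i, j);
	    }

   the generated code first tests that the referenced iteration exists
   (here roughly i - 1 >= 0, combined into COND above) and, only if it
   does, calls something like GOMP_doacross_wait (i - 1, j) with the
   offsets folded into the normalized iteration counters; otherwise the
   wait is skipped via the EDGE_FALSE_VALUE edge created above.  */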
2351 
2352 /* Expand all #pragma omp ordered depend(source) and
2353    #pragma omp ordered depend(sink:...) constructs in the current
2354    #pragma omp for ordered(n) region.  */
2355 
2356 static void
2357 expand_omp_ordered_source_sink (struct omp_region *region,
2358 				struct omp_for_data *fd, tree *counts,
2359 				basic_block cont_bb)
2360 {
2361   struct omp_region *inner;
2362   int i;
2363   for (i = fd->collapse - 1; i < fd->ordered; i++)
2364     if (i == fd->collapse - 1 && fd->collapse > 1)
2365       counts[i] = NULL_TREE;
2366     else if (i >= fd->collapse && !cont_bb)
2367       counts[i] = build_zero_cst (fd->iter_type);
2368     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2369 	     && integer_onep (fd->loops[i].step))
2370       counts[i] = NULL_TREE;
2371     else
2372       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2373   tree atype
2374     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2375   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2376   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2377 
2378   for (inner = region->inner; inner; inner = inner->next)
2379     if (inner->type == GIMPLE_OMP_ORDERED)
2380       {
2381 	gomp_ordered *ord_stmt = inner->ord_stmt;
2382 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2383 	location_t loc = gimple_location (ord_stmt);
2384 	tree c;
2385 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2386 	     c; c = OMP_CLAUSE_CHAIN (c))
2387 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2388 	    break;
2389 	if (c)
2390 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2391 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2392 	     c; c = OMP_CLAUSE_CHAIN (c))
2393 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2394 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2395 	gsi_remove (&gsi, true);
2396       }
2397 }
2398 
2399 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2400    collapsed.  */
2401 
2402 static basic_block
2403 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2404 			      basic_block cont_bb, basic_block body_bb,
2405 			      bool ordered_lastprivate)
2406 {
2407   if (fd->ordered == fd->collapse)
2408     return cont_bb;
2409 
2410   if (!cont_bb)
2411     {
2412       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2413       for (int i = fd->collapse; i < fd->ordered; i++)
2414 	{
2415 	  tree type = TREE_TYPE (fd->loops[i].v);
2416 	  tree n1 = fold_convert (type, fd->loops[i].n1);
2417 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2418 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2419 			      size_int (i - fd->collapse + 1),
2420 			      NULL_TREE, NULL_TREE);
2421 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2422 	}
2423       return NULL;
2424     }
2425 
2426   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2427     {
2428       tree t, type = TREE_TYPE (fd->loops[i].v);
2429       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2430       expand_omp_build_assign (&gsi, fd->loops[i].v,
2431 			       fold_convert (type, fd->loops[i].n1));
2432       if (counts[i])
2433 	expand_omp_build_assign (&gsi, counts[i],
2434 				 build_zero_cst (fd->iter_type));
2435       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2436 			  size_int (i - fd->collapse + 1),
2437 			  NULL_TREE, NULL_TREE);
2438       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2439       if (!gsi_end_p (gsi))
2440 	gsi_prev (&gsi);
2441       else
2442 	gsi = gsi_last_bb (body_bb);
2443       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2444       basic_block new_body = e1->dest;
2445       if (body_bb == cont_bb)
2446 	cont_bb = new_body;
2447       edge e2 = NULL;
2448       basic_block new_header;
2449       if (EDGE_COUNT (cont_bb->preds) > 0)
2450 	{
2451 	  gsi = gsi_last_bb (cont_bb);
2452 	  if (POINTER_TYPE_P (type))
2453 	    t = fold_build_pointer_plus (fd->loops[i].v,
2454 					 fold_convert (sizetype,
2455 						       fd->loops[i].step));
2456 	  else
2457 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2458 			     fold_convert (type, fd->loops[i].step));
2459 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2460 	  if (counts[i])
2461 	    {
2462 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2463 			       build_int_cst (fd->iter_type, 1));
2464 	      expand_omp_build_assign (&gsi, counts[i], t);
2465 	      t = counts[i];
2466 	    }
2467 	  else
2468 	    {
2469 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2470 			       fd->loops[i].v, fd->loops[i].n1);
2471 	      t = fold_convert (fd->iter_type, t);
2472 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2473 					    true, GSI_SAME_STMT);
2474 	    }
2475 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2476 			 size_int (i - fd->collapse + 1),
2477 			 NULL_TREE, NULL_TREE);
2478 	  expand_omp_build_assign (&gsi, aref, t);
2479 	  gsi_prev (&gsi);
2480 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2481 	  new_header = e2->dest;
2482 	}
2483       else
2484 	new_header = cont_bb;
2485       gsi = gsi_after_labels (new_header);
2486       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2487 					 true, GSI_SAME_STMT);
2488       tree n2
2489 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2490 				    true, NULL_TREE, true, GSI_SAME_STMT);
2491       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2492       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2493       edge e3 = split_block (new_header, gsi_stmt (gsi));
2494       cont_bb = e3->dest;
2495       remove_edge (e1);
2496       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2497       e3->flags = EDGE_FALSE_VALUE;
2498       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2499       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2500       e1->probability = e3->probability.invert ();
2501 
2502       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2503       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2504 
2505       if (e2)
2506 	{
2507 	  struct loop *loop = alloc_loop ();
2508 	  loop->header = new_header;
2509 	  loop->latch = e2->src;
2510 	  add_loop (loop, body_bb->loop_father);
2511 	}
2512     }
2513 
2514   /* If there are any lastprivate clauses and it is possible some loops
2515      might have zero iterations, ensure all the decls are initialized,
2516      otherwise we could crash evaluating C++ class iterators with lastprivate
2517      clauses.  */
2518   bool need_inits = false;
2519   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2520     if (need_inits)
2521       {
2522 	tree type = TREE_TYPE (fd->loops[i].v);
2523 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2524 	expand_omp_build_assign (&gsi, fd->loops[i].v,
2525 				 fold_convert (type, fd->loops[i].n1));
2526       }
2527     else
2528       {
2529 	tree type = TREE_TYPE (fd->loops[i].v);
2530 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2531 				      boolean_type_node,
2532 				      fold_convert (type, fd->loops[i].n1),
2533 				      fold_convert (type, fd->loops[i].n2));
2534 	if (!integer_onep (this_cond))
2535 	  need_inits = true;
2536       }
2537 
2538   return cont_bb;
2539 }
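
/* For illustration (not part of the original sources): a sketch of the
   wrapping this produces for #pragma omp for ordered(2) collapse(1).
   Only the first loop is distributed among the threads; the second,
   non-collapsed ordered loop is re-created around the body roughly as

	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  {
	    .orditera[1] = <iteration number of V2>;
	    BODY;
	  }

   with the per-dimension counters kept up to date so that the
   depend(source)/depend(sink:) expansion above can read them.  */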
2540 
2541 /* A subroutine of expand_omp_for.  Generate code for a parallel
2542    loop with any schedule.  Given parameters:
2543 
2544 	for (V = N1; V cond N2; V += STEP) BODY;
2545 
2546    where COND is "<" or ">", we generate pseudocode
2547 
2548 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2549 	if (more) goto L0; else goto L3;
2550     L0:
2551 	V = istart0;
2552 	iend = iend0;
2553     L1:
2554 	BODY;
2555 	V += STEP;
2556 	if (V cond iend) goto L1; else goto L2;
2557     L2:
2558 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2559     L3:
2560 
2561     If this is a combined omp parallel loop, instead of the call to
2562     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2563     If this is gimple_omp_for_combined_p loop, then instead of assigning
2564     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2565     inner GIMPLE_OMP_FOR and V += STEP; and
2566     if (V cond iend) goto L1; else goto L2; are removed.
2567 
2568     For collapsed loops, given parameters:
2569       collapse(3)
2570       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2571 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2572 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2573 	    BODY;
2574 
2575     we generate pseudocode
2576 
2577 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2578 	if (cond3 is <)
2579 	  adj = STEP3 - 1;
2580 	else
2581 	  adj = STEP3 + 1;
2582 	count3 = (adj + N32 - N31) / STEP3;
2583 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2584 	if (cond2 is <)
2585 	  adj = STEP2 - 1;
2586 	else
2587 	  adj = STEP2 + 1;
2588 	count2 = (adj + N22 - N21) / STEP2;
2589 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2590 	if (cond1 is <)
2591 	  adj = STEP1 - 1;
2592 	else
2593 	  adj = STEP1 + 1;
2594 	count1 = (adj + N12 - N11) / STEP1;
2595 	count = count1 * count2 * count3;
2596 	goto Z1;
2597     Z0:
2598 	count = 0;
2599     Z1:
2600 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2601 	if (more) goto L0; else goto L3;
2602     L0:
2603 	V = istart0;
2604 	T = V;
2605 	V3 = N31 + (T % count3) * STEP3;
2606 	T = T / count3;
2607 	V2 = N21 + (T % count2) * STEP2;
2608 	T = T / count2;
2609 	V1 = N11 + T * STEP1;
2610 	iend = iend0;
2611     L1:
2612 	BODY;
2613 	V += 1;
2614 	if (V < iend) goto L10; else goto L2;
2615     L10:
2616 	V3 += STEP3;
2617 	if (V3 cond3 N32) goto L1; else goto L11;
2618     L11:
2619 	V3 = N31;
2620 	V2 += STEP2;
2621 	if (V2 cond2 N22) goto L1; else goto L12;
2622     L12:
2623 	V2 = N21;
2624 	V1 += STEP1;
2625 	goto L1;
2626     L2:
2627 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2628     L3:
2629 
2630       */
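
/* For illustration (not part of the original sources): in the pseudocode
   above, "foo" stands for the selected schedule.  E.g. for

	#pragma omp for schedule(dynamic, 4)
	for (i = 0; i < n; i++)
	  body (i);

   START_FN and NEXT_FN are expected to be the GOMP_loop_dynamic_start and
   GOMP_loop_dynamic_next libgomp entry points (or their _ull variants for
   unsigned long long iteration types), and CHUNK is 4.  */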
2631 
2632 static void
2633 expand_omp_for_generic (struct omp_region *region,
2634 			struct omp_for_data *fd,
2635 			enum built_in_function start_fn,
2636 			enum built_in_function next_fn,
2637 			tree sched_arg,
2638 			gimple *inner_stmt)
2639 {
2640   tree type, istart0, iend0, iend;
2641   tree t, vmain, vback, bias = NULL_TREE;
2642   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2643   basic_block l2_bb = NULL, l3_bb = NULL;
2644   gimple_stmt_iterator gsi;
2645   gassign *assign_stmt;
2646   bool in_combined_parallel = is_combined_parallel (region);
2647   bool broken_loop = region->cont == NULL;
2648   edge e, ne;
2649   tree *counts = NULL;
2650   int i;
2651   bool ordered_lastprivate = false;
2652 
2653   gcc_assert (!broken_loop || !in_combined_parallel);
2654   gcc_assert (fd->iter_type == long_integer_type_node
2655 	      || !in_combined_parallel);
2656 
2657   entry_bb = region->entry;
2658   cont_bb = region->cont;
2659   collapse_bb = NULL;
2660   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2661   gcc_assert (broken_loop
2662 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2663   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2664   l1_bb = single_succ (l0_bb);
2665   if (!broken_loop)
2666     {
2667       l2_bb = create_empty_bb (cont_bb);
2668       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2669 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2670 		      == l1_bb));
2671       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2672     }
2673   else
2674     l2_bb = NULL;
2675   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2676   exit_bb = region->exit;
2677 
2678   gsi = gsi_last_nondebug_bb (entry_bb);
2679 
2680   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2681   if (fd->ordered
2682       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2683 			  OMP_CLAUSE_LASTPRIVATE))
2684     ordered_lastprivate = true;
2685   tree reductions = NULL_TREE;
2686   tree mem = NULL_TREE;
2687   if (sched_arg)
2688     {
2689       if (fd->have_reductemp)
2690 	{
2691 	  tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2692 				    OMP_CLAUSE__REDUCTEMP_);
2693 	  reductions = OMP_CLAUSE_DECL (c);
2694 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2695 	  gimple *g = SSA_NAME_DEF_STMT (reductions);
2696 	  reductions = gimple_assign_rhs1 (g);
2697 	  OMP_CLAUSE_DECL (c) = reductions;
2698 	  entry_bb = gimple_bb (g);
2699 	  edge e = split_block (entry_bb, g);
2700 	  if (region->entry == entry_bb)
2701 	    region->entry = e->dest;
2702 	  gsi = gsi_last_bb (entry_bb);
2703 	}
2704       else
2705 	reductions = null_pointer_node;
2706       /* For now.  */
2707       mem = null_pointer_node;
2708     }
2709   if (fd->collapse > 1 || fd->ordered)
2710     {
2711       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2712       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2713 
2714       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2715       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2716 				  zero_iter1_bb, first_zero_iter1,
2717 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2718 
2719       if (zero_iter1_bb)
2720 	{
2721 	  /* Some counts[i] vars might be uninitialized if
2722 	     some loop has zero iterations.  But the body shouldn't
2723 	     be executed in that case, so just avoid uninit warnings.  */
2724 	  for (i = first_zero_iter1;
2725 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2726 	    if (SSA_VAR_P (counts[i]))
2727 	      TREE_NO_WARNING (counts[i]) = 1;
2728 	  gsi_prev (&gsi);
2729 	  e = split_block (entry_bb, gsi_stmt (gsi));
2730 	  entry_bb = e->dest;
2731 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2732 	  gsi = gsi_last_nondebug_bb (entry_bb);
2733 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2734 				   get_immediate_dominator (CDI_DOMINATORS,
2735 							    zero_iter1_bb));
2736 	}
2737       if (zero_iter2_bb)
2738 	{
2739 	  /* Some counts[i] vars might be uninitialized if
2740 	     some loop has zero iterations.  But the body shouldn't
2741 	     be executed in that case, so just avoid uninit warnings.  */
2742 	  for (i = first_zero_iter2; i < fd->ordered; i++)
2743 	    if (SSA_VAR_P (counts[i]))
2744 	      TREE_NO_WARNING (counts[i]) = 1;
2745 	  if (zero_iter1_bb)
2746 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2747 	  else
2748 	    {
2749 	      gsi_prev (&gsi);
2750 	      e = split_block (entry_bb, gsi_stmt (gsi));
2751 	      entry_bb = e->dest;
2752 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2753 	      gsi = gsi_last_nondebug_bb (entry_bb);
2754 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2755 				       get_immediate_dominator
2756 					 (CDI_DOMINATORS, zero_iter2_bb));
2757 	    }
2758 	}
2759       if (fd->collapse == 1)
2760 	{
2761 	  counts[0] = fd->loop.n2;
2762 	  fd->loop = fd->loops[0];
2763 	}
2764     }
2765 
2766   type = TREE_TYPE (fd->loop.v);
2767   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2768   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2769   TREE_ADDRESSABLE (istart0) = 1;
2770   TREE_ADDRESSABLE (iend0) = 1;
2771 
2772   /* See if we need to bias by LLONG_MIN.  */
2773   if (fd->iter_type == long_long_unsigned_type_node
2774       && TREE_CODE (type) == INTEGER_TYPE
2775       && !TYPE_UNSIGNED (type)
2776       && fd->ordered == 0)
2777     {
2778       tree n1, n2;
2779 
2780       if (fd->loop.cond_code == LT_EXPR)
2781 	{
2782 	  n1 = fd->loop.n1;
2783 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2784 	}
2785       else
2786 	{
2787 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2788 	  n2 = fd->loop.n1;
2789 	}
2790       if (TREE_CODE (n1) != INTEGER_CST
2791 	  || TREE_CODE (n2) != INTEGER_CST
2792 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2793 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2794     }
2795 
2796   gimple_stmt_iterator gsif = gsi;
2797   gsi_prev (&gsif);
2798 
2799   tree arr = NULL_TREE;
2800   if (in_combined_parallel)
2801     {
2802       gcc_assert (fd->ordered == 0);
2803       /* In a combined parallel loop, emit a call to
2804 	 GOMP_loop_foo_next.  */
2805       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2806 			   build_fold_addr_expr (istart0),
2807 			   build_fold_addr_expr (iend0));
2808     }
2809   else
2810     {
2811       tree t0, t1, t2, t3, t4;
2812       /* If this is not a combined parallel loop, emit a call to
2813 	 GOMP_loop_foo_start in ENTRY_BB.  */
2814       t4 = build_fold_addr_expr (iend0);
2815       t3 = build_fold_addr_expr (istart0);
2816       if (fd->ordered)
2817 	{
2818 	  t0 = build_int_cst (unsigned_type_node,
2819 			      fd->ordered - fd->collapse + 1);
2820 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2821 							fd->ordered
2822 							- fd->collapse + 1),
2823 				".omp_counts");
2824 	  DECL_NAMELESS (arr) = 1;
2825 	  TREE_ADDRESSABLE (arr) = 1;
2826 	  TREE_STATIC (arr) = 1;
2827 	  vec<constructor_elt, va_gc> *v;
2828 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2829 	  int idx;
2830 
2831 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2832 	    {
2833 	      tree c;
2834 	      if (idx == 0 && fd->collapse > 1)
2835 		c = fd->loop.n2;
2836 	      else
2837 		c = counts[idx + fd->collapse - 1];
2838 	      tree purpose = size_int (idx);
2839 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2840 	      if (TREE_CODE (c) != INTEGER_CST)
2841 		TREE_STATIC (arr) = 0;
2842 	    }
2843 
2844 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2845 	  if (!TREE_STATIC (arr))
2846 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2847 						    void_type_node, arr),
2848 				      true, NULL_TREE, true, GSI_SAME_STMT);
2849 	  t1 = build_fold_addr_expr (arr);
2850 	  t2 = NULL_TREE;
2851 	}
2852       else
2853 	{
2854 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2855 	  t1 = fd->loop.n2;
2856 	  t0 = fd->loop.n1;
2857 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2858 	    {
2859 	      tree innerc
2860 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2861 				   OMP_CLAUSE__LOOPTEMP_);
2862 	      gcc_assert (innerc);
2863 	      t0 = OMP_CLAUSE_DECL (innerc);
2864 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2865 					OMP_CLAUSE__LOOPTEMP_);
2866 	      gcc_assert (innerc);
2867 	      t1 = OMP_CLAUSE_DECL (innerc);
2868 	    }
2869 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2870 	      && TYPE_PRECISION (TREE_TYPE (t0))
2871 		 != TYPE_PRECISION (fd->iter_type))
2872 	    {
2873 	      /* Avoid casting pointers to an integer of a different size.  */
2874 	      tree itype = signed_type_for (type);
2875 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2876 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2877 	    }
2878 	  else
2879 	    {
2880 	      t1 = fold_convert (fd->iter_type, t1);
2881 	      t0 = fold_convert (fd->iter_type, t0);
2882 	    }
2883 	  if (bias)
2884 	    {
2885 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2886 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2887 	    }
2888 	}
2889       if (fd->iter_type == long_integer_type_node || fd->ordered)
2890 	{
2891 	  if (fd->chunk_size)
2892 	    {
2893 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2894 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2895 	      if (sched_arg)
2896 		{
2897 		  if (fd->ordered)
2898 		    t = build_call_expr (builtin_decl_explicit (start_fn),
2899 					 8, t0, t1, sched_arg, t, t3, t4,
2900 					 reductions, mem);
2901 		  else
2902 		    t = build_call_expr (builtin_decl_explicit (start_fn),
2903 					 9, t0, t1, t2, sched_arg, t, t3, t4,
2904 					 reductions, mem);
2905 		}
2906 	      else if (fd->ordered)
2907 		t = build_call_expr (builtin_decl_explicit (start_fn),
2908 				     5, t0, t1, t, t3, t4);
2909 	      else
2910 		t = build_call_expr (builtin_decl_explicit (start_fn),
2911 				     6, t0, t1, t2, t, t3, t4);
2912 	    }
2913 	  else if (fd->ordered)
2914 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2915 				 4, t0, t1, t3, t4);
2916 	  else
2917 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2918 				 5, t0, t1, t2, t3, t4);
2919 	}
2920       else
2921 	{
2922 	  tree t5;
2923 	  tree c_bool_type;
2924 	  tree bfn_decl;
2925 
2926 	  /* The GOMP_loop_ull_*start functions have an additional boolean
2927 	     argument, true for < loops and false for > loops.
2928 	     In Fortran, the C bool type can be different from
2929 	     boolean_type_node.  */
2930 	  bfn_decl = builtin_decl_explicit (start_fn);
2931 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2932 	  t5 = build_int_cst (c_bool_type,
2933 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2934 	  if (fd->chunk_size)
2935 	    {
2936 	      tree bfn_decl = builtin_decl_explicit (start_fn);
2937 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2938 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2939 	      if (sched_arg)
2940 		t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2941 				     t, t3, t4, reductions, mem);
2942 	      else
2943 		t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2944 	    }
2945 	  else
2946 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2947 				 6, t5, t0, t1, t2, t3, t4);
2948 	}
2949     }
2950   if (TREE_TYPE (t) != boolean_type_node)
2951     t = fold_build2 (NE_EXPR, boolean_type_node,
2952 		     t, build_int_cst (TREE_TYPE (t), 0));
2953   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2954 				true, GSI_SAME_STMT);
2955   if (arr && !TREE_STATIC (arr))
2956     {
2957       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2958       TREE_THIS_VOLATILE (clobber) = 1;
2959       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2960 			 GSI_SAME_STMT);
2961     }
2962   if (fd->have_reductemp)
2963     {
2964       gimple *g = gsi_stmt (gsi);
2965       gsi_remove (&gsi, true);
2966       release_ssa_name (gimple_assign_lhs (g));
2967 
2968       entry_bb = region->entry;
2969       gsi = gsi_last_nondebug_bb (entry_bb);
2970 
2971       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2972     }
2973   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2974 
2975   /* Remove the GIMPLE_OMP_FOR statement.  */
2976   gsi_remove (&gsi, true);
2977 
2978   if (gsi_end_p (gsif))
2979     gsif = gsi_after_labels (gsi_bb (gsif));
2980   gsi_next (&gsif);
2981 
2982   /* Iteration setup for sequential loop goes in L0_BB.  */
2983   tree startvar = fd->loop.v;
2984   tree endvar = NULL_TREE;
2985 
2986   if (gimple_omp_for_combined_p (fd->for_stmt))
2987     {
2988       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2989 		  && gimple_omp_for_kind (inner_stmt)
2990 		     == GF_OMP_FOR_KIND_SIMD);
2991       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2992 				     OMP_CLAUSE__LOOPTEMP_);
2993       gcc_assert (innerc);
2994       startvar = OMP_CLAUSE_DECL (innerc);
2995       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2996 				OMP_CLAUSE__LOOPTEMP_);
2997       gcc_assert (innerc);
2998       endvar = OMP_CLAUSE_DECL (innerc);
2999     }
3000 
3001   gsi = gsi_start_bb (l0_bb);
3002   t = istart0;
3003   if (fd->ordered && fd->collapse == 1)
3004     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3005 		     fold_convert (fd->iter_type, fd->loop.step));
3006   else if (bias)
3007     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3008   if (fd->ordered && fd->collapse == 1)
3009     {
3010       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3011 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3012 			 fd->loop.n1, fold_convert (sizetype, t));
3013       else
3014 	{
3015 	  t = fold_convert (TREE_TYPE (startvar), t);
3016 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3017 			   fd->loop.n1, t);
3018 	}
3019     }
3020   else
3021     {
3022       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3023 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3024       t = fold_convert (TREE_TYPE (startvar), t);
3025     }
3026   t = force_gimple_operand_gsi (&gsi, t,
3027 				DECL_P (startvar)
3028 				&& TREE_ADDRESSABLE (startvar),
3029 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3030   assign_stmt = gimple_build_assign (startvar, t);
3031   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3032 
3033   t = iend0;
3034   if (fd->ordered && fd->collapse == 1)
3035     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3036 		     fold_convert (fd->iter_type, fd->loop.step));
3037   else if (bias)
3038     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3039   if (fd->ordered && fd->collapse == 1)
3040     {
3041       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3042 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3043 			 fd->loop.n1, fold_convert (sizetype, t));
3044       else
3045 	{
3046 	  t = fold_convert (TREE_TYPE (startvar), t);
3047 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3048 			   fd->loop.n1, t);
3049 	}
3050     }
3051   else
3052     {
3053       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3054 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3055       t = fold_convert (TREE_TYPE (startvar), t);
3056     }
3057   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3058 				   false, GSI_CONTINUE_LINKING);
3059   if (endvar)
3060     {
3061       assign_stmt = gimple_build_assign (endvar, iend);
3062       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3063       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3064 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
3065       else
3066 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3067       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068     }
3069   /* Handle linear clause adjustments.  */
3070   tree itercnt = NULL_TREE;
3071   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3072     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3073 	 c; c = OMP_CLAUSE_CHAIN (c))
3074       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3075 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3076 	{
3077 	  tree d = OMP_CLAUSE_DECL (c);
3078 	  bool is_ref = omp_is_reference (d);
3079 	  tree t = d, a, dest;
3080 	  if (is_ref)
3081 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3082 	  tree type = TREE_TYPE (t);
3083 	  if (POINTER_TYPE_P (type))
3084 	    type = sizetype;
3085 	  dest = unshare_expr (t);
3086 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
3087 	  expand_omp_build_assign (&gsif, v, t);
3088 	  if (itercnt == NULL_TREE)
3089 	    {
3090 	      itercnt = startvar;
3091 	      tree n1 = fd->loop.n1;
3092 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3093 		{
3094 		  itercnt
3095 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3096 				    itercnt);
3097 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
3098 		}
3099 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3100 				     itercnt, n1);
3101 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3102 				     itercnt, fd->loop.step);
3103 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3104 						  NULL_TREE, false,
3105 						  GSI_CONTINUE_LINKING);
3106 	    }
3107 	  a = fold_build2 (MULT_EXPR, type,
3108 			   fold_convert (type, itercnt),
3109 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3110 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3111 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3112 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3113 					false, GSI_CONTINUE_LINKING);
3114 	  assign_stmt = gimple_build_assign (dest, t);
3115 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3116 	}
3117   if (fd->collapse > 1)
3118     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3119 
3120   if (fd->ordered)
3121     {
3122       /* Until now, the counts array contained the number of iterations
3123 	 (or a variable holding it) for the ith loop.  From now on, we
3124 	 need those counts only for the collapsed loops, and only for the
3125 	 2nd through the last collapsed one.  Move them one element
3126 	 earlier; we'll use counts[fd->collapse - 1] for the first
3127 	 source/sink iteration counter and so on, and counts[fd->ordered]
3128 	 as the array holding the current counter values for
3129 	 depend(source).  */
3130       if (fd->collapse > 1)
3131 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3132       if (broken_loop)
3133 	{
3134 	  int i;
3135 	  for (i = fd->collapse; i < fd->ordered; i++)
3136 	    {
3137 	      tree type = TREE_TYPE (fd->loops[i].v);
3138 	      tree this_cond
3139 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3140 			       fold_convert (type, fd->loops[i].n1),
3141 			       fold_convert (type, fd->loops[i].n2));
3142 	      if (!integer_onep (this_cond))
3143 		break;
3144 	    }
3145 	  if (i < fd->ordered)
3146 	    {
3147 	      cont_bb
3148 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3149 	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
3150 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3151 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3152 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3153 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3154 	      make_edge (cont_bb, l1_bb, 0);
3155 	      l2_bb = create_empty_bb (cont_bb);
3156 	      broken_loop = false;
3157 	    }
3158 	}
3159       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3160       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3161 					      ordered_lastprivate);
3162       if (counts[fd->collapse - 1])
3163 	{
3164 	  gcc_assert (fd->collapse == 1);
3165 	  gsi = gsi_last_bb (l0_bb);
3166 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3167 				   istart0, true);
3168 	  gsi = gsi_last_bb (cont_bb);
3169 	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3170 			   build_int_cst (fd->iter_type, 1));
3171 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3172 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3173 			      size_zero_node, NULL_TREE, NULL_TREE);
3174 	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3175 	  t = counts[fd->collapse - 1];
3176 	}
3177       else if (fd->collapse > 1)
3178 	t = fd->loop.v;
3179       else
3180 	{
3181 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3182 			   fd->loops[0].v, fd->loops[0].n1);
3183 	  t = fold_convert (fd->iter_type, t);
3184 	}
3185       gsi = gsi_last_bb (l0_bb);
3186       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3187 			  size_zero_node, NULL_TREE, NULL_TREE);
3188       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3189 				    false, GSI_CONTINUE_LINKING);
3190       expand_omp_build_assign (&gsi, aref, t, true);
3191     }
3192 
3193   if (!broken_loop)
3194     {
3195       /* Code to control the increment and predicate for the sequential
3196 	 loop goes in the CONT_BB.  */
3197       gsi = gsi_last_nondebug_bb (cont_bb);
3198       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3199       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3200       vmain = gimple_omp_continue_control_use (cont_stmt);
3201       vback = gimple_omp_continue_control_def (cont_stmt);
3202 
3203       if (!gimple_omp_for_combined_p (fd->for_stmt))
3204 	{
3205 	  if (POINTER_TYPE_P (type))
3206 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3207 	  else
3208 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3209 	  t = force_gimple_operand_gsi (&gsi, t,
3210 					DECL_P (vback)
3211 					&& TREE_ADDRESSABLE (vback),
3212 					NULL_TREE, true, GSI_SAME_STMT);
3213 	  assign_stmt = gimple_build_assign (vback, t);
3214 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3215 
3216 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3217 	    {
3218 	      tree tem;
3219 	      if (fd->collapse > 1)
3220 		tem = fd->loop.v;
3221 	      else
3222 		{
3223 		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3224 				     fd->loops[0].v, fd->loops[0].n1);
3225 		  tem = fold_convert (fd->iter_type, tem);
3226 		}
3227 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3228 				  counts[fd->ordered], size_zero_node,
3229 				  NULL_TREE, NULL_TREE);
3230 	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3231 					      true, GSI_SAME_STMT);
3232 	      expand_omp_build_assign (&gsi, aref, tem);
3233 	    }
3234 
3235 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3236 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3237 		      iend);
3238 	  gcond *cond_stmt = gimple_build_cond_empty (t);
3239 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3240 	}
3241 
3242       /* Remove GIMPLE_OMP_CONTINUE.  */
3243       gsi_remove (&gsi, true);
3244 
3245       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3246 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3247 
3248       /* Emit code to get the next parallel iteration in L2_BB.  */
3249       gsi = gsi_start_bb (l2_bb);
3250 
3251       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3252 			   build_fold_addr_expr (istart0),
3253 			   build_fold_addr_expr (iend0));
3254       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3255 				    false, GSI_CONTINUE_LINKING);
3256       if (TREE_TYPE (t) != boolean_type_node)
3257 	t = fold_build2 (NE_EXPR, boolean_type_node,
3258 			 t, build_int_cst (TREE_TYPE (t), 0));
3259       gcond *cond_stmt = gimple_build_cond_empty (t);
3260       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3261     }
3262 
3263   /* Add the loop cleanup function.  */
3264   gsi = gsi_last_nondebug_bb (exit_bb);
3265   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3266     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3267   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3268     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3269   else
3270     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3271   gcall *call_stmt = gimple_build_call (t, 0);
3272   if (fd->ordered)
3273     {
3274       tree arr = counts[fd->ordered];
3275       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3276       TREE_THIS_VOLATILE (clobber) = 1;
3277       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3278 			GSI_SAME_STMT);
3279     }
3280   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3281     {
3282       gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3283       if (fd->have_reductemp)
3284 	{
3285 	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3286 					   gimple_call_lhs (call_stmt));
3287 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3288 	}
3289     }
3290   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3291   gsi_remove (&gsi, true);
3292 
3293   /* Connect the new blocks.  */
3294   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3295   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3296 
3297   if (!broken_loop)
3298     {
3299       gimple_seq phis;
3300 
3301       e = find_edge (cont_bb, l3_bb);
3302       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3303 
3304       phis = phi_nodes (l3_bb);
3305       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3306 	{
3307 	  gimple *phi = gsi_stmt (gsi);
3308 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3309 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3310 	}
3311       remove_edge (e);
3312 
3313       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3314       e = find_edge (cont_bb, l1_bb);
3315       if (e == NULL)
3316 	{
3317 	  e = BRANCH_EDGE (cont_bb);
3318 	  gcc_assert (single_succ (e->dest) == l1_bb);
3319 	}
3320       if (gimple_omp_for_combined_p (fd->for_stmt))
3321 	{
3322 	  remove_edge (e);
3323 	  e = NULL;
3324 	}
3325       else if (fd->collapse > 1)
3326 	{
3327 	  remove_edge (e);
3328 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3329 	}
3330       else
3331 	e->flags = EDGE_TRUE_VALUE;
3332       if (e)
3333 	{
3334 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3335 	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3336 	}
3337       else
3338 	{
3339 	  e = find_edge (cont_bb, l2_bb);
3340 	  e->flags = EDGE_FALLTHRU;
3341 	}
3342       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3343 
3344       if (gimple_in_ssa_p (cfun))
3345 	{
3346 	  /* Add phis to the outer loop that connect to the phis in the inner,
3347 	     original loop, and move the loop entry value of the inner phi to
3348 	     the loop entry value of the outer phi.  */
3349 	  gphi_iterator psi;
3350 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3351 	    {
3352 	      location_t locus;
3353 	      gphi *nphi;
3354 	      gphi *exit_phi = psi.phi ();
3355 
3356 	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
3357 		continue;
3358 
3359 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3360 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3361 
3362 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3363 	      edge latch_to_l1 = find_edge (latch, l1_bb);
3364 	      gphi *inner_phi
3365 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3366 
3367 	      tree t = gimple_phi_result (exit_phi);
3368 	      tree new_res = copy_ssa_name (t, NULL);
3369 	      nphi = create_phi_node (new_res, l0_bb);
3370 
3371 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3372 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3373 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3374 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3375 	      add_phi_arg (nphi, t, entry_to_l0, locus);
3376 
3377 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3378 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3379 
3380 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3381 	    }
3382 	}
3383 
3384       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3385 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3386       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3387 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3388       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3389 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3390       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3391 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3392 
3393       /* We enter expand_omp_for_generic with a loop.  This original loop may
3394 	 have its own loop struct, or it may be part of an outer loop struct
3395 	 (which may be the fake loop).  */
3396       struct loop *outer_loop = entry_bb->loop_father;
3397       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3398 
3399       add_bb_to_loop (l2_bb, outer_loop);
3400 
3401       /* We've added a new loop around the original loop.  Allocate the
3402 	 corresponding loop struct.  */
3403       struct loop *new_loop = alloc_loop ();
3404       new_loop->header = l0_bb;
3405       new_loop->latch = l2_bb;
3406       add_loop (new_loop, outer_loop);
3407 
3408       /* Allocate a loop structure for the original loop unless we already
3409 	 had one.  */
3410       if (!orig_loop_has_loop_struct
3411 	  && !gimple_omp_for_combined_p (fd->for_stmt))
3412 	{
3413 	  struct loop *orig_loop = alloc_loop ();
3414 	  orig_loop->header = l1_bb;
3415 	  /* The loop may have multiple latches.  */
3416 	  add_loop (orig_loop, new_loop);
3417 	}
3418     }
3419 }
3420 
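/* Editorial sketch, not part of GCC: the expansion finished above hands
   iteration chunks out at run time.  L2_BB calls the *_next builtin
   selected in NEXT_FN (see the build_call_expr on istart0/iend0 above),
   and EXIT_BB calls GOMP_loop_end, GOMP_loop_end_cancel or
   GOMP_loop_end_nowait.  The helper below approximates, in plain C, the
   per-thread control flow that results, assuming STEP == 1 and a "<"
   condition; every name in it is invented for the illustration.  */

typedef int (*example_next_chunk_fn) (long *istart, long *iend);
typedef void (*example_end_fn) (void);

static void
example_runtime_scheduled_loop (int have_first_chunk, long istart,
				long iend, example_next_chunk_fn next_chunk,
				example_end_fn loop_end,
				void (*body) (long))
{
  if (have_first_chunk)
    do
      {
	/* L0/L1: run the sequential sub-loop over the current chunk.  */
	for (long v = istart; v < iend; v++)
	  body (v);
	/* L2: ask the runtime for another [istart, iend) chunk.  */
      }
    while (next_chunk (&istart, &iend));
  /* Exit: every thread reports completion to the runtime.  */
  loop_end ();
}
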
3421 /* A subroutine of expand_omp_for.  Generate code for a parallel
3422    loop with static schedule and no specified chunk size.  Given
3423    parameters:
3424 
3425 	for (V = N1; V cond N2; V += STEP) BODY;
3426 
3427    where COND is "<" or ">", we generate pseudocode
3428 
3429 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3430 	if (cond is <)
3431 	  adj = STEP - 1;
3432 	else
3433 	  adj = STEP + 1;
3434 	if ((__typeof (V)) -1 > 0 && cond is >)
3435 	  n = -(adj + N2 - N1) / -STEP;
3436 	else
3437 	  n = (adj + N2 - N1) / STEP;
3438 	q = n / nthreads;
3439 	tt = n % nthreads;
3440 	if (threadid < tt) goto L3; else goto L4;
3441     L3:
3442 	tt = 0;
3443 	q = q + 1;
3444     L4:
3445 	s0 = q * threadid + tt;
3446 	e0 = s0 + q;
3447 	V = s0 * STEP + N1;
3448 	if (s0 >= e0) goto L2; else goto L0;
3449     L0:
3450 	e = e0 * STEP + N1;
3451     L1:
3452 	BODY;
3453 	V += STEP;
3454 	if (V cond e) goto L1;
3455     L2:
3456 */
3457 
3458 static void
3459 expand_omp_for_static_nochunk (struct omp_region *region,
3460 			       struct omp_for_data *fd,
3461 			       gimple *inner_stmt)
3462 {
3463   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3464   tree type, itype, vmain, vback;
3465   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3466   basic_block body_bb, cont_bb, collapse_bb = NULL;
3467   basic_block fin_bb;
3468   gimple_stmt_iterator gsi;
3469   edge ep;
3470   bool broken_loop = region->cont == NULL;
3471   tree *counts = NULL;
3472   tree n1, n2, step;
3473   tree reductions = NULL_TREE;
3474 
3475   itype = type = TREE_TYPE (fd->loop.v);
3476   if (POINTER_TYPE_P (type))
3477     itype = signed_type_for (type);
3478 
3479   entry_bb = region->entry;
3480   cont_bb = region->cont;
3481   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3482   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3483   gcc_assert (broken_loop
3484 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3485   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3486   body_bb = single_succ (seq_start_bb);
3487   if (!broken_loop)
3488     {
3489       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3490 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3491       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3492     }
3493   exit_bb = region->exit;
3494 
3495   /* Iteration space partitioning goes in ENTRY_BB.  */
3496   gsi = gsi_last_nondebug_bb (entry_bb);
3497   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3498 
3499   if (fd->collapse > 1)
3500     {
3501       int first_zero_iter = -1, dummy = -1;
3502       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3503 
3504       counts = XALLOCAVEC (tree, fd->collapse);
3505       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3506 				  fin_bb, first_zero_iter,
3507 				  dummy_bb, dummy, l2_dom_bb);
3508       t = NULL_TREE;
3509     }
3510   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3511     t = integer_one_node;
3512   else
3513     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3514 		     fold_convert (type, fd->loop.n1),
3515 		     fold_convert (type, fd->loop.n2));
3516   if (fd->collapse == 1
3517       && TYPE_UNSIGNED (type)
3518       && (t == NULL_TREE || !integer_onep (t)))
3519     {
3520       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3521       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3522 				     true, GSI_SAME_STMT);
3523       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3524       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3525 				     true, GSI_SAME_STMT);
3526       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3527 						 NULL_TREE, NULL_TREE);
3528       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3529       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3530 		     expand_omp_regimplify_p, NULL, NULL)
3531 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3532 			expand_omp_regimplify_p, NULL, NULL))
3533 	{
3534 	  gsi = gsi_for_stmt (cond_stmt);
3535 	  gimple_regimplify_operands (cond_stmt, &gsi);
3536 	}
3537       ep = split_block (entry_bb, cond_stmt);
3538       ep->flags = EDGE_TRUE_VALUE;
3539       entry_bb = ep->dest;
3540       ep->probability = profile_probability::very_likely ();
3541       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3542       ep->probability = profile_probability::very_unlikely ();
3543       if (gimple_in_ssa_p (cfun))
3544 	{
3545 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3546 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3547 	       !gsi_end_p (gpi); gsi_next (&gpi))
3548 	    {
3549 	      gphi *phi = gpi.phi ();
3550 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3551 			   ep, UNKNOWN_LOCATION);
3552 	    }
3553 	}
3554       gsi = gsi_last_bb (entry_bb);
3555     }
3556 
3557   if (fd->have_reductemp)
3558     {
3559       tree t1 = build_int_cst (long_integer_type_node, 0);
3560       tree t2 = build_int_cst (long_integer_type_node, 1);
3561       tree t3 = build_int_cstu (long_integer_type_node,
3562 				(HOST_WIDE_INT_1U << 31) + 1);
3563       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3564       clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3565       reductions = OMP_CLAUSE_DECL (clauses);
3566       gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3567       gimple *g = SSA_NAME_DEF_STMT (reductions);
3568       reductions = gimple_assign_rhs1 (g);
3569       OMP_CLAUSE_DECL (clauses) = reductions;
3570       gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
3571       tree t
3572 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3573 			   9, t1, t2, t2, t3, t1, null_pointer_node,
3574 			   null_pointer_node, reductions, null_pointer_node);
3575       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3576 				true, GSI_SAME_STMT);
3577       gsi_remove (&gsi2, true);
3578       release_ssa_name (gimple_assign_lhs (g));
3579     }
3580   switch (gimple_omp_for_kind (fd->for_stmt))
3581     {
3582     case GF_OMP_FOR_KIND_FOR:
3583       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3584       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3585       break;
3586     case GF_OMP_FOR_KIND_DISTRIBUTE:
3587       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3588       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3589       break;
3590     default:
3591       gcc_unreachable ();
3592     }
3593   nthreads = build_call_expr (nthreads, 0);
3594   nthreads = fold_convert (itype, nthreads);
3595   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3596 				       true, GSI_SAME_STMT);
3597   threadid = build_call_expr (threadid, 0);
3598   threadid = fold_convert (itype, threadid);
3599   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3600 				       true, GSI_SAME_STMT);
3601 
3602   n1 = fd->loop.n1;
3603   n2 = fd->loop.n2;
3604   step = fd->loop.step;
3605   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3606     {
3607       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3608 				     OMP_CLAUSE__LOOPTEMP_);
3609       gcc_assert (innerc);
3610       n1 = OMP_CLAUSE_DECL (innerc);
3611       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3612 				OMP_CLAUSE__LOOPTEMP_);
3613       gcc_assert (innerc);
3614       n2 = OMP_CLAUSE_DECL (innerc);
3615     }
3616   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3617 				 true, NULL_TREE, true, GSI_SAME_STMT);
3618   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3619 				 true, NULL_TREE, true, GSI_SAME_STMT);
3620   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3621 				   true, NULL_TREE, true, GSI_SAME_STMT);
3622 
3623   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3624   t = fold_build2 (PLUS_EXPR, itype, step, t);
3625   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3626   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3627   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3628     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3629 		     fold_build1 (NEGATE_EXPR, itype, t),
3630 		     fold_build1 (NEGATE_EXPR, itype, step));
3631   else
3632     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3633   t = fold_convert (itype, t);
3634   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3635 
3636   q = create_tmp_reg (itype, "q");
3637   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3638   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3639   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3640 
3641   tt = create_tmp_reg (itype, "tt");
3642   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3643   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3644   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3645 
3646   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3647   gcond *cond_stmt = gimple_build_cond_empty (t);
3648   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3649 
3650   second_bb = split_block (entry_bb, cond_stmt)->dest;
3651   gsi = gsi_last_nondebug_bb (second_bb);
3652   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3653 
3654   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3655 		     GSI_SAME_STMT);
3656   gassign *assign_stmt
3657     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3658   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3659 
3660   third_bb = split_block (second_bb, assign_stmt)->dest;
3661   gsi = gsi_last_nondebug_bb (third_bb);
3662   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3663 
3664   t = build2 (MULT_EXPR, itype, q, threadid);
3665   t = build2 (PLUS_EXPR, itype, t, tt);
3666   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3667 
3668   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3669   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3670 
3671   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3672   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3673 
3674   /* Remove the GIMPLE_OMP_FOR statement.  */
3675   gsi_remove (&gsi, true);
3676 
3677   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3678   gsi = gsi_start_bb (seq_start_bb);
3679 
3680   tree startvar = fd->loop.v;
3681   tree endvar = NULL_TREE;
3682 
3683   if (gimple_omp_for_combined_p (fd->for_stmt))
3684     {
3685       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3686 		     ? gimple_omp_parallel_clauses (inner_stmt)
3687 		     : gimple_omp_for_clauses (inner_stmt);
3688       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3689       gcc_assert (innerc);
3690       startvar = OMP_CLAUSE_DECL (innerc);
3691       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3692 				OMP_CLAUSE__LOOPTEMP_);
3693       gcc_assert (innerc);
3694       endvar = OMP_CLAUSE_DECL (innerc);
3695       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3696 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3697 	{
3698 	  int i;
3699 	  for (i = 1; i < fd->collapse; i++)
3700 	    {
3701 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3702 					OMP_CLAUSE__LOOPTEMP_);
3703 	      gcc_assert (innerc);
3704 	    }
3705 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3706 				    OMP_CLAUSE__LOOPTEMP_);
3707 	  if (innerc)
3708 	    {
3709 	      /* If needed (distribute parallel for with lastprivate),
3710 		 propagate down the total number of iterations.  */
3711 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3712 				     fd->loop.n2);
3713 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3714 					    GSI_CONTINUE_LINKING);
3715 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3716 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3717 	    }
3718 	}
3719     }
3720   t = fold_convert (itype, s0);
3721   t = fold_build2 (MULT_EXPR, itype, t, step);
3722   if (POINTER_TYPE_P (type))
3723     {
3724       t = fold_build_pointer_plus (n1, t);
3725       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3726 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3727 	t = fold_convert (signed_type_for (type), t);
3728     }
3729   else
3730     t = fold_build2 (PLUS_EXPR, type, t, n1);
3731   t = fold_convert (TREE_TYPE (startvar), t);
3732   t = force_gimple_operand_gsi (&gsi, t,
3733 				DECL_P (startvar)
3734 				&& TREE_ADDRESSABLE (startvar),
3735 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3736   assign_stmt = gimple_build_assign (startvar, t);
3737   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3738 
3739   t = fold_convert (itype, e0);
3740   t = fold_build2 (MULT_EXPR, itype, t, step);
3741   if (POINTER_TYPE_P (type))
3742     {
3743       t = fold_build_pointer_plus (n1, t);
3744       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3745 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3746 	t = fold_convert (signed_type_for (type), t);
3747     }
3748   else
3749     t = fold_build2 (PLUS_EXPR, type, t, n1);
3750   t = fold_convert (TREE_TYPE (startvar), t);
3751   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3752 				false, GSI_CONTINUE_LINKING);
3753   if (endvar)
3754     {
3755       assign_stmt = gimple_build_assign (endvar, e);
3756       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3757       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3758 	assign_stmt = gimple_build_assign (fd->loop.v, e);
3759       else
3760 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3761       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3762     }
3763   /* Handle linear clause adjustments.  */
3764   tree itercnt = NULL_TREE;
3765   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3766     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3767 	 c; c = OMP_CLAUSE_CHAIN (c))
3768       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3769 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3770 	{
3771 	  tree d = OMP_CLAUSE_DECL (c);
3772 	  bool is_ref = omp_is_reference (d);
3773 	  tree t = d, a, dest;
3774 	  if (is_ref)
3775 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3776 	  if (itercnt == NULL_TREE)
3777 	    {
3778 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3779 		{
3780 		  itercnt = fold_build2 (MINUS_EXPR, itype,
3781 					 fold_convert (itype, n1),
3782 					 fold_convert (itype, fd->loop.n1));
3783 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3784 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3785 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3786 						      NULL_TREE, false,
3787 						      GSI_CONTINUE_LINKING);
3788 		}
3789 	      else
3790 		itercnt = s0;
3791 	    }
3792 	  tree type = TREE_TYPE (t);
3793 	  if (POINTER_TYPE_P (type))
3794 	    type = sizetype;
3795 	  a = fold_build2 (MULT_EXPR, type,
3796 			   fold_convert (type, itercnt),
3797 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3798 	  dest = unshare_expr (t);
3799 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3800 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3801 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3802 					false, GSI_CONTINUE_LINKING);
3803 	  assign_stmt = gimple_build_assign (dest, t);
3804 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3805 	}
3806   if (fd->collapse > 1)
3807     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3808 
3809   if (!broken_loop)
3810     {
3811       /* The code controlling the sequential loop replaces the
3812 	 GIMPLE_OMP_CONTINUE.  */
3813       gsi = gsi_last_nondebug_bb (cont_bb);
3814       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3815       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3816       vmain = gimple_omp_continue_control_use (cont_stmt);
3817       vback = gimple_omp_continue_control_def (cont_stmt);
3818 
3819       if (!gimple_omp_for_combined_p (fd->for_stmt))
3820 	{
3821 	  if (POINTER_TYPE_P (type))
3822 	    t = fold_build_pointer_plus (vmain, step);
3823 	  else
3824 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3825 	  t = force_gimple_operand_gsi (&gsi, t,
3826 					DECL_P (vback)
3827 					&& TREE_ADDRESSABLE (vback),
3828 					NULL_TREE, true, GSI_SAME_STMT);
3829 	  assign_stmt = gimple_build_assign (vback, t);
3830 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3831 
3832 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3833 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3834 		      ? t : vback, e);
3835 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3836 	}
3837 
3838       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3839       gsi_remove (&gsi, true);
3840 
3841       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3842 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3843     }
3844 
3845   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3846   gsi = gsi_last_nondebug_bb (exit_bb);
3847   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3848     {
3849       t = gimple_omp_return_lhs (gsi_stmt (gsi));
3850       if (fd->have_reductemp)
3851 	{
3852 	  tree fn;
3853 	  if (t)
3854 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3855 	  else
3856 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3857 	  gcall *g = gimple_build_call (fn, 0);
3858 	  if (t)
3859 	    {
3860 	      gimple_call_set_lhs (g, t);
3861 	      gsi_insert_after (&gsi, gimple_build_assign (reductions,
3862 							   NOP_EXPR, t),
3863 				GSI_SAME_STMT);
3864 	    }
3865 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3866 	}
3867       else
3868 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3869     }
3870   gsi_remove (&gsi, true);
3871 
3872   /* Connect all the blocks.  */
3873   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3874   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3875   ep = find_edge (entry_bb, second_bb);
3876   ep->flags = EDGE_TRUE_VALUE;
3877   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3878   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3879   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3880 
3881   if (!broken_loop)
3882     {
3883       ep = find_edge (cont_bb, body_bb);
3884       if (ep == NULL)
3885 	{
3886 	  ep = BRANCH_EDGE (cont_bb);
3887 	  gcc_assert (single_succ (ep->dest) == body_bb);
3888 	}
3889       if (gimple_omp_for_combined_p (fd->for_stmt))
3890 	{
3891 	  remove_edge (ep);
3892 	  ep = NULL;
3893 	}
3894       else if (fd->collapse > 1)
3895 	{
3896 	  remove_edge (ep);
3897 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3898 	}
3899       else
3900 	ep->flags = EDGE_TRUE_VALUE;
3901       find_edge (cont_bb, fin_bb)->flags
3902 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3903     }
3904 
3905   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3906   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3907   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3908 
3909   set_immediate_dominator (CDI_DOMINATORS, body_bb,
3910 			   recompute_dominator (CDI_DOMINATORS, body_bb));
3911   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3912 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3913 
3914   struct loop *loop = body_bb->loop_father;
3915   if (loop != entry_bb->loop_father)
3916     {
3917       gcc_assert (broken_loop || loop->header == body_bb);
3918       gcc_assert (broken_loop
3919 		  || loop->latch == region->cont
3920 		  || single_pred (loop->latch) == region->cont);
3921       return;
3922     }
3923 
3924   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3925     {
3926       loop = alloc_loop ();
3927       loop->header = body_bb;
3928       if (collapse_bb == NULL)
3929 	loop->latch = cont_bb;
3930       add_loop (loop, body_bb->loop_father);
3931     }
3932 }
3933 
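/* Editorial sketch, not part of GCC: the pseudocode before
   expand_omp_for_static_nochunk above splits N logical iterations into
   NTHREADS nearly equal blocks, giving the first N % NTHREADS threads
   one extra iteration.  The helper below mirrors that q/tt/s0/e0
   arithmetic; all names are invented for the illustration.  */

static void
example_static_nochunk_range (unsigned long n, unsigned long nthreads,
			      unsigned long threadid,
			      unsigned long *s0, unsigned long *e0)
{
  unsigned long q = n / nthreads;	/* base block size */
  unsigned long tt = n % nthreads;	/* threads that get one extra */

  if (threadid < tt)
    {
      /* L3: this thread is one of the first TT threads.  */
      tt = 0;
      q++;
    }
  /* L4: this thread runs logical iterations [*s0, *e0); the real code
     then maps them to V = s0 * STEP + N1 etc.  If *s0 >= *e0 the
     thread skips the loop entirely.  */
  *s0 = q * threadid + tt;
  *e0 = *s0 + q;
}

/* E.g. n = 10, nthreads = 4 yields [0,3), [3,6), [6,8) and [8,10)
   for threads 0..3.  */
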
3934 /* Return the phi node in E->DEST whose argument on edge E is ARG.  */
3935 
3936 static gphi *
3937 find_phi_with_arg_on_edge (tree arg, edge e)
3938 {
3939   basic_block bb = e->dest;
3940 
3941   for (gphi_iterator gpi = gsi_start_phis (bb);
3942        !gsi_end_p (gpi);
3943        gsi_next (&gpi))
3944     {
3945       gphi *phi = gpi.phi ();
3946       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3947 	return phi;
3948     }
3949 
3950   return NULL;
3951 }
3952 
3953 /* A subroutine of expand_omp_for.  Generate code for a parallel
3954    loop with static schedule and a specified chunk size.  Given
3955    parameters:
3956 
3957 	for (V = N1; V cond N2; V += STEP) BODY;
3958 
3959    where COND is "<" or ">", we generate pseudocode
3960 
3961 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3962 	if (cond is <)
3963 	  adj = STEP - 1;
3964 	else
3965 	  adj = STEP + 1;
3966 	if ((__typeof (V)) -1 > 0 && cond is >)
3967 	  n = -(adj + N2 - N1) / -STEP;
3968 	else
3969 	  n = (adj + N2 - N1) / STEP;
3970 	trip = 0;
3971 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3972 					      here so that V is defined
3973 					      if the loop is not entered
3974     L0:
3975 	s0 = (trip * nthreads + threadid) * CHUNK;
3976 	e0 = min (s0 + CHUNK, n);
3977 	if (s0 < n) goto L1; else goto L4;
3978     L1:
3979 	V = s0 * STEP + N1;
3980 	e = e0 * STEP + N1;
3981     L2:
3982 	BODY;
3983 	V += STEP;
3984 	if (V cond e) goto L2; else goto L3;
3985     L3:
3986 	trip += 1;
3987 	goto L0;
3988     L4:
3989 */
3990 
3991 static void
3992 expand_omp_for_static_chunk (struct omp_region *region,
3993 			     struct omp_for_data *fd, gimple *inner_stmt)
3994 {
3995   tree n, s0, e0, e, t;
3996   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3997   tree type, itype, vmain, vback, vextra;
3998   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3999   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4000   gimple_stmt_iterator gsi;
4001   edge se;
4002   bool broken_loop = region->cont == NULL;
4003   tree *counts = NULL;
4004   tree n1, n2, step;
4005   tree reductions = NULL_TREE;
4006 
4007   itype = type = TREE_TYPE (fd->loop.v);
4008   if (POINTER_TYPE_P (type))
4009     itype = signed_type_for (type);
4010 
4011   entry_bb = region->entry;
4012   se = split_block (entry_bb, last_stmt (entry_bb));
4013   entry_bb = se->src;
4014   iter_part_bb = se->dest;
4015   cont_bb = region->cont;
4016   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4017   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4018   gcc_assert (broken_loop
4019 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4020   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4021   body_bb = single_succ (seq_start_bb);
4022   if (!broken_loop)
4023     {
4024       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4025 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4026       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4027       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4028     }
4029   exit_bb = region->exit;
4030 
4031   /* Trip and adjustment setup goes in ENTRY_BB.  */
4032   gsi = gsi_last_nondebug_bb (entry_bb);
4033   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4034 
4035   if (fd->collapse > 1)
4036     {
4037       int first_zero_iter = -1, dummy = -1;
4038       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4039 
4040       counts = XALLOCAVEC (tree, fd->collapse);
4041       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4042 				  fin_bb, first_zero_iter,
4043 				  dummy_bb, dummy, l2_dom_bb);
4044       t = NULL_TREE;
4045     }
4046   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4047     t = integer_one_node;
4048   else
4049     t = fold_binary (fd->loop.cond_code, boolean_type_node,
4050 		     fold_convert (type, fd->loop.n1),
4051 		     fold_convert (type, fd->loop.n2));
4052   if (fd->collapse == 1
4053       && TYPE_UNSIGNED (type)
4054       && (t == NULL_TREE || !integer_onep (t)))
4055     {
4056       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4057       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4058 				     true, GSI_SAME_STMT);
4059       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4060       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4061 				     true, GSI_SAME_STMT);
4062       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4063 						 NULL_TREE, NULL_TREE);
4064       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4065       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4066 		     expand_omp_regimplify_p, NULL, NULL)
4067 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4068 			expand_omp_regimplify_p, NULL, NULL))
4069 	{
4070 	  gsi = gsi_for_stmt (cond_stmt);
4071 	  gimple_regimplify_operands (cond_stmt, &gsi);
4072 	}
4073       se = split_block (entry_bb, cond_stmt);
4074       se->flags = EDGE_TRUE_VALUE;
4075       entry_bb = se->dest;
4076       se->probability = profile_probability::very_likely ();
4077       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4078       se->probability = profile_probability::very_unlikely ();
4079       if (gimple_in_ssa_p (cfun))
4080 	{
4081 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4082 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4083 	       !gsi_end_p (gpi); gsi_next (&gpi))
4084 	    {
4085 	      gphi *phi = gpi.phi ();
4086 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4087 			   se, UNKNOWN_LOCATION);
4088 	    }
4089 	}
4090       gsi = gsi_last_bb (entry_bb);
4091     }
4092 
4093   if (fd->have_reductemp)
4094     {
4095       tree t1 = build_int_cst (long_integer_type_node, 0);
4096       tree t2 = build_int_cst (long_integer_type_node, 1);
4097       tree t3 = build_int_cstu (long_integer_type_node,
4098 				(HOST_WIDE_INT_1U << 31) + 1);
4099       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4100       clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4101       reductions = OMP_CLAUSE_DECL (clauses);
4102       gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4103       gimple *g = SSA_NAME_DEF_STMT (reductions);
4104       reductions = gimple_assign_rhs1 (g);
4105       OMP_CLAUSE_DECL (clauses) = reductions;
4106       gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4107       tree t
4108 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4109 			   9, t1, t2, t2, t3, t1, null_pointer_node,
4110 			   null_pointer_node, reductions, null_pointer_node);
4111       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4112 				true, GSI_SAME_STMT);
4113       gsi_remove (&gsi2, true);
4114       release_ssa_name (gimple_assign_lhs (g));
4115     }
4116   switch (gimple_omp_for_kind (fd->for_stmt))
4117     {
4118     case GF_OMP_FOR_KIND_FOR:
4119       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4120       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4121       break;
4122     case GF_OMP_FOR_KIND_DISTRIBUTE:
4123       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4124       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4125       break;
4126     default:
4127       gcc_unreachable ();
4128     }
4129   nthreads = build_call_expr (nthreads, 0);
4130   nthreads = fold_convert (itype, nthreads);
4131   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4132 				       true, GSI_SAME_STMT);
4133   threadid = build_call_expr (threadid, 0);
4134   threadid = fold_convert (itype, threadid);
4135   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4136 				       true, GSI_SAME_STMT);
4137 
4138   n1 = fd->loop.n1;
4139   n2 = fd->loop.n2;
4140   step = fd->loop.step;
4141   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4142     {
4143       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4144 				     OMP_CLAUSE__LOOPTEMP_);
4145       gcc_assert (innerc);
4146       n1 = OMP_CLAUSE_DECL (innerc);
4147       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4148 				OMP_CLAUSE__LOOPTEMP_);
4149       gcc_assert (innerc);
4150       n2 = OMP_CLAUSE_DECL (innerc);
4151     }
4152   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4153 				 true, NULL_TREE, true, GSI_SAME_STMT);
4154   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4155 				 true, NULL_TREE, true, GSI_SAME_STMT);
4156   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4157 				   true, NULL_TREE, true, GSI_SAME_STMT);
4158   tree chunk_size = fold_convert (itype, fd->chunk_size);
4159   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4160   chunk_size
4161     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4162 				GSI_SAME_STMT);
4163 
4164   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4165   t = fold_build2 (PLUS_EXPR, itype, step, t);
4166   t = fold_build2 (PLUS_EXPR, itype, t, n2);
4167   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4168   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4169     t = fold_build2 (TRUNC_DIV_EXPR, itype,
4170 		     fold_build1 (NEGATE_EXPR, itype, t),
4171 		     fold_build1 (NEGATE_EXPR, itype, step));
4172   else
4173     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4174   t = fold_convert (itype, t);
4175   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4176 				true, GSI_SAME_STMT);
4177 
4178   trip_var = create_tmp_reg (itype, ".trip");
4179   if (gimple_in_ssa_p (cfun))
4180     {
4181       trip_init = make_ssa_name (trip_var);
4182       trip_main = make_ssa_name (trip_var);
4183       trip_back = make_ssa_name (trip_var);
4184     }
4185   else
4186     {
4187       trip_init = trip_var;
4188       trip_main = trip_var;
4189       trip_back = trip_var;
4190     }
4191 
4192   gassign *assign_stmt
4193     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4194   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4195 
4196   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4197   t = fold_build2 (MULT_EXPR, itype, t, step);
4198   if (POINTER_TYPE_P (type))
4199     t = fold_build_pointer_plus (n1, t);
4200   else
4201     t = fold_build2 (PLUS_EXPR, type, t, n1);
4202   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4203 				     true, GSI_SAME_STMT);
4204 
4205   /* Remove the GIMPLE_OMP_FOR.  */
4206   gsi_remove (&gsi, true);
4207 
4208   gimple_stmt_iterator gsif = gsi;
4209 
4210   /* Iteration space partitioning goes in ITER_PART_BB.  */
4211   gsi = gsi_last_bb (iter_part_bb);
4212 
4213   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4214   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4215   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4216   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4217 				 false, GSI_CONTINUE_LINKING);
4218 
4219   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4220   t = fold_build2 (MIN_EXPR, itype, t, n);
4221   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4222 				 false, GSI_CONTINUE_LINKING);
4223 
4224   t = build2 (LT_EXPR, boolean_type_node, s0, n);
4225   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4226 
4227   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
4228   gsi = gsi_start_bb (seq_start_bb);
4229 
4230   tree startvar = fd->loop.v;
4231   tree endvar = NULL_TREE;
4232 
4233   if (gimple_omp_for_combined_p (fd->for_stmt))
4234     {
4235       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4236 		     ? gimple_omp_parallel_clauses (inner_stmt)
4237 		     : gimple_omp_for_clauses (inner_stmt);
4238       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4239       gcc_assert (innerc);
4240       startvar = OMP_CLAUSE_DECL (innerc);
4241       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4242 				OMP_CLAUSE__LOOPTEMP_);
4243       gcc_assert (innerc);
4244       endvar = OMP_CLAUSE_DECL (innerc);
4245       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4246 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4247 	{
4248 	  int i;
4249 	  for (i = 1; i < fd->collapse; i++)
4250 	    {
4251 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4252 					OMP_CLAUSE__LOOPTEMP_);
4253 	      gcc_assert (innerc);
4254 	    }
4255 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4256 				    OMP_CLAUSE__LOOPTEMP_);
4257 	  if (innerc)
4258 	    {
4259 	      /* If needed (distribute parallel for with lastprivate),
4260 		 propagate down the total number of iterations.  */
4261 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4262 				     fd->loop.n2);
4263 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4264 					    GSI_CONTINUE_LINKING);
4265 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4266 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4267 	    }
4268 	}
4269     }
4270 
4271   t = fold_convert (itype, s0);
4272   t = fold_build2 (MULT_EXPR, itype, t, step);
4273   if (POINTER_TYPE_P (type))
4274     {
4275       t = fold_build_pointer_plus (n1, t);
4276       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4277 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4278 	t = fold_convert (signed_type_for (type), t);
4279     }
4280   else
4281     t = fold_build2 (PLUS_EXPR, type, t, n1);
4282   t = fold_convert (TREE_TYPE (startvar), t);
4283   t = force_gimple_operand_gsi (&gsi, t,
4284 				DECL_P (startvar)
4285 				&& TREE_ADDRESSABLE (startvar),
4286 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4287   assign_stmt = gimple_build_assign (startvar, t);
4288   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4289 
4290   t = fold_convert (itype, e0);
4291   t = fold_build2 (MULT_EXPR, itype, t, step);
4292   if (POINTER_TYPE_P (type))
4293     {
4294       t = fold_build_pointer_plus (n1, t);
4295       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4296 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4297 	t = fold_convert (signed_type_for (type), t);
4298     }
4299   else
4300     t = fold_build2 (PLUS_EXPR, type, t, n1);
4301   t = fold_convert (TREE_TYPE (startvar), t);
4302   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4303 				false, GSI_CONTINUE_LINKING);
4304   if (endvar)
4305     {
4306       assign_stmt = gimple_build_assign (endvar, e);
4307       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4308       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4309 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4310       else
4311 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4312       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4313     }
4314   /* Handle linear clause adjustments.  */
4315   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4316   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4317     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4318 	 c; c = OMP_CLAUSE_CHAIN (c))
4319       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4320 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4321 	{
4322 	  tree d = OMP_CLAUSE_DECL (c);
4323 	  bool is_ref = omp_is_reference (d);
4324 	  tree t = d, a, dest;
4325 	  if (is_ref)
4326 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4327 	  tree type = TREE_TYPE (t);
4328 	  if (POINTER_TYPE_P (type))
4329 	    type = sizetype;
4330 	  dest = unshare_expr (t);
4331 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4332 	  expand_omp_build_assign (&gsif, v, t);
4333 	  if (itercnt == NULL_TREE)
4334 	    {
4335 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4336 		{
4337 		  itercntbias
4338 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4339 				   fold_convert (itype, fd->loop.n1));
4340 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4341 					     itercntbias, step);
4342 		  itercntbias
4343 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4344 						NULL_TREE, true,
4345 						GSI_SAME_STMT);
4346 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4347 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4348 						      NULL_TREE, false,
4349 						      GSI_CONTINUE_LINKING);
4350 		}
4351 	      else
4352 		itercnt = s0;
4353 	    }
4354 	  a = fold_build2 (MULT_EXPR, type,
4355 			   fold_convert (type, itercnt),
4356 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4357 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4358 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4359 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4360 					false, GSI_CONTINUE_LINKING);
4361 	  assign_stmt = gimple_build_assign (dest, t);
4362 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4363 	}
4364   if (fd->collapse > 1)
4365     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4366 
4367   if (!broken_loop)
4368     {
4369       /* The code controlling the sequential loop goes in CONT_BB,
4370 	 replacing the GIMPLE_OMP_CONTINUE.  */
4371       gsi = gsi_last_nondebug_bb (cont_bb);
4372       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4373       vmain = gimple_omp_continue_control_use (cont_stmt);
4374       vback = gimple_omp_continue_control_def (cont_stmt);
4375 
4376       if (!gimple_omp_for_combined_p (fd->for_stmt))
4377 	{
4378 	  if (POINTER_TYPE_P (type))
4379 	    t = fold_build_pointer_plus (vmain, step);
4380 	  else
4381 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4382 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4383 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4384 					  true, GSI_SAME_STMT);
4385 	  assign_stmt = gimple_build_assign (vback, t);
4386 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4387 
4388 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4389 	    t = build2 (EQ_EXPR, boolean_type_node,
4390 			build_int_cst (itype, 0),
4391 			build_int_cst (itype, 1));
4392 	  else
4393 	    t = build2 (fd->loop.cond_code, boolean_type_node,
4394 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4395 			? t : vback, e);
4396 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4397 	}
4398 
4399       /* Remove GIMPLE_OMP_CONTINUE.  */
4400       gsi_remove (&gsi, true);
4401 
4402       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4403 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4404 
4405       /* Trip update code goes into TRIP_UPDATE_BB.  */
4406       gsi = gsi_start_bb (trip_update_bb);
4407 
4408       t = build_int_cst (itype, 1);
4409       t = build2 (PLUS_EXPR, itype, trip_main, t);
4410       assign_stmt = gimple_build_assign (trip_back, t);
4411       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4412     }
4413 
4414   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4415   gsi = gsi_last_nondebug_bb (exit_bb);
4416   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4417     {
4418       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4419       if (fd->have_reductemp)
4420 	{
4421 	  tree fn;
4422 	  if (t)
4423 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4424 	  else
4425 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4426 	  gcall *g = gimple_build_call (fn, 0);
4427 	  if (t)
4428 	    {
4429 	      gimple_call_set_lhs (g, t);
4430 	      gsi_insert_after (&gsi, gimple_build_assign (reductions,
4431 							   NOP_EXPR, t),
4432 				GSI_SAME_STMT);
4433 	    }
4434 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4435 	}
4436       else
4437 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4438     }
4439   gsi_remove (&gsi, true);
4440 
4441   /* Connect the new blocks.  */
4442   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4443   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4444 
4445   if (!broken_loop)
4446     {
4447       se = find_edge (cont_bb, body_bb);
4448       if (se == NULL)
4449 	{
4450 	  se = BRANCH_EDGE (cont_bb);
4451 	  gcc_assert (single_succ (se->dest) == body_bb);
4452 	}
4453       if (gimple_omp_for_combined_p (fd->for_stmt))
4454 	{
4455 	  remove_edge (se);
4456 	  se = NULL;
4457 	}
4458       else if (fd->collapse > 1)
4459 	{
4460 	  remove_edge (se);
4461 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4462 	}
4463       else
4464 	se->flags = EDGE_TRUE_VALUE;
4465       find_edge (cont_bb, trip_update_bb)->flags
4466 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4467 
4468       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4469 				iter_part_bb);
4470     }
4471 
4472   if (gimple_in_ssa_p (cfun))
4473     {
4474       gphi_iterator psi;
4475       gphi *phi;
4476       edge re, ene;
4477       edge_var_map *vm;
4478       size_t i;
4479 
4480       gcc_assert (fd->collapse == 1 && !broken_loop);
4481 
4482       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4483 	 remove arguments of the phi nodes in fin_bb.  We need to create
4484 	 appropriate phi nodes in iter_part_bb instead.  */
4485       se = find_edge (iter_part_bb, fin_bb);
4486       re = single_succ_edge (trip_update_bb);
4487       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4488       ene = single_succ_edge (entry_bb);
4489 
4490       psi = gsi_start_phis (fin_bb);
4491       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4492 	   gsi_next (&psi), ++i)
4493 	{
4494 	  gphi *nphi;
4495 	  location_t locus;
4496 
4497 	  phi = psi.phi ();
4498 	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4499 			       redirect_edge_var_map_def (vm), 0))
4500 	    continue;
4501 
4502 	  t = gimple_phi_result (phi);
4503 	  gcc_assert (t == redirect_edge_var_map_result (vm));
4504 
4505 	  if (!single_pred_p (fin_bb))
4506 	    t = copy_ssa_name (t, phi);
4507 
4508 	  nphi = create_phi_node (t, iter_part_bb);
4509 
4510 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4511 	  locus = gimple_phi_arg_location_from_edge (phi, se);
4512 
4513 	  /* A special case -- fd->loop.v is not yet computed in
4514 	     iter_part_bb, so we need to use vextra instead.  */
4515 	  if (t == fd->loop.v)
4516 	    t = vextra;
4517 	  add_phi_arg (nphi, t, ene, locus);
4518 	  locus = redirect_edge_var_map_location (vm);
4519 	  tree back_arg = redirect_edge_var_map_def (vm);
4520 	  add_phi_arg (nphi, back_arg, re, locus);
4521 	  edge ce = find_edge (cont_bb, body_bb);
4522 	  if (ce == NULL)
4523 	    {
4524 	      ce = BRANCH_EDGE (cont_bb);
4525 	      gcc_assert (single_succ (ce->dest) == body_bb);
4526 	      ce = single_succ_edge (ce->dest);
4527 	    }
4528 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4529 	  gcc_assert (inner_loop_phi != NULL);
4530 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4531 		       find_edge (seq_start_bb, body_bb), locus);
4532 
4533 	  if (!single_pred_p (fin_bb))
4534 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4535 	}
4536       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4537       redirect_edge_var_map_clear (re);
4538       if (single_pred_p (fin_bb))
4539 	while (1)
4540 	  {
4541 	    psi = gsi_start_phis (fin_bb);
4542 	    if (gsi_end_p (psi))
4543 	      break;
4544 	    remove_phi_node (&psi, false);
4545 	  }
4546 
4547       /* Make phi node for trip.  */
4548       phi = create_phi_node (trip_main, iter_part_bb);
4549       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4550 		   UNKNOWN_LOCATION);
4551       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4552 		   UNKNOWN_LOCATION);
4553     }
4554 
4555   if (!broken_loop)
4556     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4557   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4558 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4559   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4560 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4561   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4562 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4563   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4564 			   recompute_dominator (CDI_DOMINATORS, body_bb));
4565 
4566   if (!broken_loop)
4567     {
4568       struct loop *loop = body_bb->loop_father;
4569       struct loop *trip_loop = alloc_loop ();
4570       trip_loop->header = iter_part_bb;
4571       trip_loop->latch = trip_update_bb;
4572       add_loop (trip_loop, iter_part_bb->loop_father);
4573 
4574       if (loop != entry_bb->loop_father)
4575 	{
4576 	  gcc_assert (loop->header == body_bb);
4577 	  gcc_assert (loop->latch == region->cont
4578 		      || single_pred (loop->latch) == region->cont);
4579 	  trip_loop->inner = loop;
4580 	  return;
4581 	}
4582 
4583       if (!gimple_omp_for_combined_p (fd->for_stmt))
4584 	{
4585 	  loop = alloc_loop ();
4586 	  loop->header = body_bb;
4587 	  if (collapse_bb == NULL)
4588 	    loop->latch = cont_bb;
4589 	  add_loop (loop, trip_loop);
4590 	}
4591     }
4592 }
4593 
4594 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4595    loop.  Given parameters:
4596 
4597 	for (V = N1; V cond N2; V += STEP) BODY;
4598 
4599    where COND is "<" or ">", we generate pseudocode
4600 
4601 	V = N1;
4602 	goto L1;
4603     L0:
4604 	BODY;
4605 	V += STEP;
4606     L1:
4607 	if (V cond N2) goto L0; else goto L2;
4608     L2:
4609 
4610     For collapsed loops, given parameters:
4611       collapse(3)
4612       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4613 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4614 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4615 	    BODY;
4616 
4617     we generate pseudocode
4618 
4619 	if (cond3 is <)
4620 	  adj = STEP3 - 1;
4621 	else
4622 	  adj = STEP3 + 1;
4623 	count3 = (adj + N32 - N31) / STEP3;
4624 	if (cond2 is <)
4625 	  adj = STEP2 - 1;
4626 	else
4627 	  adj = STEP2 + 1;
4628 	count2 = (adj + N22 - N21) / STEP2;
4629 	if (cond1 is <)
4630 	  adj = STEP1 - 1;
4631 	else
4632 	  adj = STEP1 + 1;
4633 	count1 = (adj + N12 - N11) / STEP1;
4634 	count = count1 * count2 * count3;
4635 	V = 0;
4636 	V1 = N11;
4637 	V2 = N21;
4638 	V3 = N31;
4639 	goto L1;
4640     L0:
4641 	BODY;
4642 	V += 1;
4643 	V3 += STEP3;
4644 	V2 += (V3 cond3 N32) ? 0 : STEP2;
4645 	V3 = (V3 cond3 N32) ? V3 : N31;
4646 	V1 += (V2 cond2 N22) ? 0 : STEP1;
4647 	V2 = (V2 cond2 N22) ? V2 : N21;
4648     L1:
4649 	if (V < count) goto L0; else goto L2;
4650     L2:
4651 
4652       */
4653 
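/* As an illustration only (the loop below is a hypothetical example, not
   taken from the compiler sources), a user loop such as

	#pragma omp simd collapse(2) safelen(8)
	for (int i = 0; i < n; i++)
	  for (int j = 0; j < m; j++)
	    a[i][j] = b[i][j] + c[i][j];

   follows the collapsed pseudocode above: both loops are driven by a
   single counter V running from 0 to count = count1 * count2, the
   original induction variables i and j are updated incrementally on
   every iteration, and the resulting loop is marked with safelen 8 so
   the vectorizer may assume that many iterations can safely run
   concurrently.  */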
4654 static void
4655 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4656 {
4657   tree type, t;
4658   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4659   gimple_stmt_iterator gsi;
4660   gimple *stmt;
4661   gcond *cond_stmt;
4662   bool broken_loop = region->cont == NULL;
4663   edge e, ne;
4664   tree *counts = NULL;
4665   int i;
4666   int safelen_int = INT_MAX;
4667   bool dont_vectorize = false;
4668   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4669 				  OMP_CLAUSE_SAFELEN);
4670   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4671 				  OMP_CLAUSE__SIMDUID_);
4672   tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4673 			      OMP_CLAUSE_IF);
4674   tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4675 				  OMP_CLAUSE_SIMDLEN);
4676   tree n1, n2;
4677 
4678   if (safelen)
4679     {
4680       poly_uint64 val;
4681       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4682       if (!poly_int_tree_p (safelen, &val))
4683 	safelen_int = 0;
4684       else
4685 	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4686       if (safelen_int == 1)
4687 	safelen_int = 0;
4688     }
4689   if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
4690       || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
4691     {
4692       safelen_int = 0;
4693       dont_vectorize = true;
4694     }
4695   type = TREE_TYPE (fd->loop.v);
4696   entry_bb = region->entry;
4697   cont_bb = region->cont;
4698   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4699   gcc_assert (broken_loop
4700 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4701   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4702   if (!broken_loop)
4703     {
4704       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4705       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4706       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4707       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4708     }
4709   else
4710     {
4711       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4712       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4713       l2_bb = single_succ (l1_bb);
4714     }
4715   exit_bb = region->exit;
4716   l2_dom_bb = NULL;
4717 
4718   gsi = gsi_last_nondebug_bb (entry_bb);
4719 
4720   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4721   /* Not needed in SSA form right now.  */
4722   gcc_assert (!gimple_in_ssa_p (cfun));
4723   if (fd->collapse > 1)
4724     {
4725       int first_zero_iter = -1, dummy = -1;
4726       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4727 
4728       counts = XALLOCAVEC (tree, fd->collapse);
4729       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4730 				  zero_iter_bb, first_zero_iter,
4731 				  dummy_bb, dummy, l2_dom_bb);
4732     }
4733   if (l2_dom_bb == NULL)
4734     l2_dom_bb = l1_bb;
4735 
4736   n1 = fd->loop.n1;
4737   n2 = fd->loop.n2;
4738   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4739     {
4740       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4741 				     OMP_CLAUSE__LOOPTEMP_);
4742       gcc_assert (innerc);
4743       n1 = OMP_CLAUSE_DECL (innerc);
4744       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4745 				OMP_CLAUSE__LOOPTEMP_);
4746       gcc_assert (innerc);
4747       n2 = OMP_CLAUSE_DECL (innerc);
4748     }
4749   tree step = fd->loop.step;
4750 
4751   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4752 				  OMP_CLAUSE__SIMT_);
4753   if (is_simt)
4754     {
4755       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4756       is_simt = safelen_int > 1;
4757     }
4758   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4759   if (is_simt)
4760     {
4761       simt_lane = create_tmp_var (unsigned_type_node);
4762       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4763       gimple_call_set_lhs (g, simt_lane);
4764       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4765       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4766 				 fold_convert (TREE_TYPE (step), simt_lane));
4767       n1 = fold_convert (type, n1);
4768       if (POINTER_TYPE_P (type))
4769 	n1 = fold_build_pointer_plus (n1, offset);
4770       else
4771 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4772 
4773       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4774       if (fd->collapse > 1)
4775 	simt_maxlane = build_one_cst (unsigned_type_node);
4776       else if (safelen_int < omp_max_simt_vf ())
4777 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4778       tree vf
4779 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4780 					unsigned_type_node, 0);
4781       if (simt_maxlane)
4782 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4783       vf = fold_convert (TREE_TYPE (step), vf);
4784       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4785     }
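  /* A note on the SIMT path just set up: each lane effectively starts at
     N1 + SIMT_LANE * STEP and then strides by STEP * VF, so the lanes
     jointly cover the original iteration space; the assignment emitted
     further down ('V -= STEP * (SIMT_VF - 1)' after the loop) compensates
     for the widened step once the loop is done.  */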
4786 
4787   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4788   if (fd->collapse > 1)
4789     {
4790       if (gimple_omp_for_combined_into_p (fd->for_stmt))
4791 	{
4792 	  gsi_prev (&gsi);
4793 	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4794 	  gsi_next (&gsi);
4795 	}
4796       else
4797 	for (i = 0; i < fd->collapse; i++)
4798 	  {
4799 	    tree itype = TREE_TYPE (fd->loops[i].v);
4800 	    if (POINTER_TYPE_P (itype))
4801 	      itype = signed_type_for (itype);
4802 	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4803 	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4804 	  }
4805     }
4806 
4807   /* Remove the GIMPLE_OMP_FOR statement.  */
4808   gsi_remove (&gsi, true);
4809 
4810   if (!broken_loop)
4811     {
4812       /* Code to control the increment goes in the CONT_BB.  */
4813       gsi = gsi_last_nondebug_bb (cont_bb);
4814       stmt = gsi_stmt (gsi);
4815       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4816 
4817       if (POINTER_TYPE_P (type))
4818 	t = fold_build_pointer_plus (fd->loop.v, step);
4819       else
4820 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4821       expand_omp_build_assign (&gsi, fd->loop.v, t);
4822 
4823       if (fd->collapse > 1)
4824 	{
4825 	  i = fd->collapse - 1;
4826 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4827 	    {
4828 	      t = fold_convert (sizetype, fd->loops[i].step);
4829 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4830 	    }
4831 	  else
4832 	    {
4833 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4834 				fd->loops[i].step);
4835 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4836 			       fd->loops[i].v, t);
4837 	    }
4838 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4839 
4840 	  for (i = fd->collapse - 1; i > 0; i--)
4841 	    {
4842 	      tree itype = TREE_TYPE (fd->loops[i].v);
4843 	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4844 	      if (POINTER_TYPE_P (itype2))
4845 		itype2 = signed_type_for (itype2);
4846 	      t = fold_convert (itype2, fd->loops[i - 1].step);
4847 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4848 					    GSI_SAME_STMT);
4849 	      t = build3 (COND_EXPR, itype2,
4850 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4851 				  fd->loops[i].v,
4852 				  fold_convert (itype, fd->loops[i].n2)),
4853 			  build_int_cst (itype2, 0), t);
4854 	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4855 		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4856 	      else
4857 		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4858 	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4859 
4860 	      t = fold_convert (itype, fd->loops[i].n1);
4861 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4862 					    GSI_SAME_STMT);
4863 	      t = build3 (COND_EXPR, itype,
4864 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4865 				  fd->loops[i].v,
4866 				  fold_convert (itype, fd->loops[i].n2)),
4867 			  fd->loops[i].v, t);
4868 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4869 	    }
4870 	}
4871 
4872       /* Remove GIMPLE_OMP_CONTINUE.  */
4873       gsi_remove (&gsi, true);
4874     }
4875 
4876   /* Emit the condition in L1_BB.  */
4877   gsi = gsi_start_bb (l1_bb);
4878 
4879   t = fold_convert (type, n2);
4880   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4881 				false, GSI_CONTINUE_LINKING);
4882   tree v = fd->loop.v;
4883   if (DECL_P (v) && TREE_ADDRESSABLE (v))
4884     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4885 				  false, GSI_CONTINUE_LINKING);
4886   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4887   cond_stmt = gimple_build_cond_empty (t);
4888   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4889   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4890 		 NULL, NULL)
4891       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4892 		    NULL, NULL))
4893     {
4894       gsi = gsi_for_stmt (cond_stmt);
4895       gimple_regimplify_operands (cond_stmt, &gsi);
4896     }
4897 
4898   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4899   if (is_simt)
4900     {
4901       gsi = gsi_start_bb (l2_bb);
4902       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4903       if (POINTER_TYPE_P (type))
4904 	t = fold_build_pointer_plus (fd->loop.v, step);
4905       else
4906 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4907       expand_omp_build_assign (&gsi, fd->loop.v, t);
4908     }
4909 
4910   /* Remove GIMPLE_OMP_RETURN.  */
4911   gsi = gsi_last_nondebug_bb (exit_bb);
4912   gsi_remove (&gsi, true);
4913 
4914   /* Connect the new blocks.  */
4915   remove_edge (FALLTHRU_EDGE (entry_bb));
4916 
4917   if (!broken_loop)
4918     {
4919       remove_edge (BRANCH_EDGE (entry_bb));
4920       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4921 
4922       e = BRANCH_EDGE (l1_bb);
4923       ne = FALLTHRU_EDGE (l1_bb);
4924       e->flags = EDGE_TRUE_VALUE;
4925     }
4926   else
4927     {
4928       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4929 
4930       ne = single_succ_edge (l1_bb);
4931       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4932 
4933     }
4934   ne->flags = EDGE_FALSE_VALUE;
4935   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4936   ne->probability = e->probability.invert ();
4937 
4938   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4939   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4940 
4941   if (simt_maxlane)
4942     {
4943       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4944 				     NULL_TREE, NULL_TREE);
4945       gsi = gsi_last_bb (entry_bb);
4946       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4947       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4948       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4949       FALLTHRU_EDGE (entry_bb)->probability
4950 	 = profile_probability::guessed_always ().apply_scale (7, 8);
4951       BRANCH_EDGE (entry_bb)->probability
4952 	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4953       l2_dom_bb = entry_bb;
4954     }
4955   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4956 
4957   if (!broken_loop)
4958     {
4959       struct loop *loop = alloc_loop ();
4960       loop->header = l1_bb;
4961       loop->latch = cont_bb;
4962       add_loop (loop, l1_bb->loop_father);
4963       loop->safelen = safelen_int;
4964       if (simduid)
4965 	{
4966 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4967 	  cfun->has_simduid_loops = true;
4968 	}
4969       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4970 	 the loop.  */
4971       if ((flag_tree_loop_vectorize
4972 	   || !global_options_set.x_flag_tree_loop_vectorize)
4973 	  && flag_tree_loop_optimize
4974 	  && loop->safelen > 1)
4975 	{
4976 	  loop->force_vectorize = true;
4977 	  cfun->has_force_vectorize_loops = true;
4978 	}
4979       else if (dont_vectorize)
4980 	loop->dont_vectorize = true;
4981     }
4982   else if (simduid)
4983     cfun->has_simduid_loops = true;
4984 }
4985 
4986 /* The taskloop construct is represented after gimplification with
4987    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4988    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4989    which should just compute all the needed loop temporaries
4990    for GIMPLE_OMP_TASK.  */
4991 
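/* For illustration only (the directive below is a hypothetical example,
   not part of this file), something like

	#pragma omp taskloop grainsize(64)
	for (i = 0; i < n; i++)
	  body (i);

   is gimplified into an outer GIMPLE_OMP_FOR that merely evaluates the
   bounds into _looptemp_ clauses, a GIMPLE_OMP_TASK whose expansion ends
   up calling GOMP_taskloop (or GOMP_taskloop_ull), and an inner
   GIMPLE_OMP_FOR that walks the single iteration range handed to each
   generated task.  The routine below handles only the outer loop.  */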
4992 static void
4993 expand_omp_taskloop_for_outer (struct omp_region *region,
4994 			       struct omp_for_data *fd,
4995 			       gimple *inner_stmt)
4996 {
4997   tree type, bias = NULL_TREE;
4998   basic_block entry_bb, cont_bb, exit_bb;
4999   gimple_stmt_iterator gsi;
5000   gassign *assign_stmt;
5001   tree *counts = NULL;
5002   int i;
5003 
5004   gcc_assert (inner_stmt);
5005   gcc_assert (region->cont);
5006   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5007 	      && gimple_omp_task_taskloop_p (inner_stmt));
5008   type = TREE_TYPE (fd->loop.v);
5009 
5010   /* See if we need to bias by LLONG_MIN.  */
5011   if (fd->iter_type == long_long_unsigned_type_node
5012       && TREE_CODE (type) == INTEGER_TYPE
5013       && !TYPE_UNSIGNED (type))
5014     {
5015       tree n1, n2;
5016 
5017       if (fd->loop.cond_code == LT_EXPR)
5018 	{
5019 	  n1 = fd->loop.n1;
5020 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5021 	}
5022       else
5023 	{
5024 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5025 	  n2 = fd->loop.n1;
5026 	}
5027       if (TREE_CODE (n1) != INTEGER_CST
5028 	  || TREE_CODE (n2) != INTEGER_CST
5029 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5030 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5031     }
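  /* Purely explanatory worked example of the bias: for a signed 64-bit
     loop running from -5 to 5 the runtime interface, which traffics in
     unsigned long long, would see the negative bound wrap around;
     adding LLONG_MIN maps the range onto unsigned values whose ordering
     matches the signed one.  Because the bias is the bit pattern 2^63,
     adding it twice is a no-op modulo 2^64, which is why the inner
     taskloop expansion can recover the original values simply by adding
     the same bias again.  */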
5032 
5033   entry_bb = region->entry;
5034   cont_bb = region->cont;
5035   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5036   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5037   exit_bb = region->exit;
5038 
5039   gsi = gsi_last_nondebug_bb (entry_bb);
5040   gimple *for_stmt = gsi_stmt (gsi);
5041   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5042   if (fd->collapse > 1)
5043     {
5044       int first_zero_iter = -1, dummy = -1;
5045       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5046 
5047       counts = XALLOCAVEC (tree, fd->collapse);
5048       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5049 				  zero_iter_bb, first_zero_iter,
5050 				  dummy_bb, dummy, l2_dom_bb);
5051 
5052       if (zero_iter_bb)
5053 	{
5054 	  /* Some counts[i] vars might be uninitialized if
5055 	     some loop has zero iterations.  But the body shouldn't
5056 	     be executed in that case, so just avoid uninit warnings.  */
5057 	  for (i = first_zero_iter; i < fd->collapse; i++)
5058 	    if (SSA_VAR_P (counts[i]))
5059 	      TREE_NO_WARNING (counts[i]) = 1;
5060 	  gsi_prev (&gsi);
5061 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
5062 	  entry_bb = e->dest;
5063 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5064 	  gsi = gsi_last_bb (entry_bb);
5065 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5066 				   get_immediate_dominator (CDI_DOMINATORS,
5067 							    zero_iter_bb));
5068 	}
5069     }
5070 
5071   tree t0, t1;
5072   t1 = fd->loop.n2;
5073   t0 = fd->loop.n1;
5074   if (POINTER_TYPE_P (TREE_TYPE (t0))
5075       && TYPE_PRECISION (TREE_TYPE (t0))
5076 	 != TYPE_PRECISION (fd->iter_type))
5077     {
5078       /* Avoid casting pointers to integer of a different size.  */
5079       tree itype = signed_type_for (type);
5080       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5081       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5082     }
5083   else
5084     {
5085       t1 = fold_convert (fd->iter_type, t1);
5086       t0 = fold_convert (fd->iter_type, t0);
5087     }
5088   if (bias)
5089     {
5090       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5091       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5092     }
5093 
5094   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5095 				 OMP_CLAUSE__LOOPTEMP_);
5096   gcc_assert (innerc);
5097   tree startvar = OMP_CLAUSE_DECL (innerc);
5098   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5099   gcc_assert (innerc);
5100   tree endvar = OMP_CLAUSE_DECL (innerc);
5101   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5102     {
5103       gcc_assert (innerc);
5104       for (i = 1; i < fd->collapse; i++)
5105 	{
5106 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5107 				    OMP_CLAUSE__LOOPTEMP_);
5108 	  gcc_assert (innerc);
5109 	}
5110       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5111 				OMP_CLAUSE__LOOPTEMP_);
5112       if (innerc)
5113 	{
5114 	  /* If needed (inner taskloop has lastprivate clause), propagate
5115 	     down the total number of iterations.  */
5116 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5117 					     NULL_TREE, false,
5118 					     GSI_CONTINUE_LINKING);
5119 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5120 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5121 	}
5122     }
5123 
5124   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5125 				 GSI_CONTINUE_LINKING);
5126   assign_stmt = gimple_build_assign (startvar, t0);
5127   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5128 
5129   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5130 				 GSI_CONTINUE_LINKING);
5131   assign_stmt = gimple_build_assign (endvar, t1);
5132   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5133   if (fd->collapse > 1)
5134     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5135 
5136   /* Remove the GIMPLE_OMP_FOR statement.  */
5137   gsi = gsi_for_stmt (for_stmt);
5138   gsi_remove (&gsi, true);
5139 
5140   gsi = gsi_last_nondebug_bb (cont_bb);
5141   gsi_remove (&gsi, true);
5142 
5143   gsi = gsi_last_nondebug_bb (exit_bb);
5144   gsi_remove (&gsi, true);
5145 
5146   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5147   remove_edge (BRANCH_EDGE (entry_bb));
5148   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5149   remove_edge (BRANCH_EDGE (cont_bb));
5150   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5151   set_immediate_dominator (CDI_DOMINATORS, region->entry,
5152 			   recompute_dominator (CDI_DOMINATORS, region->entry));
5153 }
5154 
5155 /* The taskloop construct is represented after gimplification with
5156    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5157    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
5158    The GOMP_taskloop{,_ull} function arranges for each task to be
5159    given just a single range of iterations.  */
5160 
5161 static void
5162 expand_omp_taskloop_for_inner (struct omp_region *region,
5163 			       struct omp_for_data *fd,
5164 			       gimple *inner_stmt)
5165 {
5166   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5167   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5168   basic_block fin_bb;
5169   gimple_stmt_iterator gsi;
5170   edge ep;
5171   bool broken_loop = region->cont == NULL;
5172   tree *counts = NULL;
5173   tree n1, n2, step;
5174 
5175   itype = type = TREE_TYPE (fd->loop.v);
5176   if (POINTER_TYPE_P (type))
5177     itype = signed_type_for (type);
5178 
5179   /* See if we need to bias by LLONG_MIN.  */
5180   if (fd->iter_type == long_long_unsigned_type_node
5181       && TREE_CODE (type) == INTEGER_TYPE
5182       && !TYPE_UNSIGNED (type))
5183     {
5184       tree n1, n2;
5185 
5186       if (fd->loop.cond_code == LT_EXPR)
5187 	{
5188 	  n1 = fd->loop.n1;
5189 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5190 	}
5191       else
5192 	{
5193 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5194 	  n2 = fd->loop.n1;
5195 	}
5196       if (TREE_CODE (n1) != INTEGER_CST
5197 	  || TREE_CODE (n2) != INTEGER_CST
5198 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5199 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5200     }
5201 
5202   entry_bb = region->entry;
5203   cont_bb = region->cont;
5204   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5205   fin_bb = BRANCH_EDGE (entry_bb)->dest;
5206   gcc_assert (broken_loop
5207 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5208   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5209   if (!broken_loop)
5210     {
5211       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5212       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5213     }
5214   exit_bb = region->exit;
5215 
5216   /* Iteration space partitioning goes in ENTRY_BB.  */
5217   gsi = gsi_last_nondebug_bb (entry_bb);
5218   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5219 
5220   if (fd->collapse > 1)
5221     {
5222       int first_zero_iter = -1, dummy = -1;
5223       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5224 
5225       counts = XALLOCAVEC (tree, fd->collapse);
5226       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5227 				  fin_bb, first_zero_iter,
5228 				  dummy_bb, dummy, l2_dom_bb);
5229       t = NULL_TREE;
5230     }
5231   else
5232     t = integer_one_node;
5233 
5234   step = fd->loop.step;
5235   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5236 				 OMP_CLAUSE__LOOPTEMP_);
5237   gcc_assert (innerc);
5238   n1 = OMP_CLAUSE_DECL (innerc);
5239   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5240   gcc_assert (innerc);
5241   n2 = OMP_CLAUSE_DECL (innerc);
5242   if (bias)
5243     {
5244       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5245       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5246     }
5247   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5248 				 true, NULL_TREE, true, GSI_SAME_STMT);
5249   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5250 				 true, NULL_TREE, true, GSI_SAME_STMT);
5251   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5252 				   true, NULL_TREE, true, GSI_SAME_STMT);
5253 
5254   tree startvar = fd->loop.v;
5255   tree endvar = NULL_TREE;
5256 
5257   if (gimple_omp_for_combined_p (fd->for_stmt))
5258     {
5259       tree clauses = gimple_omp_for_clauses (inner_stmt);
5260       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5261       gcc_assert (innerc);
5262       startvar = OMP_CLAUSE_DECL (innerc);
5263       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5264 				OMP_CLAUSE__LOOPTEMP_);
5265       gcc_assert (innerc);
5266       endvar = OMP_CLAUSE_DECL (innerc);
5267     }
5268   t = fold_convert (TREE_TYPE (startvar), n1);
5269   t = force_gimple_operand_gsi (&gsi, t,
5270 				DECL_P (startvar)
5271 				&& TREE_ADDRESSABLE (startvar),
5272 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5273   gimple *assign_stmt = gimple_build_assign (startvar, t);
5274   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5275 
5276   t = fold_convert (TREE_TYPE (startvar), n2);
5277   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5278 				false, GSI_CONTINUE_LINKING);
5279   if (endvar)
5280     {
5281       assign_stmt = gimple_build_assign (endvar, e);
5282       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5283       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5284 	assign_stmt = gimple_build_assign (fd->loop.v, e);
5285       else
5286 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5287       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5288     }
5289   if (fd->collapse > 1)
5290     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5291 
5292   if (!broken_loop)
5293     {
5294       /* The code controlling the sequential loop replaces the
5295 	 GIMPLE_OMP_CONTINUE.  */
5296       gsi = gsi_last_nondebug_bb (cont_bb);
5297       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5298       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5299       vmain = gimple_omp_continue_control_use (cont_stmt);
5300       vback = gimple_omp_continue_control_def (cont_stmt);
5301 
5302       if (!gimple_omp_for_combined_p (fd->for_stmt))
5303 	{
5304 	  if (POINTER_TYPE_P (type))
5305 	    t = fold_build_pointer_plus (vmain, step);
5306 	  else
5307 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5308 	  t = force_gimple_operand_gsi (&gsi, t,
5309 					DECL_P (vback)
5310 					&& TREE_ADDRESSABLE (vback),
5311 					NULL_TREE, true, GSI_SAME_STMT);
5312 	  assign_stmt = gimple_build_assign (vback, t);
5313 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5314 
5315 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5316 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5317 		      ? t : vback, e);
5318 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5319 	}
5320 
5321       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5322       gsi_remove (&gsi, true);
5323 
5324       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5325 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5326     }
5327 
5328   /* Remove the GIMPLE_OMP_FOR statement.  */
5329   gsi = gsi_for_stmt (fd->for_stmt);
5330   gsi_remove (&gsi, true);
5331 
5332   /* Remove the GIMPLE_OMP_RETURN statement.  */
5333   gsi = gsi_last_nondebug_bb (exit_bb);
5334   gsi_remove (&gsi, true);
5335 
5336   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5337   if (!broken_loop)
5338     remove_edge (BRANCH_EDGE (entry_bb));
5339   else
5340     {
5341       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5342       region->outer->cont = NULL;
5343     }
5344 
5345   /* Connect all the blocks.  */
5346   if (!broken_loop)
5347     {
5348       ep = find_edge (cont_bb, body_bb);
5349       if (gimple_omp_for_combined_p (fd->for_stmt))
5350 	{
5351 	  remove_edge (ep);
5352 	  ep = NULL;
5353 	}
5354       else if (fd->collapse > 1)
5355 	{
5356 	  remove_edge (ep);
5357 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5358 	}
5359       else
5360 	ep->flags = EDGE_TRUE_VALUE;
5361       find_edge (cont_bb, fin_bb)->flags
5362 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5363     }
5364 
5365   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5366 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5367   if (!broken_loop)
5368     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5369 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5370 
5371   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5372     {
5373       struct loop *loop = alloc_loop ();
5374       loop->header = body_bb;
5375       if (collapse_bb == NULL)
5376 	loop->latch = cont_bb;
5377       add_loop (loop, body_bb->loop_father);
5378     }
5379 }
5380 
5381 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5382    partitioned loop.  The lowering here is abstracted, in that the
5383    loop parameters are passed through internal functions, which are
5384    further lowered by oacc_device_lower, once we get to the target
5385    compiler.  The loop is of the form:
5386 
5387    for (V = B; V LTGT E; V += S) {BODY}
5388 
5389    where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
5390    (constant 0 for no chunking) and we will have a GWV partitioning
5391    mask, specifying dimensions over which the loop is to be
5392    partitioned (see note below).  We generate code that looks like
5393    (this ignores tiling):
5394 
5395    <entry_bb> [incoming FALL->body, BRANCH->exit]
5396      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5397      T range = E - B;
5398      T chunk_no = 0;
5399      T DIR = LTGT == '<' ? +1 : -1;
5400      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5401      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5402 
5403    <head_bb> [created by splitting end of entry_bb]
5404      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5405      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5406      if (!(offset LTGT bound)) goto bottom_bb;
5407 
5408    <body_bb> [incoming]
5409      V = B + offset;
5410      {BODY}
5411 
5412    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5413      offset += step;
5414      if (offset LTGT bound) goto body_bb; [*]
5415 
5416    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5417      chunk_no++;
5418      if (chunk_no < chunk_max) goto head_bb;
5419 
5420    <exit_bb> [incoming]
5421      V = B + ((range -/+ 1) / S +/- 1) * S [*]
5422 
5423    [*] Needed if V live at end of loop.  */
5424 
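/* By way of a made-up example (the loop below is an assumption for
   illustration, not from the testsuite), a construct like

	#pragma acc parallel loop gang vector tile(8, 8)
	for (int i = 0; i < n; i++)
	  for (int j = 0; j < m; j++)
	    x[i][j] += y[i][j];

   reaches this function with a GWV mask naming the gang and vector axes
   and with fd->tiling set.  The IFN_GOACC_LOOP calls emitted below only
   record the direction, range, step, chunk size and that mask; they are
   turned into concrete offsets and bounds later, by oacc_device_lower in
   the target compiler.  */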
5425 static void
5426 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5427 {
5428   tree v = fd->loop.v;
5429   enum tree_code cond_code = fd->loop.cond_code;
5430   enum tree_code plus_code = PLUS_EXPR;
5431 
5432   tree chunk_size = integer_minus_one_node;
5433   tree gwv = integer_zero_node;
5434   tree iter_type = TREE_TYPE (v);
5435   tree diff_type = iter_type;
5436   tree plus_type = iter_type;
5437   struct oacc_collapse *counts = NULL;
5438 
5439   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5440 		       == GF_OMP_FOR_KIND_OACC_LOOP);
5441   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5442   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5443 
5444   if (POINTER_TYPE_P (iter_type))
5445     {
5446       plus_code = POINTER_PLUS_EXPR;
5447       plus_type = sizetype;
5448     }
5449   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5450     diff_type = signed_type_for (diff_type);
5451   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5452     diff_type = integer_type_node;
5453 
5454   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5455   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5456   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5457   basic_block bottom_bb = NULL;
5458 
5459   /* entry_bb has two successors; the branch edge is to the exit
5460      block, the fallthrough edge to the body.  */
5461   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5462 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5463 
5464   /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
5465      body_bb, or to a block whose only successor is the body_bb.  Its
5466      fallthrough successor is the final block (same as the branch
5467      successor of the entry_bb).  */
5468   if (cont_bb)
5469     {
5470       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5471       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5472 
5473       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5474       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5475     }
5476   else
5477     gcc_assert (!gimple_in_ssa_p (cfun));
5478 
5479   /* The exit block only has entry_bb and cont_bb as predecessors.  */
5480   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5481 
5482   tree chunk_no;
5483   tree chunk_max = NULL_TREE;
5484   tree bound, offset;
5485   tree step = create_tmp_var (diff_type, ".step");
5486   bool up = cond_code == LT_EXPR;
5487   tree dir = build_int_cst (diff_type, up ? +1 : -1);
5488   bool chunking = !gimple_in_ssa_p (cfun);
5489   bool negating;
5490 
5491   /* Tiling vars.  */
5492   tree tile_size = NULL_TREE;
5493   tree element_s = NULL_TREE;
5494   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5495   basic_block elem_body_bb = NULL;
5496   basic_block elem_cont_bb = NULL;
5497 
5498   /* SSA instances.  */
5499   tree offset_incr = NULL_TREE;
5500   tree offset_init = NULL_TREE;
5501 
5502   gimple_stmt_iterator gsi;
5503   gassign *ass;
5504   gcall *call;
5505   gimple *stmt;
5506   tree expr;
5507   location_t loc;
5508   edge split, be, fte;
5509 
5510   /* Split the end of entry_bb to create head_bb.  */
5511   split = split_block (entry_bb, last_stmt (entry_bb));
5512   basic_block head_bb = split->dest;
5513   entry_bb = split->src;
5514 
5515   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5516   gsi = gsi_last_nondebug_bb (entry_bb);
5517   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5518   loc = gimple_location (for_stmt);
5519 
5520   if (gimple_in_ssa_p (cfun))
5521     {
5522       offset_init = gimple_omp_for_index (for_stmt, 0);
5523       gcc_assert (integer_zerop (fd->loop.n1));
5524       /* The SSA parallelizer does gang parallelism.  */
5525       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5526     }
5527 
5528   if (fd->collapse > 1 || fd->tiling)
5529     {
5530       gcc_assert (!gimple_in_ssa_p (cfun) && up);
5531       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5532       tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5533 					      TREE_TYPE (fd->loop.n2), loc);
5534 
5535       if (SSA_VAR_P (fd->loop.n2))
5536 	{
5537 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5538 					    true, GSI_SAME_STMT);
5539 	  ass = gimple_build_assign (fd->loop.n2, total);
5540 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5541 	}
5542     }
5543 
5544   tree b = fd->loop.n1;
5545   tree e = fd->loop.n2;
5546   tree s = fd->loop.step;
5547 
5548   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5549   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5550 
5551   /* Convert the step, avoiding possible unsigned->signed overflow.  */
5552   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5553   if (negating)
5554     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5555   s = fold_convert (diff_type, s);
5556   if (negating)
5557     s = fold_build1 (NEGATE_EXPR, diff_type, s);
5558   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5559 
5560   if (!chunking)
5561     chunk_size = integer_zero_node;
5562   expr = fold_convert (diff_type, chunk_size);
5563   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5564 					 NULL_TREE, true, GSI_SAME_STMT);
5565 
5566   if (fd->tiling)
5567     {
5568       /* Determine the tile size and element step,
5569 	 modify the outer loop step size.  */
5570       tile_size = create_tmp_var (diff_type, ".tile_size");
5571       expr = build_int_cst (diff_type, 1);
5572       for (int ix = 0; ix < fd->collapse; ix++)
5573 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5574       expr = force_gimple_operand_gsi (&gsi, expr, true,
5575 				       NULL_TREE, true, GSI_SAME_STMT);
5576       ass = gimple_build_assign (tile_size, expr);
5577       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5578 
5579       element_s = create_tmp_var (diff_type, ".element_s");
5580       ass = gimple_build_assign (element_s, s);
5581       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5582 
5583       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5584       s = force_gimple_operand_gsi (&gsi, expr, true,
5585 				    NULL_TREE, true, GSI_SAME_STMT);
5586     }
5587 
5588   /* Determine the range, avoiding possible unsigned->signed overflow.  */
5589   negating = !up && TYPE_UNSIGNED (iter_type);
5590   expr = fold_build2 (MINUS_EXPR, plus_type,
5591 		      fold_convert (plus_type, negating ? b : e),
5592 		      fold_convert (plus_type, negating ? e : b));
5593   expr = fold_convert (diff_type, expr);
5594   if (negating)
5595     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5596   tree range = force_gimple_operand_gsi (&gsi, expr, true,
5597 					 NULL_TREE, true, GSI_SAME_STMT);
5598 
5599   chunk_no = build_int_cst (diff_type, 0);
5600   if (chunking)
5601     {
5602       gcc_assert (!gimple_in_ssa_p (cfun));
5603 
5604       expr = chunk_no;
5605       chunk_max = create_tmp_var (diff_type, ".chunk_max");
5606       chunk_no = create_tmp_var (diff_type, ".chunk_no");
5607 
5608       ass = gimple_build_assign (chunk_no, expr);
5609       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5610 
5611       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5612 					 build_int_cst (integer_type_node,
5613 							IFN_GOACC_LOOP_CHUNKS),
5614 					 dir, range, s, chunk_size, gwv);
5615       gimple_call_set_lhs (call, chunk_max);
5616       gimple_set_location (call, loc);
5617       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5618     }
5619   else
5620     chunk_size = chunk_no;
5621 
5622   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5623 				     build_int_cst (integer_type_node,
5624 						    IFN_GOACC_LOOP_STEP),
5625 				     dir, range, s, chunk_size, gwv);
5626   gimple_call_set_lhs (call, step);
5627   gimple_set_location (call, loc);
5628   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5629 
5630   /* Remove the GIMPLE_OMP_FOR.  */
5631   gsi_remove (&gsi, true);
5632 
5633   /* Fixup edges from head_bb.  */
5634   be = BRANCH_EDGE (head_bb);
5635   fte = FALLTHRU_EDGE (head_bb);
5636   be->flags |= EDGE_FALSE_VALUE;
5637   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5638 
5639   basic_block body_bb = fte->dest;
5640 
5641   if (gimple_in_ssa_p (cfun))
5642     {
5643       gsi = gsi_last_nondebug_bb (cont_bb);
5644       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5645 
5646       offset = gimple_omp_continue_control_use (cont_stmt);
5647       offset_incr = gimple_omp_continue_control_def (cont_stmt);
5648     }
5649   else
5650     {
5651       offset = create_tmp_var (diff_type, ".offset");
5652       offset_init = offset_incr = offset;
5653     }
5654   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5655 
5656   /* Loop offset & bound go into head_bb.  */
5657   gsi = gsi_start_bb (head_bb);
5658 
5659   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5660 				     build_int_cst (integer_type_node,
5661 						    IFN_GOACC_LOOP_OFFSET),
5662 				     dir, range, s,
5663 				     chunk_size, gwv, chunk_no);
5664   gimple_call_set_lhs (call, offset_init);
5665   gimple_set_location (call, loc);
5666   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5667 
5668   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5669 				     build_int_cst (integer_type_node,
5670 						    IFN_GOACC_LOOP_BOUND),
5671 				     dir, range, s,
5672 				     chunk_size, gwv, offset_init);
5673   gimple_call_set_lhs (call, bound);
5674   gimple_set_location (call, loc);
5675   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5676 
5677   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5678   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5679 		    GSI_CONTINUE_LINKING);
5680 
5681   /* V assignment goes into body_bb.  */
5682   if (!gimple_in_ssa_p (cfun))
5683     {
5684       gsi = gsi_start_bb (body_bb);
5685 
5686       expr = build2 (plus_code, iter_type, b,
5687 		     fold_convert (plus_type, offset));
5688       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5689 				       true, GSI_SAME_STMT);
5690       ass = gimple_build_assign (v, expr);
5691       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5692 
5693       if (fd->collapse > 1 || fd->tiling)
5694 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5695 
5696       if (fd->tiling)
5697 	{
5698 	  /* Determine the range of the element loop -- usually simply
5699 	     the tile_size, but could be smaller if the final
5700 	     iteration of the outer loop is a partial tile.  */
5701 	  tree e_range = create_tmp_var (diff_type, ".e_range");
5702 
5703 	  expr = build2 (MIN_EXPR, diff_type,
5704 			 build2 (MINUS_EXPR, diff_type, bound, offset),
5705 			 build2 (MULT_EXPR, diff_type, tile_size,
5706 				 element_s));
5707 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5708 					   true, GSI_SAME_STMT);
5709 	  ass = gimple_build_assign (e_range, expr);
5710 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5711 
5712 	  /* Determine bound, offset & step of inner loop. */
5713 	  e_bound = create_tmp_var (diff_type, ".e_bound");
5714 	  e_offset = create_tmp_var (diff_type, ".e_offset");
5715 	  e_step = create_tmp_var (diff_type, ".e_step");
5716 
5717 	  /* Mark these as element loops.  */
5718 	  tree t, e_gwv = integer_minus_one_node;
5719 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5720 
5721 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5722 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5723 					     element_s, chunk, e_gwv, chunk);
5724 	  gimple_call_set_lhs (call, e_offset);
5725 	  gimple_set_location (call, loc);
5726 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5727 
5728 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5729 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5730 					     element_s, chunk, e_gwv, e_offset);
5731 	  gimple_call_set_lhs (call, e_bound);
5732 	  gimple_set_location (call, loc);
5733 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5734 
5735 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5736 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5737 					     element_s, chunk, e_gwv);
5738 	  gimple_call_set_lhs (call, e_step);
5739 	  gimple_set_location (call, loc);
5740 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5741 
5742 	  /* Add test and split block.  */
5743 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5744 	  stmt = gimple_build_cond_empty (expr);
5745 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5746 	  split = split_block (body_bb, stmt);
5747 	  elem_body_bb = split->dest;
5748 	  if (cont_bb == body_bb)
5749 	    cont_bb = elem_body_bb;
5750 	  body_bb = split->src;
5751 
5752 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5753 
5754 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5755 	  if (cont_bb == NULL)
5756 	    {
5757 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5758 	      e->probability = profile_probability::even ();
5759 	      split->probability = profile_probability::even ();
5760 	    }
5761 
5762 	  /* Initialize the user's loop vars.  */
5763 	  gsi = gsi_start_bb (elem_body_bb);
5764 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5765 	}
5766     }
5767 
5768   /* Loop increment goes into cont_bb.  If this is not a loop, we
5769      will have spawned threads as if it were, and each one will
5770      execute one iteration.  The specification is not explicit about
5771      whether such constructs are ill-formed or not, and they can
5772      occur, especially when noreturn routines are involved.  */
5773   if (cont_bb)
5774     {
5775       gsi = gsi_last_nondebug_bb (cont_bb);
5776       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5777       loc = gimple_location (cont_stmt);
5778 
5779       if (fd->tiling)
5780 	{
5781 	  /* Insert element loop increment and test.  */
5782 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5783 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5784 					   true, GSI_SAME_STMT);
5785 	  ass = gimple_build_assign (e_offset, expr);
5786 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5787 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5788 
5789 	  stmt = gimple_build_cond_empty (expr);
5790 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5791 	  split = split_block (cont_bb, stmt);
5792 	  elem_cont_bb = split->src;
5793 	  cont_bb = split->dest;
5794 
5795 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5796 	  split->probability = profile_probability::unlikely ().guessed ();
5797 	  edge latch_edge
5798 	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5799 	  latch_edge->probability = profile_probability::likely ().guessed ();
5800 
5801 	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5802 	  skip_edge->probability = profile_probability::unlikely ().guessed ();
5803 	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5804 	  loop_entry_edge->probability
5805 	    = profile_probability::likely ().guessed ();
5806 
5807 	  gsi = gsi_for_stmt (cont_stmt);
5808 	}
5809 
5810       /* Increment offset.  */
5811       if (gimple_in_ssa_p (cfun))
5812 	expr = build2 (plus_code, iter_type, offset,
5813 		       fold_convert (plus_type, step));
5814       else
5815 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5816       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5817 				       true, GSI_SAME_STMT);
5818       ass = gimple_build_assign (offset_incr, expr);
5819       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5820       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5821       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5822 
5823       /*  Remove the GIMPLE_OMP_CONTINUE.  */
5824       gsi_remove (&gsi, true);
5825 
5826       /* Fixup edges from cont_bb.  */
5827       be = BRANCH_EDGE (cont_bb);
5828       fte = FALLTHRU_EDGE (cont_bb);
5829       be->flags |= EDGE_TRUE_VALUE;
5830       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5831 
5832       if (chunking)
5833 	{
5834 	  /* Split the beginning of exit_bb to make bottom_bb.  We
5835 	     need to insert a nop at the start, because splitting is
5836 	     after a stmt, not before.  */
5837 	  gsi = gsi_start_bb (exit_bb);
5838 	  stmt = gimple_build_nop ();
5839 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5840 	  split = split_block (exit_bb, stmt);
5841 	  bottom_bb = split->src;
5842 	  exit_bb = split->dest;
5843 	  gsi = gsi_last_bb (bottom_bb);
5844 
5845 	  /* Chunk increment and test goes into bottom_bb.  */
5846 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5847 			 build_int_cst (diff_type, 1));
5848 	  ass = gimple_build_assign (chunk_no, expr);
5849 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5850 
5851 	  /* Chunk test at end of bottom_bb.  */
5852 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5853 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5854 			    GSI_CONTINUE_LINKING);
5855 
5856 	  /* Fixup edges from bottom_bb.  */
5857 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5858 	  split->probability = profile_probability::unlikely ().guessed ();
5859 	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5860 	  latch_edge->probability = profile_probability::likely ().guessed ();
5861 	}
5862     }
5863 
5864   gsi = gsi_last_nondebug_bb (exit_bb);
5865   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5866   loc = gimple_location (gsi_stmt (gsi));
5867 
5868   if (!gimple_in_ssa_p (cfun))
5869     {
5870       /* Insert the final value of V, in case it is live.  This is the
5871 	 value for the only thread that survives past the join.  */
5872       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5873       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5874       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5875       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5876       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5877       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5878 				       true, GSI_SAME_STMT);
5879       ass = gimple_build_assign (v, expr);
5880       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5881     }
5882 
5883   /* Remove the OMP_RETURN.  */
5884   gsi_remove (&gsi, true);
5885 
5886   if (cont_bb)
5887     {
5888       /* We now have one, two or three nested loops.  Update the loop
5889 	 structures.  */
5890       struct loop *parent = entry_bb->loop_father;
5891       struct loop *body = body_bb->loop_father;
5892 
5893       if (chunking)
5894 	{
5895 	  struct loop *chunk_loop = alloc_loop ();
5896 	  chunk_loop->header = head_bb;
5897 	  chunk_loop->latch = bottom_bb;
5898 	  add_loop (chunk_loop, parent);
5899 	  parent = chunk_loop;
5900 	}
5901       else if (parent != body)
5902 	{
5903 	  gcc_assert (body->header == body_bb);
5904 	  gcc_assert (body->latch == cont_bb
5905 		      || single_pred (body->latch) == cont_bb);
5906 	  parent = NULL;
5907 	}
5908 
5909       if (parent)
5910 	{
5911 	  struct loop *body_loop = alloc_loop ();
5912 	  body_loop->header = body_bb;
5913 	  body_loop->latch = cont_bb;
5914 	  add_loop (body_loop, parent);
5915 
5916 	  if (fd->tiling)
5917 	    {
5918 	      /* Insert tiling's element loop.  */
5919 	      struct loop *inner_loop = alloc_loop ();
5920 	      inner_loop->header = elem_body_bb;
5921 	      inner_loop->latch = elem_cont_bb;
5922 	      add_loop (inner_loop, body_loop);
5923 	    }
5924 	}
5925     }
5926 }
5927 
5928 /* Expand the OMP loop defined by REGION.  */
5929 
5930 static void
5931 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5932 {
5933   struct omp_for_data fd;
5934   struct omp_for_data_loop *loops;
5935 
5936   loops
5937     = (struct omp_for_data_loop *)
5938       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5939 	      * sizeof (struct omp_for_data_loop));
5940   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5941 			&fd, loops);
5942   region->sched_kind = fd.sched_kind;
5943   region->sched_modifiers = fd.sched_modifiers;
5944 
5945   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5946   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5947   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5948   if (region->cont)
5949     {
5950       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5951       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5952       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5953     }
5954   else
5955     /* If there isn't a continue edge, this is a degenerate case where
5956        the introduction of abnormal edges during lowering will prevent
5957        the original loops from being detected.  Fix that up.  */
5958     loops_state_set (LOOPS_NEED_FIXUP);
5959 
5960   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5961     expand_omp_simd (region, &fd);
5962   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5963     {
5964       gcc_assert (!inner_stmt);
5965       expand_oacc_for (region, &fd);
5966     }
5967   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5968     {
5969       if (gimple_omp_for_combined_into_p (fd.for_stmt))
5970 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5971       else
5972 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5973     }
5974   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5975 	   && !fd.have_ordered)
5976     {
5977       if (fd.chunk_size == NULL)
5978 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5979       else
5980 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5981     }
5982   else
5983     {
5984       int fn_index, start_ix, next_ix;
5985       unsigned HOST_WIDE_INT sched = 0;
5986       tree sched_arg = NULL_TREE;
5987 
5988       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5989 		  == GF_OMP_FOR_KIND_FOR);
5990       if (fd.chunk_size == NULL
5991 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5992 	fd.chunk_size = integer_zero_node;
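      /* As a reading aid (inferred from the assignments in this switch
	 and from the have_reductemp handling below, not a separate
	 specification): SCHED is only passed on, via SCHED_ARG, when a
	 reduction temporary makes us call the GOMP_loop_start family;
	 its low bits pick the schedule kind (1 static, 2 dynamic,
	 3 guided, 4 nonmonotonic runtime) and bit 31 requests monotonic
	 ordering.  */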
5993       switch (fd.sched_kind)
5994 	{
5995 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5996 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
5997 	    {
5998 	      gcc_assert (!fd.have_ordered);
5999 	      fn_index = 6;
6000 	      sched = 4;
6001 	    }
6002 	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6003 		   && !fd.have_ordered)
6004 	    fn_index = 7;
6005 	  else
6006 	    {
6007 	      fn_index = 3;
6008 	      sched = (HOST_WIDE_INT_1U << 31);
6009 	    }
6010 	  break;
6011 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6012 	case OMP_CLAUSE_SCHEDULE_GUIDED:
6013 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6014 	      && !fd.have_ordered)
6015 	    {
6016 	      fn_index = 3 + fd.sched_kind;
6017 	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6018 	      break;
6019 	    }
6020 	  fn_index = fd.sched_kind;
6021 	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6022 	  sched += (HOST_WIDE_INT_1U << 31);
6023 	  break;
6024 	case OMP_CLAUSE_SCHEDULE_STATIC:
6025 	  gcc_assert (fd.have_ordered);
6026 	  fn_index = 0;
6027 	  sched = (HOST_WIDE_INT_1U << 31) + 1;
6028 	  break;
6029 	default:
6030 	  gcc_unreachable ();
6031 	}
6032       if (!fd.ordered)
6033 	fn_index += fd.have_ordered * 8;
6034       if (fd.ordered)
6035 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6036       else
6037 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6038       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6039       if (fd.have_reductemp)
6040 	{
6041 	  if (fd.ordered)
6042 	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6043 	  else if (fd.have_ordered)
6044 	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6045 	  else
6046 	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6047 	  sched_arg = build_int_cstu (long_integer_type_node, sched);
6048 	  if (!fd.chunk_size)
6049 	    fd.chunk_size = integer_zero_node;
6050 	}
6051       if (fd.iter_type == long_long_unsigned_type_node)
6052 	{
6053 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6054 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6055 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6056 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6057 	}
6058       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6059 			      (enum built_in_function) next_ix, sched_arg,
6060 			      inner_stmt);
6061     }
6062 
6063   if (gimple_in_ssa_p (cfun))
6064     update_ssa (TODO_update_ssa_only_virtuals);
6065 }
6066 
6067 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
6068 
6069 	v = GOMP_sections_start (n);
6070     L0:
6071 	switch (v)
6072 	  {
6073 	  case 0:
6074 	    goto L2;
6075 	  case 1:
6076 	    section 1;
6077 	    goto L1;
6078 	  case 2:
6079 	    ...
6080 	  case n:
6081 	    ...
6082 	  default:
6083 	    abort ();
6084 	  }
6085     L1:
6086 	v = GOMP_sections_next ();
6087 	goto L0;
6088     L2:
6089 	reduction;
6090 
6091     If this is a combined parallel sections construct, replace the call to
6092     GOMP_sections_start with a call to GOMP_sections_next.  */
6093 
6094 static void
6095 expand_omp_sections (struct omp_region *region)
6096 {
6097   tree t, u, vin = NULL, vmain, vnext, l2;
6098   unsigned len;
6099   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6100   gimple_stmt_iterator si, switch_si;
6101   gomp_sections *sections_stmt;
6102   gimple *stmt;
6103   gomp_continue *cont;
6104   edge_iterator ei;
6105   edge e;
6106   struct omp_region *inner;
6107   unsigned i, casei;
6108   bool exit_reachable = region->cont != NULL;
6109 
6110   gcc_assert (region->exit != NULL);
6111   entry_bb = region->entry;
6112   l0_bb = single_succ (entry_bb);
6113   l1_bb = region->cont;
6114   l2_bb = region->exit;
6115   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6116     l2 = gimple_block_label (l2_bb);
6117   else
6118     {
6119       /* This can happen if there are reductions.  */
6120       len = EDGE_COUNT (l0_bb->succs);
6121       gcc_assert (len > 0);
6122       e = EDGE_SUCC (l0_bb, len - 1);
6123       si = gsi_last_nondebug_bb (e->dest);
6124       l2 = NULL_TREE;
6125       if (gsi_end_p (si)
6126 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6127 	l2 = gimple_block_label (e->dest);
6128       else
6129 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
6130 	  {
6131 	    si = gsi_last_nondebug_bb (e->dest);
6132 	    if (gsi_end_p (si)
6133 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6134 	      {
6135 		l2 = gimple_block_label (e->dest);
6136 		break;
6137 	      }
6138 	  }
6139     }
6140   if (exit_reachable)
6141     default_bb = create_empty_bb (l1_bb->prev_bb);
6142   else
6143     default_bb = create_empty_bb (l0_bb);
6144 
6145   /* We will build a switch() with enough cases for all the
6146      GIMPLE_OMP_SECTION regions, a '0' case to handle the "no more work"
6147      situation and a default case to abort if something goes wrong.  */
6148   len = EDGE_COUNT (l0_bb->succs);
6149 
6150   /* Use vec::quick_push on label_vec throughout, since we know the size
6151      in advance.  */
6152   auto_vec<tree> label_vec (len);
6153 
6154   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6155      GIMPLE_OMP_SECTIONS statement.  */
6156   si = gsi_last_nondebug_bb (entry_bb);
6157   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6158   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6159   vin = gimple_omp_sections_control (sections_stmt);
6160   tree clauses = gimple_omp_sections_clauses (sections_stmt);
6161   tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6162   if (reductmp)
6163     {
6164       tree reductions = OMP_CLAUSE_DECL (reductmp);
6165       gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6166       gimple *g = SSA_NAME_DEF_STMT (reductions);
6167       reductions = gimple_assign_rhs1 (g);
6168       OMP_CLAUSE_DECL (reductmp) = reductions;
6169       gimple_stmt_iterator gsi = gsi_for_stmt (g);
6170       t = build_int_cst (unsigned_type_node, len - 1);
6171       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6172       stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
6173       gimple_call_set_lhs (stmt, vin);
6174       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6175       gsi_remove (&gsi, true);
6176       release_ssa_name (gimple_assign_lhs (g));
6177     }
6178   else if (!is_combined_parallel (region))
6179     {
6180       /* If we are not inside a combined parallel+sections region,
6181 	 call GOMP_sections_start.  */
6182       t = build_int_cst (unsigned_type_node, len - 1);
6183       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6184       stmt = gimple_build_call (u, 1, t);
6185     }
6186   else
6187     {
6188       /* Otherwise, call GOMP_sections_next.  */
6189       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6190       stmt = gimple_build_call (u, 0);
6191     }
6192   if (!reductmp)
6193     {
6194       gimple_call_set_lhs (stmt, vin);
6195       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6196     }
6197   gsi_remove (&si, true);
6198 
6199   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6200      L0_BB.  */
6201   switch_si = gsi_last_nondebug_bb (l0_bb);
6202   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6203   if (exit_reachable)
6204     {
6205       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6206       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6207       vmain = gimple_omp_continue_control_use (cont);
6208       vnext = gimple_omp_continue_control_def (cont);
6209     }
6210   else
6211     {
6212       vmain = vin;
6213       vnext = NULL_TREE;
6214     }
6215 
6216   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6217   label_vec.quick_push (t);
6218   i = 1;
6219 
6220   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
6221   for (inner = region->inner, casei = 1;
6222        inner;
6223        inner = inner->next, i++, casei++)
6224     {
6225       basic_block s_entry_bb, s_exit_bb;
6226 
6227       /* Skip optional reduction region.  */
6228       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6229 	{
6230 	  --i;
6231 	  --casei;
6232 	  continue;
6233 	}
6234 
6235       s_entry_bb = inner->entry;
6236       s_exit_bb = inner->exit;
6237 
6238       t = gimple_block_label (s_entry_bb);
6239       u = build_int_cst (unsigned_type_node, casei);
6240       u = build_case_label (u, NULL, t);
6241       label_vec.quick_push (u);
6242 
6243       si = gsi_last_nondebug_bb (s_entry_bb);
6244       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6245       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6246       gsi_remove (&si, true);
6247       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6248 
6249       if (s_exit_bb == NULL)
6250 	continue;
6251 
6252       si = gsi_last_nondebug_bb (s_exit_bb);
6253       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6254       gsi_remove (&si, true);
6255 
6256       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6257     }
6258 
6259   /* Error handling code goes in DEFAULT_BB.  */
6260   t = gimple_block_label (default_bb);
6261   u = build_case_label (NULL, NULL, t);
6262   make_edge (l0_bb, default_bb, 0);
6263   add_bb_to_loop (default_bb, current_loops->tree_root);
6264 
6265   stmt = gimple_build_switch (vmain, u, label_vec);
6266   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6267   gsi_remove (&switch_si, true);
6268 
6269   si = gsi_start_bb (default_bb);
6270   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6271   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6272 
6273   if (exit_reachable)
6274     {
6275       tree bfn_decl;
6276 
6277       /* Code to get the next section goes in L1_BB.  */
6278       si = gsi_last_nondebug_bb (l1_bb);
6279       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6280 
6281       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6282       stmt = gimple_build_call (bfn_decl, 0);
6283       gimple_call_set_lhs (stmt, vnext);
6284       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6285       gsi_remove (&si, true);
6286 
6287       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6288     }
6289 
6290   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
6291   si = gsi_last_nondebug_bb (l2_bb);
6292   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6293     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6294   else if (gimple_omp_return_lhs (gsi_stmt (si)))
6295     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6296   else
6297     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6298   stmt = gimple_build_call (t, 0);
6299   if (gimple_omp_return_lhs (gsi_stmt (si)))
6300     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6301   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6302   gsi_remove (&si, true);
6303 
6304   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6305 }
6306 
6307 /* Expand code for an OpenMP single directive.  We've already expanded
6308    much of the code; here we simply place the GOMP_barrier call.  */
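
/* A rough sketch of what this amounts to: the GIMPLE_OMP_SINGLE marker in
   ENTRY_BB is removed, and at the GIMPLE_OMP_RETURN in EXIT_BB a barrier
   call

       GOMP_barrier ();

   (built via omp_build_barrier) is inserted unless the region has a
   'nowait' clause.  */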
6309 
6310 static void
6311 expand_omp_single (struct omp_region *region)
6312 {
6313   basic_block entry_bb, exit_bb;
6314   gimple_stmt_iterator si;
6315 
6316   entry_bb = region->entry;
6317   exit_bb = region->exit;
6318 
6319   si = gsi_last_nondebug_bb (entry_bb);
6320   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6321   gsi_remove (&si, true);
6322   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6323 
6324   si = gsi_last_nondebug_bb (exit_bb);
6325   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6326     {
6327       tree t = gimple_omp_return_lhs (gsi_stmt (si));
6328       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6329     }
6330   gsi_remove (&si, true);
6331   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6332 }
6333 
6334 /* Generic expansion for OpenMP synchronization directives: master,
6335    taskgroup, ordered and critical.  All we need to do here is remove the
6336    entry and exit markers for REGION (host teams go to expand_omp_taskreg).  */
6337 
6338 static void
6339 expand_omp_synch (struct omp_region *region)
6340 {
6341   basic_block entry_bb, exit_bb;
6342   gimple_stmt_iterator si;
6343 
6344   entry_bb = region->entry;
6345   exit_bb = region->exit;
6346 
6347   si = gsi_last_nondebug_bb (entry_bb);
6348   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6349 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6350 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6351 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6352 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6353 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6354   if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6355       && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6356     {
6357       expand_omp_taskreg (region);
6358       return;
6359     }
6360   gsi_remove (&si, true);
6361   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6362 
6363   if (exit_bb)
6364     {
6365       si = gsi_last_nondebug_bb (exit_bb);
6366       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6367       gsi_remove (&si, true);
6368       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6369     }
6370 }
6371 
6372 /* Translate enum omp_memory_order to enum memmodel.  The two enums
6373    use different numberings so that OMP_MEMORY_ORDER_UNSPECIFIED
6374    is 0.  */
6375 
6376 static enum memmodel
6377 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6378 {
6379   switch (mo)
6380     {
6381     case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6382     case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6383     case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6384     case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6385     case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6386     default: gcc_unreachable ();
6387     }
6388 }
6389 
6390 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6391    operation as a normal volatile load.  */
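
/* As a rough sketch (assuming a 4-byte integral type, i.e. INDEX == 2),
   the pair

       GIMPLE_OMP_ATOMIC_LOAD (v, &x)
       GIMPLE_OMP_ATOMIC_STORE (v)

   handled by this function becomes approximately

       v = __atomic_load_4 (&x, mo);

   where MO is derived from the directive's memory order via
   omp_memory_order_to_memmodel.  */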
6392 
6393 static bool
6394 expand_omp_atomic_load (basic_block load_bb, tree addr,
6395 			tree loaded_val, int index)
6396 {
6397   enum built_in_function tmpbase;
6398   gimple_stmt_iterator gsi;
6399   basic_block store_bb;
6400   location_t loc;
6401   gimple *stmt;
6402   tree decl, call, type, itype;
6403 
6404   gsi = gsi_last_nondebug_bb (load_bb);
6405   stmt = gsi_stmt (gsi);
6406   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6407   loc = gimple_location (stmt);
6408 
6409   /* ??? If the target does not implement atomic_load_optab[mode], and mode
6410      is smaller than word size, then expand_atomic_load assumes that the load
6411      is atomic.  We could avoid the builtin entirely in this case.  */
6412 
6413   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6414   decl = builtin_decl_explicit (tmpbase);
6415   if (decl == NULL_TREE)
6416     return false;
6417 
6418   type = TREE_TYPE (loaded_val);
6419   itype = TREE_TYPE (TREE_TYPE (decl));
6420 
6421   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6422   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6423   call = build_call_expr_loc (loc, decl, 2, addr, mo);
6424   if (!useless_type_conversion_p (type, itype))
6425     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6426   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6427 
6428   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6429   gsi_remove (&gsi, true);
6430 
6431   store_bb = single_succ (load_bb);
6432   gsi = gsi_last_nondebug_bb (store_bb);
6433   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6434   gsi_remove (&gsi, true);
6435 
6436   if (gimple_in_ssa_p (cfun))
6437     update_ssa (TODO_update_ssa_no_phi);
6438 
6439   return true;
6440 }
6441 
6442 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6443    operation as a normal volatile store.  */
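
/* A rough sketch (again assuming a 4-byte type): the load/store pair
   becomes approximately

       __atomic_store_4 (&x, v, mo);

   or, if the value of the load is still needed,

       old = __atomic_exchange_4 (&x, v, mo);

   with MO taken from omp_memory_order_to_memmodel.  */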
6444 
6445 static bool
6446 expand_omp_atomic_store (basic_block load_bb, tree addr,
6447 			 tree loaded_val, tree stored_val, int index)
6448 {
6449   enum built_in_function tmpbase;
6450   gimple_stmt_iterator gsi;
6451   basic_block store_bb = single_succ (load_bb);
6452   location_t loc;
6453   gimple *stmt;
6454   tree decl, call, type, itype;
6455   machine_mode imode;
6456   bool exchange;
6457 
6458   gsi = gsi_last_nondebug_bb (load_bb);
6459   stmt = gsi_stmt (gsi);
6460   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6461 
6462   /* If the load value is needed, then this isn't a store but an exchange.  */
6463   exchange = gimple_omp_atomic_need_value_p (stmt);
6464 
6465   gsi = gsi_last_nondebug_bb (store_bb);
6466   stmt = gsi_stmt (gsi);
6467   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6468   loc = gimple_location (stmt);
6469 
6470   /* ??? If the target does not implement atomic_store_optab[mode], and mode
6471      is smaller than word size, then expand_atomic_store assumes that the store
6472      is atomic.  We could avoid the builtin entirely in this case.  */
6473 
6474   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6475   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6476   decl = builtin_decl_explicit (tmpbase);
6477   if (decl == NULL_TREE)
6478     return false;
6479 
6480   type = TREE_TYPE (stored_val);
6481 
6482   /* Dig out the type of the function's second argument.  */
6483   itype = TREE_TYPE (decl);
6484   itype = TYPE_ARG_TYPES (itype);
6485   itype = TREE_CHAIN (itype);
6486   itype = TREE_VALUE (itype);
6487   imode = TYPE_MODE (itype);
6488 
6489   if (exchange && !can_atomic_exchange_p (imode, true))
6490     return false;
6491 
6492   if (!useless_type_conversion_p (itype, type))
6493     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6494   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6495   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6496   call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6497   if (exchange)
6498     {
6499       if (!useless_type_conversion_p (type, itype))
6500 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6501       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6502     }
6503 
6504   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6505   gsi_remove (&gsi, true);
6506 
6507   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6508   gsi = gsi_last_nondebug_bb (load_bb);
6509   gsi_remove (&gsi, true);
6510 
6511   if (gimple_in_ssa_p (cfun))
6512     update_ssa (TODO_update_ssa_no_phi);
6513 
6514   return true;
6515 }
6516 
6517 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6518    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6519    size of the data type, and thus usable to find the index of the builtin
6520    decl.  Returns false if the expression is not of the proper form.  */
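
/* As an illustration (a sketch, assuming a 4-byte integer and that the
   old value is needed), a sequence recognized by this function such as

       GIMPLE_OMP_ATOMIC_LOAD (tmp, &x)
       val = tmp + expr;
       GIMPLE_OMP_ATOMIC_STORE (val)

   is replaced by approximately

       loaded_val = __atomic_fetch_add_4 (&x, expr, mo);  */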
6521 
6522 static bool
6523 expand_omp_atomic_fetch_op (basic_block load_bb,
6524 			    tree addr, tree loaded_val,
6525 			    tree stored_val, int index)
6526 {
6527   enum built_in_function oldbase, newbase, tmpbase;
6528   tree decl, itype, call;
6529   tree lhs, rhs;
6530   basic_block store_bb = single_succ (load_bb);
6531   gimple_stmt_iterator gsi;
6532   gimple *stmt;
6533   location_t loc;
6534   enum tree_code code;
6535   bool need_old, need_new;
6536   machine_mode imode;
6537 
6538   /* We expect to find the following sequences:
6539 
6540    load_bb:
6541        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6542 
6543    store_bb:
6544        val = tmp OP something; (or: something OP tmp)
6545        GIMPLE_OMP_STORE (val)
6546 
6547   ???FIXME: Allow a more flexible sequence.
6548   Perhaps use data flow to pick the statements.
6549 
6550   */
6551 
6552   gsi = gsi_after_labels (store_bb);
6553   stmt = gsi_stmt (gsi);
6554   if (is_gimple_debug (stmt))
6555     {
6556       gsi_next_nondebug (&gsi);
6557       if (gsi_end_p (gsi))
6558 	return false;
6559       stmt = gsi_stmt (gsi);
6560     }
6561   loc = gimple_location (stmt);
6562   if (!is_gimple_assign (stmt))
6563     return false;
6564   gsi_next_nondebug (&gsi);
6565   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6566     return false;
6567   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6568   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6569   enum omp_memory_order omo
6570     = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6571   enum memmodel mo = omp_memory_order_to_memmodel (omo);
6572   gcc_checking_assert (!need_old || !need_new);
6573 
6574   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6575     return false;
6576 
6577   /* Check for one of the supported fetch-op operations.  */
6578   code = gimple_assign_rhs_code (stmt);
6579   switch (code)
6580     {
6581     case PLUS_EXPR:
6582     case POINTER_PLUS_EXPR:
6583       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6584       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6585       break;
6586     case MINUS_EXPR:
6587       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6588       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6589       break;
6590     case BIT_AND_EXPR:
6591       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6592       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6593       break;
6594     case BIT_IOR_EXPR:
6595       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6596       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6597       break;
6598     case BIT_XOR_EXPR:
6599       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6600       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6601       break;
6602     default:
6603       return false;
6604     }
6605 
6606   /* Make sure the expression is of the proper form.  */
6607   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6608     rhs = gimple_assign_rhs2 (stmt);
6609   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6610 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6611     rhs = gimple_assign_rhs1 (stmt);
6612   else
6613     return false;
6614 
6615   tmpbase = ((enum built_in_function)
6616 	     ((need_new ? newbase : oldbase) + index + 1));
6617   decl = builtin_decl_explicit (tmpbase);
6618   if (decl == NULL_TREE)
6619     return false;
6620   itype = TREE_TYPE (TREE_TYPE (decl));
6621   imode = TYPE_MODE (itype);
6622 
6623   /* We could test all of the various optabs involved, but the fact of the
6624      matter is that (with the exception of i486 vs i586 and xadd) all targets
6625      that support any atomic operation optab also implement compare-and-swap.
6626      Let optabs.c take care of expanding any compare-and-swap loop.  */
6627   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6628     return false;
6629 
6630   gsi = gsi_last_nondebug_bb (load_bb);
6631   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6632 
6633   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6634      It only requires that the operation happen atomically, so the RELAXED
6635      memory model suffices unless the directive requested a stronger one (MO).  */
6636   call = build_call_expr_loc (loc, decl, 3, addr,
6637 			      fold_convert_loc (loc, itype, rhs),
6638 			      build_int_cst (NULL, mo));
6639 
6640   if (need_old || need_new)
6641     {
6642       lhs = need_old ? loaded_val : stored_val;
6643       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6644       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6645     }
6646   else
6647     call = fold_convert_loc (loc, void_type_node, call);
6648   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6649   gsi_remove (&gsi, true);
6650 
6651   gsi = gsi_last_nondebug_bb (store_bb);
6652   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6653   gsi_remove (&gsi, true);
6654   gsi = gsi_last_nondebug_bb (store_bb);
6655   stmt = gsi_stmt (gsi);
6656   gsi_remove (&gsi, true);
6657 
6658   if (gimple_in_ssa_p (cfun))
6659     {
6660       release_defs (stmt);
6661       update_ssa (TODO_update_ssa_no_phi);
6662     }
6663 
6664   return true;
6665 }
6666 
6667 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6668 
6669       oldval = *addr;
6670       repeat:
6671 	newval = rhs;	 // with oldval replacing *addr in rhs
6672 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6673 	if (oldval != newval)
6674 	  goto repeat;
6675 
6676    INDEX is log2 of the size of the data type, and thus usable to find the
6677    index of the builtin decl.  */
6678 
6679 static bool
6680 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6681 			    tree addr, tree loaded_val, tree stored_val,
6682 			    int index)
6683 {
6684   tree loadedi, storedi, initial, new_storedi, old_vali;
6685   tree type, itype, cmpxchg, iaddr, atype;
6686   gimple_stmt_iterator si;
6687   basic_block loop_header = single_succ (load_bb);
6688   gimple *phi, *stmt;
6689   edge e;
6690   enum built_in_function fncode;
6691 
6692   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6693      order to use the RELAXED memory model effectively.  */
6694   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6695 				    + index + 1);
6696   cmpxchg = builtin_decl_explicit (fncode);
6697   if (cmpxchg == NULL_TREE)
6698     return false;
6699   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6700   atype = type;
6701   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6702 
6703   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6704       || !can_atomic_load_p (TYPE_MODE (itype)))
6705     return false;
6706 
6707   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6708   si = gsi_last_nondebug_bb (load_bb);
6709   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6710 
6711   /* For floating-point values, we'll need to view-convert them to integers
6712      so that we can perform the atomic compare and swap.  Simplify the
6713      following code by always setting up the "i"ntegral variables.  */
6714   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6715     {
6716       tree iaddr_val;
6717 
6718       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6719 							   true));
6720       atype = itype;
6721       iaddr_val
6722 	= force_gimple_operand_gsi (&si,
6723 				    fold_convert (TREE_TYPE (iaddr), addr),
6724 				    false, NULL_TREE, true, GSI_SAME_STMT);
6725       stmt = gimple_build_assign (iaddr, iaddr_val);
6726       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6727       loadedi = create_tmp_var (itype);
6728       if (gimple_in_ssa_p (cfun))
6729 	loadedi = make_ssa_name (loadedi);
6730     }
6731   else
6732     {
6733       iaddr = addr;
6734       loadedi = loaded_val;
6735     }
6736 
6737   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6738   tree loaddecl = builtin_decl_explicit (fncode);
6739   if (loaddecl)
6740     initial
6741       = fold_convert (atype,
6742 		      build_call_expr (loaddecl, 2, iaddr,
6743 				       build_int_cst (NULL_TREE,
6744 						      MEMMODEL_RELAXED)));
6745   else
6746     {
6747       tree off
6748 	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6749 						      true), 0);
6750       initial = build2 (MEM_REF, atype, iaddr, off);
6751     }
6752 
6753   initial
6754     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6755 				GSI_SAME_STMT);
6756 
6757   /* Move the value to the LOADEDI temporary.  */
6758   if (gimple_in_ssa_p (cfun))
6759     {
6760       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6761       phi = create_phi_node (loadedi, loop_header);
6762       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6763 	       initial);
6764     }
6765   else
6766     gsi_insert_before (&si,
6767 		       gimple_build_assign (loadedi, initial),
6768 		       GSI_SAME_STMT);
6769   if (loadedi != loaded_val)
6770     {
6771       gimple_stmt_iterator gsi2;
6772       tree x;
6773 
6774       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6775       gsi2 = gsi_start_bb (loop_header);
6776       if (gimple_in_ssa_p (cfun))
6777 	{
6778 	  gassign *stmt;
6779 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6780 					true, GSI_SAME_STMT);
6781 	  stmt = gimple_build_assign (loaded_val, x);
6782 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6783 	}
6784       else
6785 	{
6786 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6787 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6788 				    true, GSI_SAME_STMT);
6789 	}
6790     }
6791   gsi_remove (&si, true);
6792 
6793   si = gsi_last_nondebug_bb (store_bb);
6794   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6795 
6796   if (iaddr == addr)
6797     storedi = stored_val;
6798   else
6799     storedi
6800       = force_gimple_operand_gsi (&si,
6801 				  build1 (VIEW_CONVERT_EXPR, itype,
6802 					  stored_val), true, NULL_TREE, true,
6803 				  GSI_SAME_STMT);
6804 
6805   /* Build the compare&swap statement.  */
6806   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6807   new_storedi = force_gimple_operand_gsi (&si,
6808 					  fold_convert (TREE_TYPE (loadedi),
6809 							new_storedi),
6810 					  true, NULL_TREE,
6811 					  true, GSI_SAME_STMT);
6812 
6813   if (gimple_in_ssa_p (cfun))
6814     old_vali = loadedi;
6815   else
6816     {
6817       old_vali = create_tmp_var (TREE_TYPE (loadedi));
6818       stmt = gimple_build_assign (old_vali, loadedi);
6819       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6820 
6821       stmt = gimple_build_assign (loadedi, new_storedi);
6822       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6823     }
6824 
6825   /* Note that we always perform the comparison as an integer, even for
6826      floating point.  This allows the atomic operation to properly
6827      succeed even with NaNs and -0.0.  */
6828   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6829   stmt = gimple_build_cond_empty (ne);
6830   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6831 
6832   /* Update cfg.  */
6833   e = single_succ_edge (store_bb);
6834   e->flags &= ~EDGE_FALLTHRU;
6835   e->flags |= EDGE_FALSE_VALUE;
6836   /* Expect no looping.  */
6837   e->probability = profile_probability::guessed_always ();
6838 
6839   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6840   e->probability = profile_probability::guessed_never ();
6841 
6842   /* Copy the new value to loadedi (we already did that before the condition
6843      if we are not in SSA).  */
6844   if (gimple_in_ssa_p (cfun))
6845     {
6846       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6847       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6848     }
6849 
6850   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6851   gsi_remove (&si, true);
6852 
6853   struct loop *loop = alloc_loop ();
6854   loop->header = loop_header;
6855   loop->latch = store_bb;
6856   add_loop (loop, loop_header->loop_father);
6857 
6858   if (gimple_in_ssa_p (cfun))
6859     update_ssa (TODO_update_ssa_no_phi);
6860 
6861   return true;
6862 }
6863 
6864 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6865 
6866 				  GOMP_atomic_start ();
6867 				  *addr = rhs;
6868 				  GOMP_atomic_end ();
6869 
6870    The result is not globally atomic, but works so long as all parallel
6871    references are within #pragma omp atomic directives.  According to
6872    responses received from omp@openmp.org, this appears to be within spec,
6873    which makes sense, since that's how several other compilers handle
6874    this situation as well.
6875    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6876    expanding.  STORED_VAL is the operand of the matching
6877    GIMPLE_OMP_ATOMIC_STORE.
6878 
6879    We replace
6880    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6881    loaded_val = *addr;
6882 
6883    and replace
6884    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6885    *addr = stored_val;
6886 */
6887 
6888 static bool
6889 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6890 			 tree addr, tree loaded_val, tree stored_val)
6891 {
6892   gimple_stmt_iterator si;
6893   gassign *stmt;
6894   tree t;
6895 
6896   si = gsi_last_nondebug_bb (load_bb);
6897   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6898 
6899   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6900   t = build_call_expr (t, 0);
6901   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6902 
6903   tree mem = build_simple_mem_ref (addr);
6904   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6905   TREE_OPERAND (mem, 1)
6906     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6907 						 true),
6908 		    TREE_OPERAND (mem, 1));
6909   stmt = gimple_build_assign (loaded_val, mem);
6910   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6911   gsi_remove (&si, true);
6912 
6913   si = gsi_last_nondebug_bb (store_bb);
6914   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6915 
6916   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6917   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6918 
6919   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6920   t = build_call_expr (t, 0);
6921   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6922   gsi_remove (&si, true);
6923 
6924   if (gimple_in_ssa_p (cfun))
6925     update_ssa (TODO_update_ssa_no_phi);
6926   return true;
6927 }
6928 
6929 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
6930    using expand_omp_atomic_fetch_op.  If that fails, we try
6931    expand_omp_atomic_pipeline, and if that fails too, the
6932    ultimate fallback is wrapping the operation in a mutex
6933    (expand_omp_atomic_mutex).  REGION is the atomic region built
6934    by build_omp_regions_1().  */
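
/* For example, a source-level construct such as

       #pragma omp atomic
       x += 1;

   reaches this point as a GIMPLE_OMP_ATOMIC_LOAD / GIMPLE_OMP_ATOMIC_STORE
   pair and is expanded by one of the strategies tried below, in order of
   preference (this is only an illustrative sketch).  */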
6935 
6936 static void
6937 expand_omp_atomic (struct omp_region *region)
6938 {
6939   basic_block load_bb = region->entry, store_bb = region->exit;
6940   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6941   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6942   tree loaded_val = gimple_omp_atomic_load_lhs (load);
6943   tree addr = gimple_omp_atomic_load_rhs (load);
6944   tree stored_val = gimple_omp_atomic_store_val (store);
6945   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6946   HOST_WIDE_INT index;
6947 
6948   /* Make sure the type is one of the supported sizes.  */
6949   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6950   index = exact_log2 (index);
6951   if (index >= 0 && index <= 4)
6952     {
6953       unsigned int align = TYPE_ALIGN_UNIT (type);
6954 
6955       /* __sync builtins require strict data alignment.  */
6956       if (exact_log2 (align) >= index)
6957 	{
6958 	  /* Atomic load.  */
6959 	  scalar_mode smode;
6960 	  if (loaded_val == stored_val
6961 	      && (is_int_mode (TYPE_MODE (type), &smode)
6962 		  || is_float_mode (TYPE_MODE (type), &smode))
6963 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6964 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6965 	    return;
6966 
6967 	  /* Atomic store.  */
6968 	  if ((is_int_mode (TYPE_MODE (type), &smode)
6969 	       || is_float_mode (TYPE_MODE (type), &smode))
6970 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6971 	      && store_bb == single_succ (load_bb)
6972 	      && first_stmt (store_bb) == store
6973 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6974 					  stored_val, index))
6975 	    return;
6976 
6977 	  /* When possible, use specialized atomic update functions.  */
6978 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6979 	      && store_bb == single_succ (load_bb)
6980 	      && expand_omp_atomic_fetch_op (load_bb, addr,
6981 					     loaded_val, stored_val, index))
6982 	    return;
6983 
6984 	  /* If we don't have specialized __sync builtins, try to implement
6985 	     it as a compare-and-swap loop.  */
6986 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6987 					  loaded_val, stored_val, index))
6988 	    return;
6989 	}
6990     }
6991 
6992   /* The ultimate fallback is wrapping the operation in a mutex.  */
6993   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6994 }
6995 
6996 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6997    at REGION_EXIT.  */
6998 
6999 static void
7000 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7001 				   basic_block region_exit)
7002 {
7003   struct loop *outer = region_entry->loop_father;
7004   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7005 
7006   /* Don't parallelize the kernels region if it contains more than one outer
7007      loop.  */
7008   unsigned int nr_outer_loops = 0;
7009   struct loop *single_outer = NULL;
7010   for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7011     {
7012       gcc_assert (loop_outer (loop) == outer);
7013 
7014       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7015 	continue;
7016 
7017       if (region_exit != NULL
7018 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7019 	continue;
7020 
7021       nr_outer_loops++;
7022       single_outer = loop;
7023     }
7024   if (nr_outer_loops != 1)
7025     return;
7026 
7027   for (struct loop *loop = single_outer->inner;
7028        loop != NULL;
7029        loop = loop->inner)
7030     if (loop->next)
7031       return;
7032 
7033   /* Mark the loops in the region.  */
7034   for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7035     loop->in_oacc_kernels_region = true;
7036 }
7037 
7038 /* Types used to pass grid and workgroup sizes to a kernel invocation.  */
7039 
7040 struct GTY(()) grid_launch_attributes_trees
7041 {
7042   tree kernel_dim_array_type;
7043   tree kernel_lattrs_dimnum_decl;
7044   tree kernel_lattrs_grid_decl;
7045   tree kernel_lattrs_group_decl;
7046   tree kernel_launch_attributes_type;
7047 };
7048 
7049 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7050 
7051 /* Create types used to pass kernel launch attributes to target.  */
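
/* Roughly speaking, the record built below corresponds to

       struct __gomp_kernel_launch_attributes
       {
	 uint32_t ndim;
	 uint32_t grid_size[3];
	 uint32_t group_size[3];
       };

   (a sketch for illustration; the actual trees are constructed field by
   field below).  */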
7052 
7053 static void
7054 grid_create_kernel_launch_attr_types (void)
7055 {
7056   if (grid_attr_trees)
7057     return;
7058   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7059 
7060   tree dim_arr_index_type
7061     = build_index_type (build_int_cst (integer_type_node, 2));
7062   grid_attr_trees->kernel_dim_array_type
7063     = build_array_type (uint32_type_node, dim_arr_index_type);
7064 
7065   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7066   grid_attr_trees->kernel_lattrs_dimnum_decl
7067     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7068 		  uint32_type_node);
7069   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7070 
7071   grid_attr_trees->kernel_lattrs_grid_decl
7072     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7073 		  grid_attr_trees->kernel_dim_array_type);
7074   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7075     = grid_attr_trees->kernel_lattrs_dimnum_decl;
7076   grid_attr_trees->kernel_lattrs_group_decl
7077     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7078 		  grid_attr_trees->kernel_dim_array_type);
7079   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7080     = grid_attr_trees->kernel_lattrs_grid_decl;
7081   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7082 			 "__gomp_kernel_launch_attributes",
7083 			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7084 }
7085 
7086 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7087    array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
7088    of type uint32_type_node.  */
7089 
7090 static void
7091 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7092 			     tree fld_decl, int index, tree value)
7093 {
7094   tree ref = build4 (ARRAY_REF, uint32_type_node,
7095 		     build3 (COMPONENT_REF,
7096 			     grid_attr_trees->kernel_dim_array_type,
7097 			     range_var, fld_decl, NULL_TREE),
7098 		     build_int_cst (integer_type_node, index),
7099 		     NULL_TREE, NULL_TREE);
7100   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7101 }
7102 
7103 /* Return a tree representation of a pointer to a structure with grid and
7104    work-group size information.  Statements filling that information will be
7105    inserted before GSI; TGT_STMT is the target statement which has the
7106    necessary information in it.  */
7107 
7108 static tree
7109 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7110 				       gomp_target *tgt_stmt)
7111 {
7112   grid_create_kernel_launch_attr_types ();
7113   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7114 				"__kernel_launch_attrs");
7115 
7116   unsigned max_dim = 0;
7117   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7118        clause;
7119        clause = OMP_CLAUSE_CHAIN (clause))
7120     {
7121       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7122 	continue;
7123 
7124       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7125       max_dim = MAX (dim, max_dim);
7126 
7127       grid_insert_store_range_dim (gsi, lattrs,
7128 				   grid_attr_trees->kernel_lattrs_grid_decl,
7129 				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7130       grid_insert_store_range_dim (gsi, lattrs,
7131 				   grid_attr_trees->kernel_lattrs_group_decl,
7132 				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7133     }
7134 
7135   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7136 			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7137   gcc_checking_assert (max_dim <= 2);
7138   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7139   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7140 		     GSI_SAME_STMT);
7141   TREE_ADDRESSABLE (lattrs) = 1;
7142   return build_fold_addr_expr (lattrs);
7143 }
7144 
7145 /* Build a target argument identifier from the DEVICE identifier, the value
7146    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
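
/* In other words (a sketch of the encoding), the identifier is built as

       DEVICE | (SUBSEQUENT_PARAM ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0) | ID

   folded into an integer_type_node constant.  */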
7147 
7148 static tree
7149 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7150 {
7151   tree t = build_int_cst (integer_type_node, device);
7152   if (subseqent_param)
7153     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7154 		     build_int_cst (integer_type_node,
7155 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7156   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7157 		   build_int_cst (integer_type_node, id));
7158   return t;
7159 }
7160 
7161 /* Like above but return it in a type that can be directly stored as an
7162    element of the argument array.  */
7163 
7164 static tree
7165 get_target_argument_identifier (int device, bool subseqent_param, int id)
7166 {
7167   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7168   return fold_convert (ptr_type_node, t);
7169 }
7170 
7171 /* Return a target argument consisting of DEVICE identifier, value identifier
7172    ID, and the actual VALUE.  */
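
/* That is (as a sketch), the returned word encodes

       (VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | identifier (DEVICE, false, ID)

   converted to ptr_type_node.  */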
7173 
7174 static tree
7175 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7176 			   tree value)
7177 {
7178   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7179 			fold_convert (integer_type_node, value),
7180 			build_int_cst (unsigned_type_node,
7181 				       GOMP_TARGET_ARG_VALUE_SHIFT));
7182   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7183 		   get_target_argument_identifier_1 (device, false, id));
7184   t = fold_convert (ptr_type_node, t);
7185   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7186 }
7187 
7188 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7189    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7190    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7191    arguments.  */
7192 
7193 static void
7194 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7195 					 int id, tree value, vec <tree> *args)
7196 {
7197   if (tree_fits_shwi_p (value)
7198       && tree_to_shwi (value) > -(1 << 15)
7199       && tree_to_shwi (value) < (1 << 15))
7200     args->quick_push (get_target_argument_value (gsi, device, id, value));
7201   else
7202     {
7203       args->quick_push (get_target_argument_identifier (device, true, id));
7204       value = fold_convert (ptr_type_node, value);
7205       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7206 					GSI_SAME_STMT);
7207       args->quick_push (value);
7208     }
7209 }
7210 
7211 /* Create an array of arguments that is then passed to GOMP_target.  */
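
/* A minimal sketch of the array built below (assuming no HSA grid sizes
   are present):

       .omp_target_args[] = { <DEVICE_ALL, NUM_TEAMS, value>,
			      <DEVICE_ALL, THREAD_LIMIT, value>,
			      NULL };

   Each non-NULL element is encoded (or split into two elements) by
   push_target_argument_according_to_value above.  */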
7212 
7213 static tree
7214 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7215 {
7216   auto_vec <tree, 6> args;
7217   tree clauses = gimple_omp_target_clauses (tgt_stmt);
7218   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7219   if (c)
7220     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7221   else
7222     t = integer_minus_one_node;
7223   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7224 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7225 
7226   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7227   if (c)
7228     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7229   else
7230     t = integer_minus_one_node;
7231   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7232 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
7233 					   &args);
7234 
7235   /* Add HSA-specific grid sizes, if available.  */
7236   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7237 		       OMP_CLAUSE__GRIDDIM_))
7238     {
7239       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7240       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7241       args.quick_push (t);
7242       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7243     }
7244 
7245   /* Produce more, perhaps device specific, arguments here.  */
7246 
7247   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7248 							  args.length () + 1),
7249 				  ".omp_target_args");
7250   for (unsigned i = 0; i < args.length (); i++)
7251     {
7252       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7253 			 build_int_cst (integer_type_node, i),
7254 			 NULL_TREE, NULL_TREE);
7255       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7256 			 GSI_SAME_STMT);
7257     }
7258   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7259 		     build_int_cst (integer_type_node, args.length ()),
7260 		     NULL_TREE, NULL_TREE);
7261   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7262 		     GSI_SAME_STMT);
7263   TREE_ADDRESSABLE (argarray) = 1;
7264   return build_fold_addr_expr (argarray);
7265 }
7266 
7267 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
7268 
7269 static void
7270 expand_omp_target (struct omp_region *region)
7271 {
7272   basic_block entry_bb, exit_bb, new_bb;
7273   struct function *child_cfun;
7274   tree child_fn, block, t;
7275   gimple_stmt_iterator gsi;
7276   gomp_target *entry_stmt;
7277   gimple *stmt;
7278   edge e;
7279   bool offloaded, data_region;
7280 
7281   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7282   new_bb = region->entry;
7283 
7284   offloaded = is_gimple_omp_offloaded (entry_stmt);
7285   switch (gimple_omp_target_kind (entry_stmt))
7286     {
7287     case GF_OMP_TARGET_KIND_REGION:
7288     case GF_OMP_TARGET_KIND_UPDATE:
7289     case GF_OMP_TARGET_KIND_ENTER_DATA:
7290     case GF_OMP_TARGET_KIND_EXIT_DATA:
7291     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7292     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7293     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7294     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7295     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7296       data_region = false;
7297       break;
7298     case GF_OMP_TARGET_KIND_DATA:
7299     case GF_OMP_TARGET_KIND_OACC_DATA:
7300     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7301       data_region = true;
7302       break;
7303     default:
7304       gcc_unreachable ();
7305     }
7306 
7307   child_fn = NULL_TREE;
7308   child_cfun = NULL;
7309   if (offloaded)
7310     {
7311       child_fn = gimple_omp_target_child_fn (entry_stmt);
7312       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7313     }
7314 
7315   /* Supported by expand_omp_taskreg, but not here.  */
7316   if (child_cfun != NULL)
7317     gcc_checking_assert (!child_cfun->cfg);
7318   gcc_checking_assert (!gimple_in_ssa_p (cfun));
7319 
7320   entry_bb = region->entry;
7321   exit_bb = region->exit;
7322 
7323   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7324     {
7325       mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7326 
7327       /* Further down, both OpenACC kernels and OpenACC parallel constructs
7328 	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7329 	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
7330       DECL_ATTRIBUTES (child_fn)
7331 	= tree_cons (get_identifier ("oacc kernels"),
7332 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
7333     }
7334 
7335   if (offloaded)
7336     {
7337       unsigned srcidx, dstidx, num;
7338 
7339       /* If the offloading region needs data sent from the parent
7340 	 function, then the very first statement (except possible
7341 	 tree profile counter updates) of the offloading body
7342 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
7343 	 &.OMP_DATA_O is passed as an argument to the child function,
7344 	 we need to replace it with the argument as seen by the child
7345 	 function.
7346 
7347 	 In most cases, this will end up being the identity assignment
7348 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
7349 	 a function call that has been inlined, the original PARM_DECL
7350 	 .OMP_DATA_I may have been converted into a different local
7351 	 variable, in which case we need to keep the assignment.  */
7352       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7353       if (data_arg)
7354 	{
7355 	  basic_block entry_succ_bb = single_succ (entry_bb);
7356 	  gimple_stmt_iterator gsi;
7357 	  tree arg;
7358 	  gimple *tgtcopy_stmt = NULL;
7359 	  tree sender = TREE_VEC_ELT (data_arg, 0);
7360 
7361 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7362 	    {
7363 	      gcc_assert (!gsi_end_p (gsi));
7364 	      stmt = gsi_stmt (gsi);
7365 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
7366 		continue;
7367 
7368 	      if (gimple_num_ops (stmt) == 2)
7369 		{
7370 		  tree arg = gimple_assign_rhs1 (stmt);
7371 
7372 		  /* We're ignoring the subcode because we're
7373 		     effectively doing a STRIP_NOPS.  */
7374 
7375 		  if (TREE_CODE (arg) == ADDR_EXPR
7376 		      && TREE_OPERAND (arg, 0) == sender)
7377 		    {
7378 		      tgtcopy_stmt = stmt;
7379 		      break;
7380 		    }
7381 		}
7382 	    }
7383 
7384 	  gcc_assert (tgtcopy_stmt != NULL);
7385 	  arg = DECL_ARGUMENTS (child_fn);
7386 
7387 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7388 	  gsi_remove (&gsi, true);
7389 	}
7390 
7391       /* Declare local variables needed in CHILD_CFUN.  */
7392       block = DECL_INITIAL (child_fn);
7393       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7394       /* The gimplifier could record temporaries in the offloading block
7395 	 rather than in the containing function's local_decls chain,
7396 	 which would mean cgraph missed finalizing them.  Do it now.  */
7397       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7398 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7399 	  varpool_node::finalize_decl (t);
7400       DECL_SAVED_TREE (child_fn) = NULL;
7401       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7402       gimple_set_body (child_fn, NULL);
7403       TREE_USED (block) = 1;
7404 
7405       /* Reset DECL_CONTEXT on function arguments.  */
7406       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7407 	DECL_CONTEXT (t) = child_fn;
7408 
7409       /* Split ENTRY_BB at GIMPLE_*,
7410 	 so that it can be moved to the child function.  */
7411       gsi = gsi_last_nondebug_bb (entry_bb);
7412       stmt = gsi_stmt (gsi);
7413       gcc_assert (stmt
7414 		  && gimple_code (stmt) == gimple_code (entry_stmt));
7415       e = split_block (entry_bb, stmt);
7416       gsi_remove (&gsi, true);
7417       entry_bb = e->dest;
7418       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7419 
7420       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7421       if (exit_bb)
7422 	{
7423 	  gsi = gsi_last_nondebug_bb (exit_bb);
7424 	  gcc_assert (!gsi_end_p (gsi)
7425 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7426 	  stmt = gimple_build_return (NULL);
7427 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7428 	  gsi_remove (&gsi, true);
7429 	}
7430 
7431       /* Move the offloading region into CHILD_CFUN.  */
7432 
7433       block = gimple_block (entry_stmt);
7434 
7435       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7436       if (exit_bb)
7437 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7438       /* When the OMP expansion process cannot guarantee an up-to-date
7439 	 loop tree, arrange for the child function to fix up loops.  */
7440       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7441 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7442 
7443       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7444       num = vec_safe_length (child_cfun->local_decls);
7445       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7446 	{
7447 	  t = (*child_cfun->local_decls)[srcidx];
7448 	  if (DECL_CONTEXT (t) == cfun->decl)
7449 	    continue;
7450 	  if (srcidx != dstidx)
7451 	    (*child_cfun->local_decls)[dstidx] = t;
7452 	  dstidx++;
7453 	}
7454       if (dstidx != num)
7455 	vec_safe_truncate (child_cfun->local_decls, dstidx);
7456 
7457       /* Inform the callgraph about the new function.  */
7458       child_cfun->curr_properties = cfun->curr_properties;
7459       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7460       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7461       cgraph_node *node = cgraph_node::get_create (child_fn);
7462       node->parallelized_function = 1;
7463       cgraph_node::add_new_function (child_fn, true);
7464 
7465       /* Add the new function to the offload table.  */
7466       if (ENABLE_OFFLOADING)
7467 	{
7468 	  if (in_lto_p)
7469 	    DECL_PRESERVE_P (child_fn) = 1;
7470 	  vec_safe_push (offload_funcs, child_fn);
7471 	}
7472 
7473       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7474 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7475 
7476       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7477 	 fixed in a following pass.  */
7478       push_cfun (child_cfun);
7479       if (need_asm)
7480 	assign_assembler_name_if_needed (child_fn);
7481       cgraph_edge::rebuild_edges ();
7482 
7483       /* Some EH regions might become dead, see PR34608.  If
7484 	 pass_cleanup_cfg isn't the first pass to happen with the
7485 	 new child, these dead EH edges might cause problems.
7486 	 Clean them up now.  */
7487       if (flag_exceptions)
7488 	{
7489 	  basic_block bb;
7490 	  bool changed = false;
7491 
7492 	  FOR_EACH_BB_FN (bb, cfun)
7493 	    changed |= gimple_purge_dead_eh_edges (bb);
7494 	  if (changed)
7495 	    cleanup_tree_cfg ();
7496 	}
7497       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7498 	verify_loop_structure ();
7499       pop_cfun ();
7500 
7501       if (dump_file && !gimple_in_ssa_p (cfun))
7502 	{
7503 	  omp_any_child_fn_dumped = true;
7504 	  dump_function_header (dump_file, child_fn, dump_flags);
7505 	  dump_function_to_file (child_fn, dump_file, dump_flags);
7506 	}
7507 
7508       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7509     }
7510 
7511   /* Emit a library call to launch the offloading region, or do data
7512      transfers.  */
7513   tree t1, t2, t3, t4, depend, c, clauses;
7514   enum built_in_function start_ix;
7515   unsigned int flags_i = 0;
7516 
7517   switch (gimple_omp_target_kind (entry_stmt))
7518     {
7519     case GF_OMP_TARGET_KIND_REGION:
7520       start_ix = BUILT_IN_GOMP_TARGET;
7521       break;
7522     case GF_OMP_TARGET_KIND_DATA:
7523       start_ix = BUILT_IN_GOMP_TARGET_DATA;
7524       break;
7525     case GF_OMP_TARGET_KIND_UPDATE:
7526       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7527       break;
7528     case GF_OMP_TARGET_KIND_ENTER_DATA:
7529       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7530       break;
7531     case GF_OMP_TARGET_KIND_EXIT_DATA:
7532       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7533       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7534       break;
7535     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7536     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7537       start_ix = BUILT_IN_GOACC_PARALLEL;
7538       break;
7539     case GF_OMP_TARGET_KIND_OACC_DATA:
7540     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7541       start_ix = BUILT_IN_GOACC_DATA_START;
7542       break;
7543     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7544       start_ix = BUILT_IN_GOACC_UPDATE;
7545       break;
7546     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7547       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7548       break;
7549     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7550       start_ix = BUILT_IN_GOACC_DECLARE;
7551       break;
7552     default:
7553       gcc_unreachable ();
7554     }
7555 
7556   clauses = gimple_omp_target_clauses (entry_stmt);
7557 
7558   tree device = NULL_TREE;
7559   location_t device_loc = UNKNOWN_LOCATION;
7560   tree goacc_flags = NULL_TREE;
7561   if (is_gimple_omp_oacc (entry_stmt))
7562     {
7563       /* By default, no GOACC_FLAGs are set.  */
7564       goacc_flags = integer_zero_node;
7565     }
7566   else
7567     {
7568       c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7569       if (c)
7570 	{
7571 	  device = OMP_CLAUSE_DEVICE_ID (c);
7572 	  device_loc = OMP_CLAUSE_LOCATION (c);
7573 	}
7574       else
7575 	{
7576 	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
7577 	     runtime library choose).  */
7578 	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7579 	  device_loc = gimple_location (entry_stmt);
7580 	}
7581 
7582       c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7583       if (c)
7584 	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7585     }
7586 
7587   /* By default, there is no conditional.  */
7588   tree cond = NULL_TREE;
7589   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7590   if (c)
7591     cond = OMP_CLAUSE_IF_EXPR (c);
7592 	  /* If we found the clause 'if (cond)', build:
7593 	     OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
7594 	     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
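  /* A rough GIMPLE sketch of what is built below for the OpenMP case
     (block and temporary names are illustrative only):

	 if (cond) goto then_bb; else goto else_bb;
       then_bb:
	 tmp_var = device;
       else_bb:
	 tmp_var = GOMP_DEVICE_HOST_FALLBACK;
       new_bb:
	 <use tmp_var as the DEVICE argument of the launch call>

     For OpenACC the else arm instead ORs GOACC_FLAG_HOST_FALLBACK into the
     flags temporary.  */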
7595   if (cond)
7596     {
7597       tree *tp;
7598       if (is_gimple_omp_oacc (entry_stmt))
7599 	tp = &goacc_flags;
7600       else
7601 	{
7602 	  /* Ensure 'device' is of the correct type.  */
7603 	  device = fold_convert_loc (device_loc, integer_type_node, device);
7604 
7605 	  tp = &device;
7606 	}
7607 
7608       cond = gimple_boolify (cond);
7609 
7610       basic_block cond_bb, then_bb, else_bb;
7611       edge e;
7612       tree tmp_var;
7613 
7614       tmp_var = create_tmp_var (TREE_TYPE (*tp));
7615       if (offloaded)
7616 	e = split_block_after_labels (new_bb);
7617       else
7618 	{
7619 	  gsi = gsi_last_nondebug_bb (new_bb);
7620 	  gsi_prev (&gsi);
7621 	  e = split_block (new_bb, gsi_stmt (gsi));
7622 	}
7623       cond_bb = e->src;
7624       new_bb = e->dest;
7625       remove_edge (e);
7626 
7627       then_bb = create_empty_bb (cond_bb);
7628       else_bb = create_empty_bb (then_bb);
7629       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7630       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7631 
7632       stmt = gimple_build_cond_empty (cond);
7633       gsi = gsi_last_bb (cond_bb);
7634       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7635 
7636       gsi = gsi_start_bb (then_bb);
7637       stmt = gimple_build_assign (tmp_var, *tp);
7638       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7639 
7640       gsi = gsi_start_bb (else_bb);
7641       if (is_gimple_omp_oacc (entry_stmt))
7642 	stmt = gimple_build_assign (tmp_var,
7643 				    BIT_IOR_EXPR,
7644 				    *tp,
7645 				    build_int_cst (integer_type_node,
7646 						   GOACC_FLAG_HOST_FALLBACK));
7647       else
7648 	stmt = gimple_build_assign (tmp_var,
7649 				    build_int_cst (integer_type_node,
7650 						   GOMP_DEVICE_HOST_FALLBACK));
7651       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7652 
7653       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7654       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7655       add_bb_to_loop (then_bb, cond_bb->loop_father);
7656       add_bb_to_loop (else_bb, cond_bb->loop_father);
7657       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7658       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7659 
7660       *tp = tmp_var;
7661 
7662       gsi = gsi_last_nondebug_bb (new_bb);
7663     }
7664   else
7665     {
7666       gsi = gsi_last_nondebug_bb (new_bb);
7667 
7668       if (device != NULL_TREE)
7669 	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7670 					   true, GSI_SAME_STMT);
7671     }
7672 
7673   t = gimple_omp_target_data_arg (entry_stmt);
7674   if (t == NULL)
7675     {
7676       t1 = size_zero_node;
7677       t2 = build_zero_cst (ptr_type_node);
7678       t3 = t2;
7679       t4 = t2;
7680     }
7681   else
7682     {
7683       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7684       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7685       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7686       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7687       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7688     }
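  /* At this point T1 is the number of mapped entries and T2, T3 and T4 are
     the addresses of the arrays produced by target lowering (conventionally
     named .omp_data_arr, .omp_data_sizes and .omp_data_kinds).  For
     BUILT_IN_GOMP_TARGET, for instance, the call built below ends up looking
     roughly like

       GOMP_target_ext (device, child_fn, t1, t2, t3, t4, flags, depend, args);

     (a sketch only; see libgomp for the exact entry-point signatures).  */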
7689 
7690   gimple *g;
7691   bool tagging = false;
7692   /* The maximum number used by any start_ix, without varargs.  */
7693   auto_vec<tree, 11> args;
7694   if (is_gimple_omp_oacc (entry_stmt))
7695     {
7696       tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
7697 					TREE_TYPE (goacc_flags), goacc_flags);
7698       goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
7699 						NULL_TREE, true,
7700 						GSI_SAME_STMT);
7701       args.quick_push (goacc_flags_m);
7702     }
7703   else
7704     args.quick_push (device);
7705   if (offloaded)
7706     args.quick_push (build_fold_addr_expr (child_fn));
7707   args.quick_push (t1);
7708   args.quick_push (t2);
7709   args.quick_push (t3);
7710   args.quick_push (t4);
7711   switch (start_ix)
7712     {
7713     case BUILT_IN_GOACC_DATA_START:
7714     case BUILT_IN_GOACC_DECLARE:
7715     case BUILT_IN_GOMP_TARGET_DATA:
7716       break;
7717     case BUILT_IN_GOMP_TARGET:
7718     case BUILT_IN_GOMP_TARGET_UPDATE:
7719     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7720       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7721       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7722       if (c)
7723 	depend = OMP_CLAUSE_DECL (c);
7724       else
7725 	depend = build_int_cst (ptr_type_node, 0);
7726       args.quick_push (depend);
7727       if (start_ix == BUILT_IN_GOMP_TARGET)
7728 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7729       break;
7730     case BUILT_IN_GOACC_PARALLEL:
7731       oacc_set_fn_attrib (child_fn, clauses, &args);
7732       tagging = true;
7733       /* FALLTHRU */
7734     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7735     case BUILT_IN_GOACC_UPDATE:
7736       {
7737 	tree t_async = NULL_TREE;
7738 
7739 	/* If present, use the value specified by the respective
7740 	   clause, making sure it is of the correct type.  */
7741 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7742 	if (c)
7743 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7744 				      integer_type_node,
7745 				      OMP_CLAUSE_ASYNC_EXPR (c));
7746 	else if (!tagging)
7747 	  /* Default values for t_async.  */
7748 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7749 				      integer_type_node,
7750 				      build_int_cst (integer_type_node,
7751 						     GOMP_ASYNC_SYNC));
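	/* When tagging for GOACC_PARALLEL, a small constant async value is
	   folded directly into a launch-tag operand below; e.g. (a sketch)
	   'async (2)' becomes oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
	   2) and no separate argument is pushed, whereas non-constant or
	   too-large values keep an explicit trailing argument.  */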
7752 	if (tagging && t_async)
7753 	  {
7754 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7755 
7756 	    if (TREE_CODE (t_async) == INTEGER_CST)
7757 	      {
7758 		/* See if we can pack the async arg into the tag's
7759 		   operand.  */
7760 		i_async = TREE_INT_CST_LOW (t_async);
7761 		if (i_async < GOMP_LAUNCH_OP_MAX)
7762 		  t_async = NULL_TREE;
7763 		else
7764 		  i_async = GOMP_LAUNCH_OP_MAX;
7765 	      }
7766 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7767 					      i_async));
7768 	  }
7769 	if (t_async)
7770 	  args.safe_push (t_async);
7771 
7772 	/* Save the argument index, and ... */
7773 	unsigned t_wait_idx = args.length ();
7774 	unsigned num_waits = 0;
7775 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7776 	if (!tagging || c)
7777 	  /* ... push a placeholder.  */
7778 	  args.safe_push (integer_zero_node);
7779 
7780 	for (; c; c = OMP_CLAUSE_CHAIN (c))
7781 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7782 	    {
7783 	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7784 						integer_type_node,
7785 						OMP_CLAUSE_WAIT_EXPR (c)));
7786 	      num_waits++;
7787 	    }
7788 
7789 	if (!tagging || num_waits)
7790 	  {
7791 	    tree len;
7792 
7793 	    /* Now that we know the number, update the placeholder.  */
7794 	    if (tagging)
7795 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7796 	    else
7797 	      len = build_int_cst (integer_type_node, num_waits);
7798 	    len = fold_convert_loc (gimple_location (entry_stmt),
7799 				    unsigned_type_node, len);
7800 	    args[t_wait_idx] = len;
7801 	  }
7802       }
7803       break;
7804     default:
7805       gcc_unreachable ();
7806     }
7807   if (tagging)
7808     /*  Push terminal marker - zero.  */
7809     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7810 
7811   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7812   gimple_set_location (g, gimple_location (entry_stmt));
7813   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7814   if (!offloaded)
7815     {
7816       g = gsi_stmt (gsi);
7817       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7818       gsi_remove (&gsi, true);
7819     }
7820   if (data_region && region->exit)
7821     {
7822       gsi = gsi_last_nondebug_bb (region->exit);
7823       g = gsi_stmt (gsi);
7824       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7825       gsi_remove (&gsi, true);
7826     }
7827 }
7828 
7829 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7830    the iteration variable derived from the thread number.  INTRA_GROUP means
7831    this is an expansion of a loop iterating over work-items within a separate
7832    iteration over groups.  */
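
/* A sketch of the transformation performed here (illustrative only): for
   each collapsed dimension DIM, a gridified loop

     for (V = N1; V cond N2; V += STEP)
       BODY;

   is reduced to a straight-line prologue of BODY

     V = N1 + <thread id in dimension DIM> * STEP;

   where the thread id comes from BUILT_IN_HSA_WORKGROUPID,
   BUILT_IN_HSA_WORKITEMID or BUILT_IN_HSA_WORKITEMABSID depending on whether
   this is the group iteration, an intra-group iteration or a non-grouped
   loop; the GIMPLE_OMP_FOR, GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN
   statements are then removed.  */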
7833 
7834 static void
7835 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7836 {
7837   gimple_stmt_iterator gsi;
7838   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7839   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7840 		       == GF_OMP_FOR_KIND_GRID_LOOP);
7841   size_t collapse = gimple_omp_for_collapse (for_stmt);
7842   struct omp_for_data_loop *loops
7843     = XALLOCAVEC (struct omp_for_data_loop,
7844 		  gimple_omp_for_collapse (for_stmt));
7845   struct omp_for_data fd;
7846 
7847   remove_edge (BRANCH_EDGE (kfor->entry));
7848   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7849 
7850   gcc_assert (kfor->cont);
7851   omp_extract_for_data (for_stmt, &fd, loops);
7852 
7853   gsi = gsi_start_bb (body_bb);
7854 
7855   for (size_t dim = 0; dim < collapse; dim++)
7856     {
7857       tree type, itype;
7858       itype = type = TREE_TYPE (fd.loops[dim].v);
7859       if (POINTER_TYPE_P (type))
7860 	itype = signed_type_for (type);
7861 
7862       tree n1 = fd.loops[dim].n1;
7863       tree step = fd.loops[dim].step;
7864       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7865 				     true, NULL_TREE, true, GSI_SAME_STMT);
7866       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7867 				       true, NULL_TREE, true, GSI_SAME_STMT);
7868       tree threadid;
7869       if (gimple_omp_for_grid_group_iter (for_stmt))
7870 	{
7871 	  gcc_checking_assert (!intra_group);
7872 	  threadid = build_call_expr (builtin_decl_explicit
7873 				      (BUILT_IN_HSA_WORKGROUPID), 1,
7874 				      build_int_cstu (unsigned_type_node, dim));
7875 	}
7876       else if (intra_group)
7877 	threadid = build_call_expr (builtin_decl_explicit
7878 				    (BUILT_IN_HSA_WORKITEMID), 1,
7879 				    build_int_cstu (unsigned_type_node, dim));
7880       else
7881 	threadid = build_call_expr (builtin_decl_explicit
7882 				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7883 				    build_int_cstu (unsigned_type_node, dim));
7884       threadid = fold_convert (itype, threadid);
7885       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7886 					   true, GSI_SAME_STMT);
7887 
7888       tree startvar = fd.loops[dim].v;
7889       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7890       if (POINTER_TYPE_P (type))
7891 	t = fold_build_pointer_plus (n1, t);
7892       else
7893 	t = fold_build2 (PLUS_EXPR, type, t, n1);
7894       t = fold_convert (type, t);
7895       t = force_gimple_operand_gsi (&gsi, t,
7896 				    DECL_P (startvar)
7897 				    && TREE_ADDRESSABLE (startvar),
7898 				    NULL_TREE, true, GSI_SAME_STMT);
7899       gassign *assign_stmt = gimple_build_assign (startvar, t);
7900       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7901     }
7902   /* Remove the omp for statement.  */
7903   gsi = gsi_last_nondebug_bb (kfor->entry);
7904   gsi_remove (&gsi, true);
7905 
7906   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7907   gsi = gsi_last_nondebug_bb (kfor->cont);
7908   gcc_assert (!gsi_end_p (gsi)
7909 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7910   gsi_remove (&gsi, true);
7911 
7912   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7913   gsi = gsi_last_nondebug_bb (kfor->exit);
7914   gcc_assert (!gsi_end_p (gsi)
7915 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7916   if (intra_group)
7917     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7918   gsi_remove (&gsi, true);
7919 
7920   /* Fix up the now much simpler CFG.  */
7921   remove_edge (find_edge (kfor->cont, body_bb));
7922 
7923   if (kfor->cont != body_bb)
7924     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7925   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7926 }
7927 
7928 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7929    argument_decls.  */
7930 
7931 struct grid_arg_decl_map
7932 {
7933   tree old_arg;
7934   tree new_arg;
7935 };
7936 
7937 /* Invoked through walk_gimple_op; remaps references to the original
7938    PARM_DECL to the one pertaining to the kernel function.  */
7939 
7940 static tree
7941 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7942 {
7943   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7944   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7945   tree t = *tp;
7946 
7947   if (t == adm->old_arg)
7948     *tp = adm->new_arg;
7949   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7950   return NULL_TREE;
7951 }
7952 
7953 /* If the TARGET region contains a gridified kernel body loop, remove its
7954    region from the TARGET and expand it in HSA gridified kernel fashion.  */
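
/* The nested region structure looked for is, roughly,

     GIMPLE_OMP_TARGET
       GIMPLE_OMP_GRID_BODY
	 GIMPLE_OMP_FOR  (GF_OMP_FOR_KIND_GRID_LOOP)
	   [inner GRID_LOOP regions when iterating over groups]

   The grid body is outlined into a copy of the target child function,
   renamed with a "kernel" suffix via clone_function_name_numbered, and
   registered with the HSA back end via hsa_register_kernel.  */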
7955 
7956 static void
7957 grid_expand_target_grid_body (struct omp_region *target)
7958 {
7959   if (!hsa_gen_requested_p ())
7960     return;
7961 
7962   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7963   struct omp_region **pp;
7964 
7965   for (pp = &target->inner; *pp; pp = &(*pp)->next)
7966     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7967       break;
7968 
7969   struct omp_region *gpukernel = *pp;
7970 
7971   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7972   if (!gpukernel)
7973     {
7974       /* HSA cannot handle OACC stuff.  */
7975       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7976 	return;
7977       gcc_checking_assert (orig_child_fndecl);
7978       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7979 				    OMP_CLAUSE__GRIDDIM_));
7980       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7981 
7982       hsa_register_kernel (n);
7983       return;
7984     }
7985 
7986   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7987 			       OMP_CLAUSE__GRIDDIM_));
7988   tree inside_block
7989     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7990   *pp = gpukernel->next;
7991   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7992     if ((*pp)->type == GIMPLE_OMP_FOR)
7993       break;
7994 
7995   struct omp_region *kfor = *pp;
7996   gcc_assert (kfor);
7997   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7998   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7999   *pp = kfor->next;
8000   if (kfor->inner)
8001     {
8002       if (gimple_omp_for_grid_group_iter (for_stmt))
8003 	{
8004 	  struct omp_region **next_pp;
8005 	  for (pp = &kfor->inner; *pp; pp = next_pp)
8006 	    {
8007 	      next_pp = &(*pp)->next;
8008 	      if ((*pp)->type != GIMPLE_OMP_FOR)
8009 		continue;
8010 	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8011 	      gcc_assert (gimple_omp_for_kind (inner)
8012 			  == GF_OMP_FOR_KIND_GRID_LOOP);
8013 	      grid_expand_omp_for_loop (*pp, true);
8014 	      *pp = (*pp)->next;
8015 	      next_pp = pp;
8016 	    }
8017 	}
8018       expand_omp (kfor->inner);
8019     }
8020   if (gpukernel->inner)
8021     expand_omp (gpukernel->inner);
8022 
8023   tree kern_fndecl = copy_node (orig_child_fndecl);
8024   DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8025 							  "kernel");
8026   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8027   tree tgtblock = gimple_block (tgt_stmt);
8028   tree fniniblock = make_node (BLOCK);
8029   BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8030   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8031   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8032   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8033   DECL_INITIAL (kern_fndecl) = fniniblock;
8034   push_struct_function (kern_fndecl);
8035   cfun->function_end_locus = gimple_location (tgt_stmt);
8036   init_tree_ssa (cfun);
8037   pop_cfun ();
8038 
8039   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8040   gcc_assert (!DECL_CHAIN (old_parm_decl));
8041   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8042   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8043   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8044   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8045   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8046   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8047   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8048   kern_cfun->curr_properties = cfun->curr_properties;
8049 
8050   grid_expand_omp_for_loop (kfor, false);
8051 
8052   /* Remove the GIMPLE_OMP_GRID_BODY statement at the kernel entry.  */
8053   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8054   gsi_remove (&gsi, true);
8055   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8056      return.  */
8057   gsi = gsi_last_nondebug_bb (gpukernel->exit);
8058   gcc_assert (!gsi_end_p (gsi)
8059 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8060   gimple *ret_stmt = gimple_build_return (NULL);
8061   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8062   gsi_remove (&gsi, true);
8063 
8064   /* Statements in the first BB in the target construct have been produced by
8065      target lowering and must be copied inside the GPUKERNEL, with the two
8066      exceptions of the first OMP statement and the OMP_DATA assignment
8067      statement.  */
8068   gsi = gsi_start_bb (single_succ (gpukernel->entry));
8069   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8070   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8071   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8072        !gsi_end_p (tsi); gsi_next (&tsi))
8073     {
8074       gimple *stmt = gsi_stmt (tsi);
8075       if (is_gimple_omp (stmt))
8076 	break;
8077       if (sender
8078 	  && is_gimple_assign (stmt)
8079 	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8080 	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8081 	continue;
8082       gimple *copy = gimple_copy (stmt);
8083       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8084       gimple_set_block (copy, fniniblock);
8085     }
8086 
8087   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8088 			  gpukernel->exit, inside_block);
8089 
8090   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8091   kcn->mark_force_output ();
8092   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8093 
8094   hsa_register_kernel (kcn, orig_child);
8095 
8096   cgraph_node::add_new_function (kern_fndecl, true);
8097   push_cfun (kern_cfun);
8098   cgraph_edge::rebuild_edges ();
8099 
8100   /* Re-map any mention of the PARM_DECL of the original function to the
8101      PARM_DECL of the new one.
8102 
8103      TODO: It would be great if lowering produced references into the GPU
8104      kernel decl straight away and we did not have to do this.  */
8105   struct grid_arg_decl_map adm;
8106   adm.old_arg = old_parm_decl;
8107   adm.new_arg = new_parm_decl;
8108   basic_block bb;
8109   FOR_EACH_BB_FN (bb, kern_cfun)
8110     {
8111       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8112 	{
8113 	  gimple *stmt = gsi_stmt (gsi);
8114 	  struct walk_stmt_info wi;
8115 	  memset (&wi, 0, sizeof (wi));
8116 	  wi.info = &adm;
8117 	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8118 	}
8119     }
8120   pop_cfun ();
8121 
8122   return;
8123 }
8124 
8125 /* Expand the parallel region tree rooted at REGION.  Expansion
8126    proceeds in depth-first order.  Innermost regions are expanded
8127    first.  This way, parallel regions that require a new function to
8128    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8129    internal dependencies in their body.  */
8130 
8131 static void
8132 expand_omp (struct omp_region *region)
8133 {
8134   omp_any_child_fn_dumped = false;
8135   while (region)
8136     {
8137       location_t saved_location;
8138       gimple *inner_stmt = NULL;
8139 
8140       /* First, determine whether this is a combined parallel+workshare
8141 	 region.  */
8142       if (region->type == GIMPLE_OMP_PARALLEL)
8143 	determine_parallel_type (region);
8144       else if (region->type == GIMPLE_OMP_TARGET)
8145 	grid_expand_target_grid_body (region);
8146 
8147       if (region->type == GIMPLE_OMP_FOR
8148 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
8149 	inner_stmt = last_stmt (region->inner->entry);
8150 
8151       if (region->inner)
8152 	expand_omp (region->inner);
8153 
8154       saved_location = input_location;
8155       if (gimple_has_location (last_stmt (region->entry)))
8156 	input_location = gimple_location (last_stmt (region->entry));
8157 
8158       switch (region->type)
8159 	{
8160 	case GIMPLE_OMP_PARALLEL:
8161 	case GIMPLE_OMP_TASK:
8162 	  expand_omp_taskreg (region);
8163 	  break;
8164 
8165 	case GIMPLE_OMP_FOR:
8166 	  expand_omp_for (region, inner_stmt);
8167 	  break;
8168 
8169 	case GIMPLE_OMP_SECTIONS:
8170 	  expand_omp_sections (region);
8171 	  break;
8172 
8173 	case GIMPLE_OMP_SECTION:
8174 	  /* Individual omp sections are handled together with their
8175 	     parent GIMPLE_OMP_SECTIONS region.  */
8176 	  break;
8177 
8178 	case GIMPLE_OMP_SINGLE:
8179 	  expand_omp_single (region);
8180 	  break;
8181 
8182 	case GIMPLE_OMP_ORDERED:
8183 	  {
8184 	    gomp_ordered *ord_stmt
8185 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
8186 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8187 				 OMP_CLAUSE_DEPEND))
8188 	      {
8189 		/* We'll expand these when expanding the corresponding
8190 		   worksharing region with an ordered(n) clause.  */
8191 		gcc_assert (region->outer
8192 			    && region->outer->type == GIMPLE_OMP_FOR);
8193 		region->ord_stmt = ord_stmt;
8194 		break;
8195 	      }
8196 	  }
8197 	  /* FALLTHRU */
8198 	case GIMPLE_OMP_MASTER:
8199 	case GIMPLE_OMP_TASKGROUP:
8200 	case GIMPLE_OMP_CRITICAL:
8201 	case GIMPLE_OMP_TEAMS:
8202 	  expand_omp_synch (region);
8203 	  break;
8204 
8205 	case GIMPLE_OMP_ATOMIC_LOAD:
8206 	  expand_omp_atomic (region);
8207 	  break;
8208 
8209 	case GIMPLE_OMP_TARGET:
8210 	  expand_omp_target (region);
8211 	  break;
8212 
8213 	default:
8214 	  gcc_unreachable ();
8215 	}
8216 
8217       input_location = saved_location;
8218       region = region->next;
8219     }
8220   if (omp_any_child_fn_dumped)
8221     {
8222       if (dump_file)
8223 	dump_function_header (dump_file, current_function_decl, dump_flags);
8224       omp_any_child_fn_dumped = false;
8225     }
8226 }
8227 
8228 /* Helper for build_omp_regions.  Scan the dominator tree starting at
8229    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
8230    true, the function ends once a single tree is built (otherwise, a whole
8231    forest of OMP constructs may be built).  */
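
/* For example (a sketch), for

     #pragma omp parallel
     #pragma omp for
     for (...)
       ...

   this builds a GIMPLE_OMP_PARALLEL region whose single child is a
   GIMPLE_OMP_FOR region; their exit and cont fields are filled in as the
   dominator walk reaches the matching GIMPLE_OMP_RETURN and
   GIMPLE_OMP_CONTINUE statements.  */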
8232 
8233 static void
8234 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8235 		     bool single_tree)
8236 {
8237   gimple_stmt_iterator gsi;
8238   gimple *stmt;
8239   basic_block son;
8240 
8241   gsi = gsi_last_nondebug_bb (bb);
8242   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8243     {
8244       struct omp_region *region;
8245       enum gimple_code code;
8246 
8247       stmt = gsi_stmt (gsi);
8248       code = gimple_code (stmt);
8249       if (code == GIMPLE_OMP_RETURN)
8250 	{
8251 	  /* STMT is the return point out of region PARENT.  Mark it
8252 	     as the exit point and make PARENT the immediately
8253 	     enclosing region.  */
8254 	  gcc_assert (parent);
8255 	  region = parent;
8256 	  region->exit = bb;
8257 	  parent = parent->outer;
8258 	}
8259       else if (code == GIMPLE_OMP_ATOMIC_STORE)
8260 	{
8261 	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8262 	     GIMPLE_OMP_RETURN, but matches with
8263 	     GIMPLE_OMP_ATOMIC_LOAD.  */
8264 	  gcc_assert (parent);
8265 	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8266 	  region = parent;
8267 	  region->exit = bb;
8268 	  parent = parent->outer;
8269 	}
8270       else if (code == GIMPLE_OMP_CONTINUE)
8271 	{
8272 	  gcc_assert (parent);
8273 	  parent->cont = bb;
8274 	}
8275       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8276 	{
8277 	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8278 	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
8279 	}
8280       else
8281 	{
8282 	  region = new_omp_region (bb, code, parent);
8283 	  /* Otherwise...  */
8284 	  if (code == GIMPLE_OMP_TARGET)
8285 	    {
8286 	      switch (gimple_omp_target_kind (stmt))
8287 		{
8288 		case GF_OMP_TARGET_KIND_REGION:
8289 		case GF_OMP_TARGET_KIND_DATA:
8290 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8291 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
8292 		case GF_OMP_TARGET_KIND_OACC_DATA:
8293 		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8294 		  break;
8295 		case GF_OMP_TARGET_KIND_UPDATE:
8296 		case GF_OMP_TARGET_KIND_ENTER_DATA:
8297 		case GF_OMP_TARGET_KIND_EXIT_DATA:
8298 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
8299 		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8300 		case GF_OMP_TARGET_KIND_OACC_DECLARE:
8301 		  /* ..., other than for those stand-alone directives...  */
8302 		  region = NULL;
8303 		  break;
8304 		default:
8305 		  gcc_unreachable ();
8306 		}
8307 	    }
8308 	  else if (code == GIMPLE_OMP_ORDERED
8309 		   && omp_find_clause (gimple_omp_ordered_clauses
8310 					 (as_a <gomp_ordered *> (stmt)),
8311 				       OMP_CLAUSE_DEPEND))
8312 	    /* #pragma omp ordered depend is also just a stand-alone
8313 	       directive.  */
8314 	    region = NULL;
8315 	  else if (code == GIMPLE_OMP_TASK
8316 		   && gimple_omp_task_taskwait_p (stmt))
8317 	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
8318 	    region = NULL;
8319 	  /* ..., this directive becomes the parent for a new region.  */
8320 	  if (region)
8321 	    parent = region;
8322 	}
8323     }
8324 
8325   if (single_tree && !parent)
8326     return;
8327 
8328   for (son = first_dom_son (CDI_DOMINATORS, bb);
8329        son;
8330        son = next_dom_son (CDI_DOMINATORS, son))
8331     build_omp_regions_1 (son, parent, single_tree);
8332 }
8333 
8334 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8335    root_omp_region.  */
8336 
8337 static void
8338 build_omp_regions_root (basic_block root)
8339 {
8340   gcc_assert (root_omp_region == NULL);
8341   build_omp_regions_1 (root, NULL, true);
8342   gcc_assert (root_omp_region != NULL);
8343 }
8344 
8345 /* Expands omp construct (and its subconstructs) starting in HEAD.  */
8346 
8347 void
8348 omp_expand_local (basic_block head)
8349 {
8350   build_omp_regions_root (head);
8351   if (dump_file && (dump_flags & TDF_DETAILS))
8352     {
8353       fprintf (dump_file, "\nOMP region tree\n\n");
8354       dump_omp_region (dump_file, root_omp_region, 0);
8355       fprintf (dump_file, "\n");
8356     }
8357 
8358   remove_exit_barriers (root_omp_region);
8359   expand_omp (root_omp_region);
8360 
8361   omp_free_regions ();
8362 }
8363 
8364 /* Scan the CFG and build a tree of OMP regions, storing its root in
8365    root_omp_region.  */
8366 
8367 static void
8368 build_omp_regions (void)
8369 {
8370   gcc_assert (root_omp_region == NULL);
8371   calculate_dominance_info (CDI_DOMINATORS);
8372   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8373 }
8374 
8375 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
8376 
8377 static unsigned int
8378 execute_expand_omp (void)
8379 {
8380   build_omp_regions ();
8381 
8382   if (!root_omp_region)
8383     return 0;
8384 
8385   if (dump_file)
8386     {
8387       fprintf (dump_file, "\nOMP region tree\n\n");
8388       dump_omp_region (dump_file, root_omp_region, 0);
8389       fprintf (dump_file, "\n");
8390     }
8391 
8392   remove_exit_barriers (root_omp_region);
8393 
8394   expand_omp (root_omp_region);
8395 
8396   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8397     verify_loop_structure ();
8398   cleanup_tree_cfg ();
8399 
8400   omp_free_regions ();
8401 
8402   return 0;
8403 }
8404 
8405 /* OMP expansion -- the default pass, run before creation of SSA form.  */
8406 
8407 namespace {
8408 
8409 const pass_data pass_data_expand_omp =
8410 {
8411   GIMPLE_PASS, /* type */
8412   "ompexp", /* name */
8413   OPTGROUP_OMP, /* optinfo_flags */
8414   TV_NONE, /* tv_id */
8415   PROP_gimple_any, /* properties_required */
8416   PROP_gimple_eomp, /* properties_provided */
8417   0, /* properties_destroyed */
8418   0, /* todo_flags_start */
8419   0, /* todo_flags_finish */
8420 };
8421 
8422 class pass_expand_omp : public gimple_opt_pass
8423 {
8424 public:
8425   pass_expand_omp (gcc::context *ctxt)
8426     : gimple_opt_pass (pass_data_expand_omp, ctxt)
8427   {}
8428 
8429   /* opt_pass methods: */
8430   virtual unsigned int execute (function *)
8431     {
8432       bool gate = ((flag_openacc != 0 || flag_openmp != 0
8433 		    || flag_openmp_simd != 0)
8434 		   && !seen_error ());
8435 
8436       /* This pass always runs, to provide PROP_gimple_eomp.
8437 	 But often, there is nothing to do.  */
8438       if (!gate)
8439 	return 0;
8440 
8441       return execute_expand_omp ();
8442     }
8443 
8444 }; // class pass_expand_omp
8445 
8446 } // anon namespace
8447 
8448 gimple_opt_pass *
8449 make_pass_expand_omp (gcc::context *ctxt)
8450 {
8451   return new pass_expand_omp (ctxt);
8452 }
8453 
8454 namespace {
8455 
8456 const pass_data pass_data_expand_omp_ssa =
8457 {
8458   GIMPLE_PASS, /* type */
8459   "ompexpssa", /* name */
8460   OPTGROUP_OMP, /* optinfo_flags */
8461   TV_NONE, /* tv_id */
8462   PROP_cfg | PROP_ssa, /* properties_required */
8463   PROP_gimple_eomp, /* properties_provided */
8464   0, /* properties_destroyed */
8465   0, /* todo_flags_start */
8466   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8467 };
8468 
8469 class pass_expand_omp_ssa : public gimple_opt_pass
8470 {
8471 public:
8472   pass_expand_omp_ssa (gcc::context *ctxt)
8473     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8474   {}
8475 
8476   /* opt_pass methods: */
8477   virtual bool gate (function *fun)
8478     {
8479       return !(fun->curr_properties & PROP_gimple_eomp);
8480     }
8481   virtual unsigned int execute (function *) { return execute_expand_omp (); }
8482   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8483 
8484 }; // class pass_expand_omp_ssa
8485 
8486 } // anon namespace
8487 
8488 gimple_opt_pass *
8489 make_pass_expand_omp_ssa (gcc::context *ctxt)
8490 {
8491   return new pass_expand_omp_ssa (ctxt);
8492 }
8493 
8494 /* Called from tree-cfg.c::make_edges to create CFG edges for all relevant
8495    GIMPLE_* codes.  Returns true if a fall-through edge should be created.  */
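
/* E.g. (a sketch) for a '#pragma omp for' region, the GIMPLE_OMP_CONTINUE
   handling below ends up with edges roughly like

     FOR entry --(fallthru, marked abnormal)--> loop body
     CONTINUE block --(abnormal)--> loop body            (the loopback edge)
     FOR entry --(abnormal)--> block after CONTINUE      (zero-iteration case)
     CONTINUE block --(fallthru | abnormal)--> block after CONTINUE  */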
8496 
8497 bool
8498 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8499 		       int *region_idx)
8500 {
8501   gimple *last = last_stmt (bb);
8502   enum gimple_code code = gimple_code (last);
8503   struct omp_region *cur_region = *region;
8504   bool fallthru = false;
8505 
8506   switch (code)
8507     {
8508     case GIMPLE_OMP_PARALLEL:
8509     case GIMPLE_OMP_FOR:
8510     case GIMPLE_OMP_SINGLE:
8511     case GIMPLE_OMP_TEAMS:
8512     case GIMPLE_OMP_MASTER:
8513     case GIMPLE_OMP_TASKGROUP:
8514     case GIMPLE_OMP_CRITICAL:
8515     case GIMPLE_OMP_SECTION:
8516     case GIMPLE_OMP_GRID_BODY:
8517       cur_region = new_omp_region (bb, code, cur_region);
8518       fallthru = true;
8519       break;
8520 
8521     case GIMPLE_OMP_TASK:
8522       cur_region = new_omp_region (bb, code, cur_region);
8523       fallthru = true;
8524       if (gimple_omp_task_taskwait_p (last))
8525 	cur_region = cur_region->outer;
8526       break;
8527 
8528     case GIMPLE_OMP_ORDERED:
8529       cur_region = new_omp_region (bb, code, cur_region);
8530       fallthru = true;
8531       if (omp_find_clause (gimple_omp_ordered_clauses
8532 			     (as_a <gomp_ordered *> (last)),
8533 			   OMP_CLAUSE_DEPEND))
8534 	cur_region = cur_region->outer;
8535       break;
8536 
8537     case GIMPLE_OMP_TARGET:
8538       cur_region = new_omp_region (bb, code, cur_region);
8539       fallthru = true;
8540       switch (gimple_omp_target_kind (last))
8541 	{
8542 	case GF_OMP_TARGET_KIND_REGION:
8543 	case GF_OMP_TARGET_KIND_DATA:
8544 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8545 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8546 	case GF_OMP_TARGET_KIND_OACC_DATA:
8547 	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8548 	  break;
8549 	case GF_OMP_TARGET_KIND_UPDATE:
8550 	case GF_OMP_TARGET_KIND_ENTER_DATA:
8551 	case GF_OMP_TARGET_KIND_EXIT_DATA:
8552 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8553 	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8554 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
8555 	  cur_region = cur_region->outer;
8556 	  break;
8557 	default:
8558 	  gcc_unreachable ();
8559 	}
8560       break;
8561 
8562     case GIMPLE_OMP_SECTIONS:
8563       cur_region = new_omp_region (bb, code, cur_region);
8564       fallthru = true;
8565       break;
8566 
8567     case GIMPLE_OMP_SECTIONS_SWITCH:
8568       fallthru = false;
8569       break;
8570 
8571     case GIMPLE_OMP_ATOMIC_LOAD:
8572     case GIMPLE_OMP_ATOMIC_STORE:
8573        fallthru = true;
8574        break;
8575 
8576     case GIMPLE_OMP_RETURN:
8577       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8578 	 somewhere other than the next block.  This will be
8579 	 created later.  */
8580       cur_region->exit = bb;
8581       if (cur_region->type == GIMPLE_OMP_TASK)
8582 	/* Add an edge corresponding to not scheduling the task
8583 	   immediately.  */
8584 	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8585       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8586       cur_region = cur_region->outer;
8587       break;
8588 
8589     case GIMPLE_OMP_CONTINUE:
8590       cur_region->cont = bb;
8591       switch (cur_region->type)
8592 	{
8593 	case GIMPLE_OMP_FOR:
8594 	  /* Mark the successor edges of the GIMPLE_OMP_FOR and
8595 	     GIMPLE_OMP_CONTINUE blocks as abnormal to prevent them
8596 	     from being split.  */
8597 	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8598 	  /* Make the loopback edge.  */
8599 	  make_edge (bb, single_succ (cur_region->entry),
8600 		     EDGE_ABNORMAL);
8601 
8602 	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8603 	     corresponds to the case that the body of the loop
8604 	     is not executed at all.  */
8605 	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8606 	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8607 	  fallthru = false;
8608 	  break;
8609 
8610 	case GIMPLE_OMP_SECTIONS:
8611 	  /* Wire up the edges into and out of the nested sections.  */
8612 	  {
8613 	    basic_block switch_bb = single_succ (cur_region->entry);
8614 
8615 	    struct omp_region *i;
8616 	    for (i = cur_region->inner; i ; i = i->next)
8617 	      {
8618 		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8619 		make_edge (switch_bb, i->entry, 0);
8620 		make_edge (i->exit, bb, EDGE_FALLTHRU);
8621 	      }
8622 
8623 	    /* Make the loopback edge to the block with
8624 	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8625 	    make_edge (bb, switch_bb, 0);
8626 
8627 	    /* Make the edge from the switch to exit.  */
8628 	    make_edge (switch_bb, bb->next_bb, 0);
8629 	    fallthru = false;
8630 	  }
8631 	  break;
8632 
8633 	case GIMPLE_OMP_TASK:
8634 	  fallthru = true;
8635 	  break;
8636 
8637 	default:
8638 	  gcc_unreachable ();
8639 	}
8640       break;
8641 
8642     default:
8643       gcc_unreachable ();
8644     }
8645 
8646   if (*region != cur_region)
8647     {
8648       *region = cur_region;
8649       if (cur_region)
8650 	*region_idx = cur_region->entry->index;
8651       else
8652 	*region_idx = 0;
8653     }
8654 
8655   return fallthru;
8656 }
8657 
8658 #include "gt-omp-expand.h"
8659