xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/omp-expand.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
4 
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "debug.h"
61 
62 
63 /* OMP region information.  Every parallel and workshare
64    directive is enclosed between two markers, the OMP_* directive
65    and a corresponding GIMPLE_OMP_RETURN statement.  */
66 
67 struct omp_region
68 {
69   /* The enclosing region.  */
70   struct omp_region *outer;
71 
72   /* First child region.  */
73   struct omp_region *inner;
74 
75   /* Next peer region.  */
76   struct omp_region *next;
77 
78   /* Block containing the omp directive as its last stmt.  */
79   basic_block entry;
80 
81   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
82   basic_block exit;
83 
84   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
85   basic_block cont;
86 
87   /* If this is a combined parallel+workshare region, this is a list
88      of additional arguments needed by the combined parallel+workshare
89      library call.  */
90   vec<tree, va_gc> *ws_args;
91 
92   /* The code for the omp directive of this region.  */
93   enum gimple_code type;
94 
95   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
96   enum omp_clause_schedule_kind sched_kind;
97 
98   /* Schedule modifiers.  */
99   unsigned char sched_modifiers;
100 
101   /* True if this is a combined parallel+workshare region.  */
102   bool is_combined_parallel;
103 
104   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105      a depend clause.  */
106   gomp_ordered *ord_stmt;
107 };
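
/* For illustration (block numbers hypothetical), a body such as

	#pragma omp parallel
	  {
	    #pragma omp for
	    for (...) ...
	  }

   produces a region tree in which the GIMPLE_OMP_FOR region is the
   INNER child of the GIMPLE_OMP_PARALLEL region, each recording the
   blocks that hold its directive, GIMPLE_OMP_CONTINUE and
   GIMPLE_OMP_RETURN statements.  */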
108 
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
111 
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 				     bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
116 
117 /* Return true if REGION is a combined parallel+workshare region.  */
118 
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
121 {
122   return region->is_combined_parallel;
123 }
124 
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126    is the immediate dominator of PAR_ENTRY_BB, return true if there
127    are no data dependencies that would prevent expanding the parallel
128    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 
130    When expanding a combined parallel+workshare region, the call to
131    the child function may need additional arguments in the case of
132    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
133    computed out of variables passed in from the parent to the child
134    via 'struct .omp_data_s'.  For instance:
135 
136 	#pragma omp parallel for schedule (guided, i * 4)
137 	for (j ...)
138 
139    Is lowered into:
140 
141 	# BLOCK 2 (PAR_ENTRY_BB)
142 	.omp_data_o.i = i;
143 	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
144 
145 	# BLOCK 3 (WS_ENTRY_BB)
146 	.omp_data_i = &.omp_data_o;
147 	D.1667 = .omp_data_i->i;
148 	D.1598 = D.1667 * 4;
149 	#pragma omp for schedule (guided, D.1598)
150 
151    When we outline the parallel region, the call to the child function
152    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153    that value is computed *after* the call site.  So, in principle we
154    cannot do the transformation.
155 
156    To see whether the code in WS_ENTRY_BB blocks the combined
157    parallel+workshare call, we collect all the variables used in the
158    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
160    call.
161 
162    FIXME.  If we had the SSA form built at this point, we could merely
163    hoist the code in block 3 into block 2 and be done with it.  But at
164    this point we don't have dataflow information and though we could
165    hack something up here, it is really not worth the aggravation.  */
166 
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 {
170   struct omp_for_data fd;
171   gimple *ws_stmt = last_stmt (ws_entry_bb);
172 
173   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174     return true;
175 
176   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177 
178   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179 
180   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181     return false;
182   if (fd.iter_type != long_integer_type_node)
183     return false;
184 
185   /* FIXME.  We give up too easily here.  If any of these arguments
186      are not constants, they will likely involve variables that have
187      been mapped into fields of .omp_data_s for sharing with the child
188      function.  With appropriate data flow, it would be possible to
189      see through this.  */
190   if (!is_gimple_min_invariant (fd.loop.n1)
191       || !is_gimple_min_invariant (fd.loop.n2)
192       || !is_gimple_min_invariant (fd.loop.step)
193       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194     return false;
195 
196   return true;
197 }
198 
199 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
200    of the simd modifier (SIMD_SCHEDULE).  */
201 
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 {
205   if (!simd_schedule)
206     return chunk_size;
207 
208   int vf = omp_max_vf ();
209   if (vf == 1)
210     return chunk_size;
211 
212   tree type = TREE_TYPE (chunk_size);
213   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 			    build_int_cst (type, vf - 1));
215   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 		      build_int_cst (type, -vf));
217 }
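
/* For illustration: with a hypothetical max vectorization factor of 8,
   the computation above rounds a chunk size of 13 up to

	(13 + 7) & -8  ==  20 & ~7  ==  16

   i.e. the smallest multiple of the vectorization factor not smaller
   than the requested chunk size.  */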
218 
219 /* Collect additional arguments needed to emit a combined
220    parallel+workshare call.  PAR_STMT is the enclosing parallel
221    directive and WS_STMT is the workshare directive being expanded.  */
222 
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 {
226   tree t;
227   location_t loc = gimple_location (ws_stmt);
228   vec<tree, va_gc> *ws_args;
229 
230   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231     {
232       struct omp_for_data fd;
233       tree n1, n2;
234 
235       omp_extract_for_data (for_stmt, &fd, NULL);
236       n1 = fd.loop.n1;
237       n2 = fd.loop.n2;
238 
239       if (gimple_omp_for_combined_into_p (for_stmt))
240 	{
241 	  tree innerc
242 	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 			       OMP_CLAUSE__LOOPTEMP_);
244 	  gcc_assert (innerc);
245 	  n1 = OMP_CLAUSE_DECL (innerc);
246 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 				    OMP_CLAUSE__LOOPTEMP_);
248 	  gcc_assert (innerc);
249 	  n2 = OMP_CLAUSE_DECL (innerc);
250 	}
251 
252       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253 
254       t = fold_convert_loc (loc, long_integer_type_node, n1);
255       ws_args->quick_push (t);
256 
257       t = fold_convert_loc (loc, long_integer_type_node, n2);
258       ws_args->quick_push (t);
259 
260       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261       ws_args->quick_push (t);
262 
263       if (fd.chunk_size)
264 	{
265 	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 	  ws_args->quick_push (t);
268 	}
269 
270       return ws_args;
271     }
272   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273     {
274       /* Number of sections is equal to the number of edges from the
275 	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 	 the exit of the sections region.  */
277       basic_block bb = single_succ (gimple_bb (ws_stmt));
278       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279       vec_alloc (ws_args, 1);
280       ws_args->quick_push (t);
281       return ws_args;
282     }
283 
284   gcc_unreachable ();
285 }
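
/* For illustration (values hypothetical), for

	#pragma omp parallel for schedule (dynamic, 16)
	for (i = 0; i < n; i += 4)

   the vector built above contains (long) 0, (long) n, (long) 4 and
   (long) 16, matching the extra arguments of
   GOMP_parallel_loop_dynamic.  */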
286 
287 /* Discover whether REGION is a combined parallel+workshare region.  */
288 
289 static void
290 determine_parallel_type (struct omp_region *region)
291 {
292   basic_block par_entry_bb, par_exit_bb;
293   basic_block ws_entry_bb, ws_exit_bb;
294 
295   if (region == NULL || region->inner == NULL
296       || region->exit == NULL || region->inner->exit == NULL
297       || region->inner->cont == NULL)
298     return;
299 
300   /* We only support parallel+for and parallel+sections.  */
301   if (region->type != GIMPLE_OMP_PARALLEL
302       || (region->inner->type != GIMPLE_OMP_FOR
303 	  && region->inner->type != GIMPLE_OMP_SECTIONS))
304     return;
305 
306   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307      WS_EXIT_BB -> PAR_EXIT_BB.  */
308   par_entry_bb = region->entry;
309   par_exit_bb = region->exit;
310   ws_entry_bb = region->inner->entry;
311   ws_exit_bb = region->inner->exit;
312 
313   if (single_succ (par_entry_bb) == ws_entry_bb
314       && single_succ (ws_exit_bb) == par_exit_bb
315       && workshare_safe_to_combine_p (ws_entry_bb)
316       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 	  || (last_and_only_stmt (ws_entry_bb)
318 	      && last_and_only_stmt (par_exit_bb))))
319     {
320       gimple *par_stmt = last_stmt (par_entry_bb);
321       gimple *ws_stmt = last_stmt (ws_entry_bb);
322 
323       if (region->inner->type == GIMPLE_OMP_FOR)
324 	{
325 	  /* If this is a combined parallel loop, we need to determine
326 	     whether or not to use the combined library calls.  There
327 	     are two cases where we do not apply the transformation:
328 	     static loops and any kind of ordered loop.  In the first
329 	     case, we already open code the loop so there is no need
330 	     to do anything else.  In the latter case, the combined
331 	     parallel loop call would still need extra synchronization
332 	     to implement ordered semantics, so there would not be any
333 	     gain in using the combined call.  */
334 	  tree clauses = gimple_omp_for_clauses (ws_stmt);
335 	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 	  if (c == NULL
337 	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 		  == OMP_CLAUSE_SCHEDULE_STATIC)
339 	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 	    {
341 	      region->is_combined_parallel = false;
342 	      region->inner->is_combined_parallel = false;
343 	      return;
344 	    }
345 	}
346 
347       region->is_combined_parallel = true;
348       region->inner->is_combined_parallel = true;
349       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350     }
351 }
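
/* For illustration: per the checks above,

	#pragma omp parallel for schedule (dynamic)

   is marked combined, so a single combined library call is emitted
   later, whereas schedule (static) or an ordered clause leaves the
   parallel and the workshare expanded separately.  */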
352 
353 /* Debugging dumps for parallel regions.  */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
357 
358 /* Dump the parallel region tree rooted at REGION.  */
359 
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 {
363   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 	   gimple_code_name[region->type]);
365 
366   if (region->inner)
367     dump_omp_region (file, region->inner, indent + 4);
368 
369   if (region->cont)
370     {
371       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 	       region->cont->index);
373     }
374 
375   if (region->exit)
376     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 	     region->exit->index);
378   else
379     fprintf (file, "%*s[no exit marker]\n", indent, "");
380 
381   if (region->next)
382     dump_omp_region (file, region->next, indent);
383 }
384 
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
387 {
388   dump_omp_region (stderr, region, 0);
389 }
390 
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
393 {
394   dump_omp_region (stderr, root_omp_region, 0);
395 }
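
/* For illustration, for a parallel+for nest the dump looks something
   like this (block numbers hypothetical):

	bb 2: gimple_omp_parallel
	    bb 3: gimple_omp_for
	    bb 5: GIMPLE_OMP_CONTINUE
	    bb 6: GIMPLE_OMP_RETURN
	bb 7: GIMPLE_OMP_RETURN  */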
396 
397 /* Create a new parallel region of type TYPE starting at BB inside PARENT.  */
398 
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 		struct omp_region *parent)
402 {
403   struct omp_region *region = XCNEW (struct omp_region);
404 
405   region->outer = parent;
406   region->entry = bb;
407   region->type = type;
408 
409   if (parent)
410     {
411       /* This is a nested region.  Add it to the list of inner
412 	 regions in PARENT.  */
413       region->next = parent->inner;
414       parent->inner = region;
415     }
416   else
417     {
418       /* This is a toplevel region.  Add it to the list of toplevel
419 	 regions in ROOT_OMP_REGION.  */
420       region->next = root_omp_region;
421       root_omp_region = region;
422     }
423 
424   return region;
425 }
426 
427 /* Release the memory associated with the region tree rooted at REGION.  */
428 
429 static void
430 free_omp_region_1 (struct omp_region *region)
431 {
432   struct omp_region *i, *n;
433 
434   for (i = region->inner; i ; i = n)
435     {
436       n = i->next;
437       free_omp_region_1 (i);
438     }
439 
440   free (region);
441 }
442 
443 /* Release the memory for the entire omp region tree.  */
444 
445 void
446 omp_free_regions (void)
447 {
448   struct omp_region *r, *n;
449   for (r = root_omp_region; r ; r = n)
450     {
451       n = r->next;
452       free_omp_region_1 (r);
453     }
454   root_omp_region = NULL;
455 }
456 
457 /* A convenience function to build an empty GIMPLE_COND with just the
458    condition.  */
459 
460 static gcond *
461 gimple_build_cond_empty (tree cond)
462 {
463   enum tree_code pred_code;
464   tree lhs, rhs;
465 
466   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468 }
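
/* For illustration, for COND a < b this returns
   GIMPLE_COND <LT_EXPR, a, b> with both branch labels left NULL_TREE,
   to be wired up by the caller.  */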
469 
470 /* Return true if a parallel REGION is within a declare target function or
471    within a target region and is not a part of a gridified target.  */
472 
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 {
476   bool indirect = false;
477   for (region = region->outer; region; region = region->outer)
478     {
479       if (region->type == GIMPLE_OMP_PARALLEL)
480 	indirect = true;
481       else if (region->type == GIMPLE_OMP_TARGET)
482 	{
483 	  gomp_target *tgt_stmt
484 	    = as_a <gomp_target *> (last_stmt (region->entry));
485 
486 	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 			       OMP_CLAUSE__GRIDDIM_))
488 	    return indirect;
489 	  else
490 	    return true;
491 	}
492     }
493 
494   if (lookup_attribute ("omp declare target",
495 			DECL_ATTRIBUTES (current_function_decl)))
496     return true;
497 
498   return false;
499 }
500 
501 /* Build the function calls to GOMP_parallel etc. to actually
502    generate the parallel operation.  REGION is the parallel region
503    being expanded.  BB is the block where the code should be
504    inserted.  WS_ARGS will be set if this is a call to a combined
505    parallel+workshare construct; it contains the list of additional
506    arguments needed by the workshare construct.  */
507 
508 static void
509 expand_parallel_call (struct omp_region *region, basic_block bb,
510 		      gomp_parallel *entry_stmt,
511 		      vec<tree, va_gc> *ws_args)
512 {
513   tree t, t1, t2, val, cond, c, clauses, flags;
514   gimple_stmt_iterator gsi;
515   gimple *stmt;
516   enum built_in_function start_ix;
517   int start_ix2;
518   location_t clause_loc;
519   vec<tree, va_gc> *args;
520 
521   clauses = gimple_omp_parallel_clauses (entry_stmt);
522 
523   /* Determine what flavor of GOMP_parallel we will be
524      emitting.  */
525   start_ix = BUILT_IN_GOMP_PARALLEL;
526   if (is_combined_parallel (region))
527     {
528       switch (region->inner->type)
529 	{
530 	case GIMPLE_OMP_FOR:
531 	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
532 	  switch (region->inner->sched_kind)
533 	    {
534 	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
535 	      start_ix2 = 3;
536 	      break;
537 	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
538 	    case OMP_CLAUSE_SCHEDULE_GUIDED:
539 	      if (region->inner->sched_modifiers
540 		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
541 		{
542 		  start_ix2 = 3 + region->inner->sched_kind;
543 		  break;
544 		}
545 	      /* FALLTHRU */
546 	    default:
547 	      start_ix2 = region->inner->sched_kind;
548 	      break;
549 	    }
550 	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
551 	  start_ix = (enum built_in_function) start_ix2;
552 	  break;
553 	case GIMPLE_OMP_SECTIONS:
554 	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
555 	  break;
556 	default:
557 	  gcc_unreachable ();
558 	}
559     }
560 
561   /* By default, the value of NUM_THREADS is zero (selected at run time)
562      and there is no conditional.  */
563   cond = NULL_TREE;
564   val = build_int_cst (unsigned_type_node, 0);
565   flags = build_int_cst (unsigned_type_node, 0);
566 
567   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
568   if (c)
569     cond = OMP_CLAUSE_IF_EXPR (c);
570 
571   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
572   if (c)
573     {
574       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
575       clause_loc = OMP_CLAUSE_LOCATION (c);
576     }
577   else
578     clause_loc = gimple_location (entry_stmt);
579 
580   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
581   if (c)
582     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
583 
584   /* Ensure 'val' is of the correct type.  */
585   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
586 
587   /* If we found the clause 'if (cond)', build either
588      (cond != 0) or (cond ? val : 1u).  */
589   if (cond)
590     {
591       cond = gimple_boolify (cond);
592 
593       if (integer_zerop (val))
594 	val = fold_build2_loc (clause_loc,
595 			   EQ_EXPR, unsigned_type_node, cond,
596 			   build_int_cst (TREE_TYPE (cond), 0));
597       else
598 	{
599 	  basic_block cond_bb, then_bb, else_bb;
600 	  edge e, e_then, e_else;
601 	  tree tmp_then, tmp_else, tmp_join, tmp_var;
602 
603 	  tmp_var = create_tmp_var (TREE_TYPE (val));
604 	  if (gimple_in_ssa_p (cfun))
605 	    {
606 	      tmp_then = make_ssa_name (tmp_var);
607 	      tmp_else = make_ssa_name (tmp_var);
608 	      tmp_join = make_ssa_name (tmp_var);
609 	    }
610 	  else
611 	    {
612 	      tmp_then = tmp_var;
613 	      tmp_else = tmp_var;
614 	      tmp_join = tmp_var;
615 	    }
616 
617 	  e = split_block_after_labels (bb);
618 	  cond_bb = e->src;
619 	  bb = e->dest;
620 	  remove_edge (e);
621 
622 	  then_bb = create_empty_bb (cond_bb);
623 	  else_bb = create_empty_bb (then_bb);
624 	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
625 	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
626 
627 	  stmt = gimple_build_cond_empty (cond);
628 	  gsi = gsi_start_bb (cond_bb);
629 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
630 
631 	  gsi = gsi_start_bb (then_bb);
632 	  expand_omp_build_assign (&gsi, tmp_then, val, true);
633 
634 	  gsi = gsi_start_bb (else_bb);
635 	  expand_omp_build_assign (&gsi, tmp_else,
636 				   build_int_cst (unsigned_type_node, 1),
637 				   true);
638 
639 	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
640 	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
641 	  add_bb_to_loop (then_bb, cond_bb->loop_father);
642 	  add_bb_to_loop (else_bb, cond_bb->loop_father);
643 	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
644 	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
645 
646 	  if (gimple_in_ssa_p (cfun))
647 	    {
648 	      gphi *phi = create_phi_node (tmp_join, bb);
649 	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
650 	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
651 	    }
652 
653 	  val = tmp_join;
654 	}
655 
656       gsi = gsi_start_bb (bb);
657       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
658 				      false, GSI_CONTINUE_LINKING);
659     }
660 
661   gsi = gsi_last_bb (bb);
662   t = gimple_omp_parallel_data_arg (entry_stmt);
663   if (t == NULL)
664     t1 = null_pointer_node;
665   else
666     t1 = build_fold_addr_expr (t);
667   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
668   t2 = build_fold_addr_expr (child_fndecl);
669 
670   vec_alloc (args, 4 + vec_safe_length (ws_args));
671   args->quick_push (t2);
672   args->quick_push (t1);
673   args->quick_push (val);
674   if (ws_args)
675     args->splice (*ws_args);
676   args->quick_push (flags);
677 
678   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
679 			       builtin_decl_explicit (start_ix), args);
680 
681   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
682 			    false, GSI_CONTINUE_LINKING);
683 
684   if (hsa_gen_requested_p ()
685       && parallel_needs_hsa_kernel_p (region))
686     {
687       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
688       hsa_register_kernel (child_cnode);
689     }
690 }
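
/* For illustration (sketch, names hypothetical),

	#pragma omp parallel if (x) num_threads (4)

   expands to roughly

	D.1 = x != 0 ? 4u : 1u;
	__builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o, D.1, 0);

   i.e. an 'if' clause evaluating to false is implemented by forcing a
   team of one thread rather than by a separate serial code path.  */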
691 
692 /* Insert a call to the function named by the first element of WS_ARGS
693    (FUNC_NAME), with the information from ENTRY_STMT, into basic block BB.  */
694 
695 static void
696 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
697 		      vec <tree, va_gc> *ws_args)
698 {
699   tree t, t1, t2;
700   gimple_stmt_iterator gsi;
701   vec <tree, va_gc> *args;
702 
703   gcc_assert (vec_safe_length (ws_args) == 2);
704   tree func_name = (*ws_args)[0];
705   tree grain = (*ws_args)[1];
706 
707   tree clauses = gimple_omp_parallel_clauses (entry_stmt);
708   tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
709   gcc_assert (count != NULL_TREE);
710   count = OMP_CLAUSE_OPERAND (count, 0);
711 
712   gsi = gsi_last_bb (bb);
713   t = gimple_omp_parallel_data_arg (entry_stmt);
714   if (t == NULL)
715     t1 = null_pointer_node;
716   else
717     t1 = build_fold_addr_expr (t);
718   t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
719 
720   vec_alloc (args, 4);
721   args->quick_push (t2);
722   args->quick_push (t1);
723   args->quick_push (count);
724   args->quick_push (grain);
725   t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
726 
727   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
728 			    GSI_CONTINUE_LINKING);
729 }
730 
731 /* Build the function call to GOMP_task to actually generate the task
732    operation.  BB is the block where the code should be inserted.  */
733 
734 static void
735 expand_task_call (struct omp_region *region, basic_block bb,
736 		  gomp_task *entry_stmt)
737 {
738   tree t1, t2, t3;
739   gimple_stmt_iterator gsi;
740   location_t loc = gimple_location (entry_stmt);
741 
742   tree clauses = gimple_omp_task_clauses (entry_stmt);
743 
744   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
745   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
746   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
747   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
748   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
749   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750 
751   unsigned int iflags
752     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
753       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
754       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755 
756   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
757   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
758   tree num_tasks = NULL_TREE;
759   bool ull = false;
760   if (taskloop_p)
761     {
762       gimple *g = last_stmt (region->outer->entry);
763       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
764 		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
765       struct omp_for_data fd;
766       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
767       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
768       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
769 				OMP_CLAUSE__LOOPTEMP_);
770       startvar = OMP_CLAUSE_DECL (startvar);
771       endvar = OMP_CLAUSE_DECL (endvar);
772       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
773       if (fd.loop.cond_code == LT_EXPR)
774 	iflags |= GOMP_TASK_FLAG_UP;
775       tree tclauses = gimple_omp_for_clauses (g);
776       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
777       if (num_tasks)
778 	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
779       else
780 	{
781 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
782 	  if (num_tasks)
783 	    {
784 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
785 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 	    }
787 	  else
788 	    num_tasks = integer_zero_node;
789 	}
790       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
791       if (ifc == NULL_TREE)
792 	iflags |= GOMP_TASK_FLAG_IF;
793       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
794 	iflags |= GOMP_TASK_FLAG_NOGROUP;
795       ull = fd.iter_type == long_long_unsigned_type_node;
796     }
797   else if (priority)
798     iflags |= GOMP_TASK_FLAG_PRIORITY;
799 
800   tree flags = build_int_cst (unsigned_type_node, iflags);
801 
802   tree cond = boolean_true_node;
803   if (ifc)
804     {
805       if (taskloop_p)
806 	{
807 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
808 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
809 			       build_int_cst (unsigned_type_node,
810 					      GOMP_TASK_FLAG_IF),
811 			       build_int_cst (unsigned_type_node, 0));
812 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
813 				   flags, t);
814 	}
815       else
816 	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
817     }
818 
819   if (finalc)
820     {
821       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
822       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
823 			   build_int_cst (unsigned_type_node,
824 					  GOMP_TASK_FLAG_FINAL),
825 			   build_int_cst (unsigned_type_node, 0));
826       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827     }
828   if (depend)
829     depend = OMP_CLAUSE_DECL (depend);
830   else
831     depend = build_int_cst (ptr_type_node, 0);
832   if (priority)
833     priority = fold_convert (integer_type_node,
834 			     OMP_CLAUSE_PRIORITY_EXPR (priority));
835   else
836     priority = integer_zero_node;
837 
838   gsi = gsi_last_bb (bb);
839   tree t = gimple_omp_task_data_arg (entry_stmt);
840   if (t == NULL)
841     t2 = null_pointer_node;
842   else
843     t2 = build_fold_addr_expr_loc (loc, t);
844   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
845   t = gimple_omp_task_copy_fn (entry_stmt);
846   if (t == NULL)
847     t3 = null_pointer_node;
848   else
849     t3 = build_fold_addr_expr_loc (loc, t);
850 
851   if (taskloop_p)
852     t = build_call_expr (ull
853 			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
854 			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
855 			 11, t1, t2, t3,
856 			 gimple_omp_task_arg_size (entry_stmt),
857 			 gimple_omp_task_arg_align (entry_stmt), flags,
858 			 num_tasks, priority, startvar, endvar, step);
859   else
860     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
861 			 9, t1, t2, t3,
862 			 gimple_omp_task_arg_size (entry_stmt),
863 			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
864 			 depend, priority);
865 
866   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
867 			    false, GSI_CONTINUE_LINKING);
868 }
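
/* For illustration (argument names hypothetical), a plain
   '#pragma omp task' ends up as

	__builtin_GOMP_task (foo._omp_fn.1, &.omp_data_o, cpyfn,
			     arg_size, arg_align, cond, flags,
			     depend, priority);

   where cond is true, depend is NULL and priority is 0 when the
   corresponding clauses are absent.  */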
869 
870 /* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
871 
872 static tree
873 vec2chain (vec<tree, va_gc> *v)
874 {
875   tree chain = NULL_TREE, t;
876   unsigned ix;
877 
878   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879     {
880       DECL_CHAIN (t) = chain;
881       chain = t;
882     }
883 
884   return chain;
885 }
886 
887 /* Remove barriers in REGION->EXIT's block.  Note that this is only
888    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
889    is an implicit barrier, any barrier that a workshare inside the
890    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
891    can now be removed.  */
892 
893 static void
894 remove_exit_barrier (struct omp_region *region)
895 {
896   gimple_stmt_iterator gsi;
897   basic_block exit_bb;
898   edge_iterator ei;
899   edge e;
900   gimple *stmt;
901   int any_addressable_vars = -1;
902 
903   exit_bb = region->exit;
904 
905   /* If the parallel region doesn't return, we don't have the REGION->EXIT
906      block at all.  */
907   if (! exit_bb)
908     return;
909 
910   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
911      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
912      statements that can appear in between are extremely limited -- no
913      memory operations at all.  Here, we allow nothing at all, so the
914      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
915   gsi = gsi_last_bb (exit_bb);
916   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
917   gsi_prev (&gsi);
918   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
919     return;
920 
921   FOR_EACH_EDGE (e, ei, exit_bb->preds)
922     {
923       gsi = gsi_last_bb (e->src);
924       if (gsi_end_p (gsi))
925 	continue;
926       stmt = gsi_stmt (gsi);
927       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
928 	  && !gimple_omp_return_nowait_p (stmt))
929 	{
930 	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
931 	     in many cases.  If there could be tasks queued, the barrier
932 	     might be needed to let the tasks run before some local
933 	     variable of the parallel that the task uses as shared
934 	     runs out of scope.  The task can be spawned either
935 	     from within the current function (this would be easy to check)
936 	     or from some function it calls that gets passed the address
937 	     of such a variable.  */
938 	  if (any_addressable_vars < 0)
939 	    {
940 	      gomp_parallel *parallel_stmt
941 		= as_a <gomp_parallel *> (last_stmt (region->entry));
942 	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
943 	      tree local_decls, block, decl;
944 	      unsigned ix;
945 
946 	      any_addressable_vars = 0;
947 	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
948 		if (TREE_ADDRESSABLE (decl))
949 		  {
950 		    any_addressable_vars = 1;
951 		    break;
952 		  }
953 	      for (block = gimple_block (stmt);
954 		   !any_addressable_vars
955 		   && block
956 		   && TREE_CODE (block) == BLOCK;
957 		   block = BLOCK_SUPERCONTEXT (block))
958 		{
959 		  for (local_decls = BLOCK_VARS (block);
960 		       local_decls;
961 		       local_decls = DECL_CHAIN (local_decls))
962 		    if (TREE_ADDRESSABLE (local_decls))
963 		      {
964 			any_addressable_vars = 1;
965 			break;
966 		      }
967 		  if (block == gimple_block (parallel_stmt))
968 		    break;
969 		}
970 	    }
971 	  if (!any_addressable_vars)
972 	    gimple_omp_return_set_nowait (stmt);
973 	}
974     }
975 }
976 
977 static void
978 remove_exit_barriers (struct omp_region *region)
979 {
980   if (region->type == GIMPLE_OMP_PARALLEL)
981     remove_exit_barrier (region);
982 
983   if (region->inner)
984     {
985       region = region->inner;
986       remove_exit_barriers (region);
987       while (region->next)
988 	{
989 	  region = region->next;
990 	  remove_exit_barriers (region);
991 	}
992     }
993 }
994 
995 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
996    calls.  These can't be declared as const functions, but
997    within one parallel body they are constant, so they can be
998    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
999    which are declared const.  Similarly for a task body, except
1000    that in an untied task omp_get_thread_num () can change at any task
1001    scheduling point.  */
1002 
1003 static void
1004 optimize_omp_library_calls (gimple *entry_stmt)
1005 {
1006   basic_block bb;
1007   gimple_stmt_iterator gsi;
1008   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1009   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1010   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1011   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1012   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1013 		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1014 					  OMP_CLAUSE_UNTIED) != NULL);
1015 
1016   FOR_EACH_BB_FN (bb, cfun)
1017     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018       {
1019 	gimple *call = gsi_stmt (gsi);
1020 	tree decl;
1021 
1022 	if (is_gimple_call (call)
1023 	    && (decl = gimple_call_fndecl (call))
1024 	    && DECL_EXTERNAL (decl)
1025 	    && TREE_PUBLIC (decl)
1026 	    && DECL_INITIAL (decl) == NULL)
1027 	  {
1028 	    tree built_in;
1029 
1030 	    if (DECL_NAME (decl) == thr_num_id)
1031 	      {
1032 		/* In #pragma omp task untied omp_get_thread_num () can change
1033 		   during the execution of the task region.  */
1034 		if (untied_task)
1035 		  continue;
1036 		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 	      }
1038 	    else if (DECL_NAME (decl) == num_thr_id)
1039 	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1040 	    else
1041 	      continue;
1042 
1043 	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1044 		|| gimple_call_num_args (call) != 0)
1045 	      continue;
1046 
1047 	    if (flag_exceptions && !TREE_NOTHROW (decl))
1048 	      continue;
1049 
1050 	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1051 		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1052 					TREE_TYPE (TREE_TYPE (built_in))))
1053 	      continue;
1054 
1055 	    gimple_call_set_fndecl (call, built_in);
1056 	  }
1057       }
1058 }
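
/* For illustration, inside a parallel body the replacement above turns

	_1 = omp_get_num_threads ();

   into

	_1 = __builtin_omp_get_num_threads ();

   letting later passes treat the call as const and CSE it.  */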
1059 
1060 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1061    regimplified.  */
1062 
1063 static tree
1064 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065 {
1066   tree t = *tp;
1067 
1068   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1069   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1070     return t;
1071 
1072   if (TREE_CODE (t) == ADDR_EXPR)
1073     recompute_tree_invariant_for_addr_expr (t);
1074 
1075   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1076   return NULL_TREE;
1077 }
1078 
1079 /* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1080 
1081 static void
1082 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1083 			 bool after)
1084 {
1085   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1086   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1087 				   !after, after ? GSI_CONTINUE_LINKING
1088 						 : GSI_SAME_STMT);
1089   gimple *stmt = gimple_build_assign (to, from);
1090   if (after)
1091     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1092   else
1093     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1094   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1095       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096     {
1097       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1098       gimple_regimplify_operands (stmt, &gsi);
1099     }
1100 }
1101 
1102 /* Expand the OpenMP parallel or task directive starting at REGION.  */
1103 
1104 static void
1105 expand_omp_taskreg (struct omp_region *region)
1106 {
1107   basic_block entry_bb, exit_bb, new_bb;
1108   struct function *child_cfun;
1109   tree child_fn, block, t;
1110   gimple_stmt_iterator gsi;
1111   gimple *entry_stmt, *stmt;
1112   edge e;
1113   vec<tree, va_gc> *ws_args;
1114 
1115   entry_stmt = last_stmt (region->entry);
1116   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1117   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118 
1119   entry_bb = region->entry;
1120   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1121     exit_bb = region->cont;
1122   else
1123     exit_bb = region->exit;
1124 
1125   bool is_cilk_for
1126     = (flag_cilkplus
1127        && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1128        && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1129 			   OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1130 
1131   if (is_cilk_for)
1132     /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1133        and the inner statement contains the name of the built-in function
1134        and grain.  */
1135     ws_args = region->inner->ws_args;
1136   else if (is_combined_parallel (region))
1137     ws_args = region->ws_args;
1138   else
1139     ws_args = NULL;
1140 
1141   if (child_cfun->cfg)
1142     {
1143       /* Due to inlining, it may happen that we have already outlined
1144 	 the region, in which case all we need to do is make the
1145 	 sub-graph unreachable and emit the parallel call.  */
1146       edge entry_succ_e, exit_succ_e;
1147 
1148       entry_succ_e = single_succ_edge (entry_bb);
1149 
1150       gsi = gsi_last_bb (entry_bb);
1151       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1152 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1153       gsi_remove (&gsi, true);
1154 
1155       new_bb = entry_bb;
1156       if (exit_bb)
1157 	{
1158 	  exit_succ_e = single_succ_edge (exit_bb);
1159 	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1160 	}
1161       remove_edge_and_dominated_blocks (entry_succ_e);
1162     }
1163   else
1164     {
1165       unsigned srcidx, dstidx, num;
1166 
1167       /* If the parallel region needs data sent from the parent
1168 	 function, then the very first statement (except for possible
1169 	 tree profile counter updates) of the parallel body
1170 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1171 	 &.OMP_DATA_O is passed as an argument to the child function,
1172 	 we need to replace it with the argument as seen by the child
1173 	 function.
1174 
1175 	 In most cases, this will end up being the identity assignment
1176 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1177 	 a function call that has been inlined, the original PARM_DECL
1178 	 .OMP_DATA_I may have been converted into a different local
1179 	 variable, in which case we need to keep the assignment.  */
1180       if (gimple_omp_taskreg_data_arg (entry_stmt))
1181 	{
1182 	  basic_block entry_succ_bb
1183 	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1184 				       : FALLTHRU_EDGE (entry_bb)->dest;
1185 	  tree arg;
1186 	  gimple *parcopy_stmt = NULL;
1187 
1188 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1189 	    {
1190 	      gimple *stmt;
1191 
1192 	      gcc_assert (!gsi_end_p (gsi));
1193 	      stmt = gsi_stmt (gsi);
1194 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1195 		continue;
1196 
1197 	      if (gimple_num_ops (stmt) == 2)
1198 		{
1199 		  tree arg = gimple_assign_rhs1 (stmt);
1200 
1201 		  /* We ignore the subcode because we're
1202 		     effectively doing a STRIP_NOPS.  */
1203 
1204 		  if (TREE_CODE (arg) == ADDR_EXPR
1205 		      && TREE_OPERAND (arg, 0)
1206 			== gimple_omp_taskreg_data_arg (entry_stmt))
1207 		    {
1208 		      parcopy_stmt = stmt;
1209 		      break;
1210 		    }
1211 		}
1212 	    }
1213 
1214 	  gcc_assert (parcopy_stmt != NULL);
1215 	  arg = DECL_ARGUMENTS (child_fn);
1216 
1217 	  if (!gimple_in_ssa_p (cfun))
1218 	    {
1219 	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1220 		gsi_remove (&gsi, true);
1221 	      else
1222 		{
1223 		  /* ?? Is setting the subcode really necessary ??  */
1224 		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1225 		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1226 		}
1227 	    }
1228 	  else
1229 	    {
1230 	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1231 	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1232 	      /* We'd like to set the rhs to the default def in the child_fn,
1233 		 but it's too early to create ssa names in the child_fn.
1234 		 Instead, we set the rhs to the parm.  In
1235 		 move_sese_region_to_fn, we introduce a default def for the
1236 		 parm, map the parm to its default def, and once we encounter
1237 		 this stmt, replace the parm with the default def.  */
1238 	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1239 	      update_stmt (parcopy_stmt);
1240 	    }
1241 	}
1242 
1243       /* Declare local variables needed in CHILD_CFUN.  */
1244       block = DECL_INITIAL (child_fn);
1245       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1246       /* The gimplifier could record temporaries in parallel/task block
1247 	 rather than in containing function's local_decls chain,
1248 	 which would mean cgraph missed finalizing them.  Do it now.  */
1249       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1250 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1251 	  varpool_node::finalize_decl (t);
1252       DECL_SAVED_TREE (child_fn) = NULL;
1253       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1254       gimple_set_body (child_fn, NULL);
1255       TREE_USED (block) = 1;
1256 
1257       /* Reset DECL_CONTEXT on function arguments.  */
1258       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1259 	DECL_CONTEXT (t) = child_fn;
1260 
1261       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1262 	 so that it can be moved to the child function.  */
1263       gsi = gsi_last_bb (entry_bb);
1264       stmt = gsi_stmt (gsi);
1265       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1266 			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
1267       e = split_block (entry_bb, stmt);
1268       gsi_remove (&gsi, true);
1269       entry_bb = e->dest;
1270       edge e2 = NULL;
1271       if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1272 	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1273       else
1274 	{
1275 	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1276 	  gcc_assert (e2->dest == region->exit);
1277 	  remove_edge (BRANCH_EDGE (entry_bb));
1278 	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1279 	  gsi = gsi_last_bb (region->exit);
1280 	  gcc_assert (!gsi_end_p (gsi)
1281 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1282 	  gsi_remove (&gsi, true);
1283 	}
1284 
1285       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1286       if (exit_bb)
1287 	{
1288 	  gsi = gsi_last_bb (exit_bb);
1289 	  gcc_assert (!gsi_end_p (gsi)
1290 		      && (gimple_code (gsi_stmt (gsi))
1291 			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1292 	  stmt = gimple_build_return (NULL);
1293 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1294 	  gsi_remove (&gsi, true);
1295 	}
1296 
1297       /* Move the parallel region into CHILD_CFUN.  */
1298 
1299       if (gimple_in_ssa_p (cfun))
1300 	{
1301 	  init_tree_ssa (child_cfun);
1302 	  init_ssa_operands (child_cfun);
1303 	  child_cfun->gimple_df->in_ssa_p = true;
1304 	  block = NULL_TREE;
1305 	}
1306       else
1307 	block = gimple_block (entry_stmt);
1308 
1309       /* Make sure to generate early debug for the function before
1310          outlining anything.  */
1311       if (! gimple_in_ssa_p (cfun))
1312 	(*debug_hooks->early_global_decl) (cfun->decl);
1313 
1314       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1315       if (exit_bb)
1316 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1317       if (e2)
1318 	{
1319 	  basic_block dest_bb = e2->dest;
1320 	  if (!exit_bb)
1321 	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1322 	  remove_edge (e2);
1323 	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1324 	}
1325       /* When the OMP expansion process cannot guarantee an up-to-date
1326 	 loop tree, arrange for the child function to fix up loops.  */
1327       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1328 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1329 
1330       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1331       num = vec_safe_length (child_cfun->local_decls);
1332       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1333 	{
1334 	  t = (*child_cfun->local_decls)[srcidx];
1335 	  if (DECL_CONTEXT (t) == cfun->decl)
1336 	    continue;
1337 	  if (srcidx != dstidx)
1338 	    (*child_cfun->local_decls)[dstidx] = t;
1339 	  dstidx++;
1340 	}
1341       if (dstidx != num)
1342 	vec_safe_truncate (child_cfun->local_decls, dstidx);
1343 
1344       /* Inform the callgraph about the new function.  */
1345       child_cfun->curr_properties = cfun->curr_properties;
1346       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1347       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1348       cgraph_node *node = cgraph_node::get_create (child_fn);
1349       node->parallelized_function = 1;
1350       cgraph_node::add_new_function (child_fn, true);
1351 
1352       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1353 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1354 
1355       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1356 	 fixed in a following pass.  */
1357       push_cfun (child_cfun);
1358       if (need_asm)
1359 	assign_assembler_name_if_needed (child_fn);
1360 
1361       if (optimize)
1362 	optimize_omp_library_calls (entry_stmt);
1363       cgraph_edge::rebuild_edges ();
1364 
1365       /* Some EH regions might become dead, see PR34608.  If
1366 	 pass_cleanup_cfg isn't the first pass to happen with the
1367 	 new child, these dead EH edges might cause problems.
1368 	 Clean them up now.  */
1369       if (flag_exceptions)
1370 	{
1371 	  basic_block bb;
1372 	  bool changed = false;
1373 
1374 	  FOR_EACH_BB_FN (bb, cfun)
1375 	    changed |= gimple_purge_dead_eh_edges (bb);
1376 	  if (changed)
1377 	    cleanup_tree_cfg ();
1378 	}
1379       if (gimple_in_ssa_p (cfun))
1380 	update_ssa (TODO_update_ssa);
1381       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1382 	verify_loop_structure ();
1383       pop_cfun ();
1384 
1385       if (dump_file && !gimple_in_ssa_p (cfun))
1386 	{
1387 	  omp_any_child_fn_dumped = true;
1388 	  dump_function_header (dump_file, child_fn, dump_flags);
1389 	  dump_function_to_file (child_fn, dump_file, dump_flags);
1390 	}
1391     }
1392 
1393   /* Emit a library call to launch the children threads.  */
1394   if (is_cilk_for)
1395     expand_cilk_for_call (new_bb,
1396 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1397   else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1398     expand_parallel_call (region, new_bb,
1399 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1400   else
1401     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1402   if (gimple_in_ssa_p (cfun))
1403     update_ssa (TODO_update_ssa_only_virtuals);
1404 }
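
/* For illustration (sketch, names hypothetical), after this function a
   parallel region in foo () has been outlined into foo._omp_fn.0 and
   the region entry has been replaced by

	.omp_data_o.i = i;
	__builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);

   with the region body now living in the child function's CFG.  */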
1405 
1406 /* Information about members of an OpenACC collapsed loop nest.  */
1407 
1408 struct oacc_collapse
1409 {
1410   tree base;  /* Base value.  */
1411   tree iters; /* Number of steps.  */
1412   tree step;  /* Step size.  */
1413   tree tile;  /* Tile increment (if tiled).  */
1414   tree outer; /* Tile iterator var. */
1415 };
1416 
1417 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1418    Fill in COUNTS array.  Emit any initialization code before GSI.
1419    Return the calculated outer loop bound of BOUND_TYPE.  */
1420 
1421 static tree
1422 expand_oacc_collapse_init (const struct omp_for_data *fd,
1423 			   gimple_stmt_iterator *gsi,
1424 			   oacc_collapse *counts, tree bound_type,
1425 			   location_t loc)
1426 {
1427   tree tiling = fd->tiling;
1428   tree total = build_int_cst (bound_type, 1);
1429   int ix;
1430 
1431   gcc_assert (integer_onep (fd->loop.step));
1432   gcc_assert (integer_zerop (fd->loop.n1));
1433 
1434   /* When tiling, the first operand of the tile clause applies to the
1435      innermost loop, and we work outwards from there.  Seems
1436      backwards, but whatever.  */
1437   for (ix = fd->collapse; ix--;)
1438     {
1439       const omp_for_data_loop *loop = &fd->loops[ix];
1440 
1441       tree iter_type = TREE_TYPE (loop->v);
1442       tree diff_type = iter_type;
1443       tree plus_type = iter_type;
1444 
1445       gcc_assert (loop->cond_code == fd->loop.cond_code);
1446 
1447       if (POINTER_TYPE_P (iter_type))
1448 	plus_type = sizetype;
1449       if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1450 	diff_type = signed_type_for (diff_type);
1451 
1452       if (tiling)
1453 	{
1454 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1455 	  tree loop_no = build_int_cst (integer_type_node, ix);
1456 	  tree tile = TREE_VALUE (tiling);
1457 	  gcall *call
1458 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1459 					  /* gwv-outer=*/integer_zero_node,
1460 					  /* gwv-inner=*/integer_zero_node);
1461 
1462 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1463 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1464 	  gimple_call_set_lhs (call, counts[ix].tile);
1465 	  gimple_set_location (call, loc);
1466 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1467 
1468 	  tiling = TREE_CHAIN (tiling);
1469 	}
1470       else
1471 	{
1472 	  counts[ix].tile = NULL;
1473 	  counts[ix].outer = loop->v;
1474 	}
1475 
1476       tree b = loop->n1;
1477       tree e = loop->n2;
1478       tree s = loop->step;
1479       bool up = loop->cond_code == LT_EXPR;
1480       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1481       bool negating;
1482       tree expr;
1483 
1484       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1485 				    true, GSI_SAME_STMT);
1486       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1487 				    true, GSI_SAME_STMT);
1488 
1489       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1490       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1491       if (negating)
1492 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1493       s = fold_convert (diff_type, s);
1494       if (negating)
1495 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1496       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1497 				    true, GSI_SAME_STMT);
1498 
1499       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1500       negating = !up && TYPE_UNSIGNED (iter_type);
1501       expr = fold_build2 (MINUS_EXPR, plus_type,
1502 			  fold_convert (plus_type, negating ? b : e),
1503 			  fold_convert (plus_type, negating ? e : b));
1504       expr = fold_convert (diff_type, expr);
1505       if (negating)
1506 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1507       tree range = force_gimple_operand_gsi
1508 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1509 
1510       /* Determine number of iterations.  */
1511       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1512       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1513       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1514 
1515       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1516 					     true, GSI_SAME_STMT);
1517 
1518       counts[ix].base = b;
1519       counts[ix].iters = iters;
1520       counts[ix].step = s;
1521 
1522       total = fold_build2 (MULT_EXPR, bound_type, total,
1523 			   fold_convert (bound_type, iters));
1524     }
1525 
1526   return total;
1527 }
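
/* For illustration (counts hypothetical): for a collapse(2) nest whose
   loops iterate 10 and 20 times, the walk above records 10 and 20 as
   the ITERS of the two COUNTS entries and returns a combined outer
   bound of 200, the product of the iteration counts.  */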
1528 
1529 /* Emit initializers for collapsed loop members.  INNER is true if
1530    this is for the element loop of a TILE.  IVAR is the outer
1531    loop iteration variable, from which collapsed loop iteration values
1532    are calculated.  The COUNTS array has been initialized by
1533    expand_oacc_collapse_init.  */
1534 
1535 static void
1536 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1537 			   gimple_stmt_iterator *gsi,
1538 			   const oacc_collapse *counts, tree ivar)
1539 {
1540   tree ivar_type = TREE_TYPE (ivar);
1541 
1542   /*  The most rapidly changing iteration variable is the innermost
1543       one.  */
1544   for (int ix = fd->collapse; ix--;)
1545     {
1546       const omp_for_data_loop *loop = &fd->loops[ix];
1547       const oacc_collapse *collapse = &counts[ix];
1548       tree v = inner ? loop->v : collapse->outer;
1549       tree iter_type = TREE_TYPE (v);
1550       tree diff_type = TREE_TYPE (collapse->step);
1551       tree plus_type = iter_type;
1552       enum tree_code plus_code = PLUS_EXPR;
1553       tree expr;
1554 
1555       if (POINTER_TYPE_P (iter_type))
1556 	{
1557 	  plus_code = POINTER_PLUS_EXPR;
1558 	  plus_type = sizetype;
1559 	}
1560 
1561       expr = ivar;
1562       if (ix)
1563 	{
1564 	  tree mod = fold_convert (ivar_type, collapse->iters);
1565 	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1566 	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1567 	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1568 					   true, GSI_SAME_STMT);
1569 	}
1570 
1571       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1572 			  collapse->step);
1573       expr = fold_build2 (plus_code, iter_type,
1574 			  inner ? collapse->outer : collapse->base,
1575 			  fold_convert (plus_type, expr));
1576       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1577 				       true, GSI_SAME_STMT);
1578       gassign *ass = gimple_build_assign (v, expr);
1579       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1580     }
1581 }
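
/* For illustration (sketch, counts hypothetical): for a collapse(2)
   nest whose inner loop iterates 20 times, an outer iteration value
   IVAR == 47 decomposes as

	inner.v = inner.base + (47 % 20) * inner.step;	i.e. index 7
	outer.v = outer.base + (47 / 20) * outer.step;	i.e. index 2

   working from the innermost (most rapidly changing) loop outwards.  */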
1582 
1583 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1584    of the combined collapse > 1 loop constructs, generate code like:
1585 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1586 	if (cond3 is <)
1587 	  adj = STEP3 - 1;
1588 	else
1589 	  adj = STEP3 + 1;
1590 	count3 = (adj + N32 - N31) / STEP3;
1591 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1592 	if (cond2 is <)
1593 	  adj = STEP2 - 1;
1594 	else
1595 	  adj = STEP2 + 1;
1596 	count2 = (adj + N22 - N21) / STEP2;
1597 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1598 	if (cond1 is <)
1599 	  adj = STEP1 - 1;
1600 	else
1601 	  adj = STEP1 + 1;
1602 	count1 = (adj + N12 - N11) / STEP1;
1603 	count = count1 * count2 * count3;
1604    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1605 	count = 0;
1606    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1607    of the combined loop constructs, just initialize COUNTS array
1608    from the _looptemp_ clauses.  */
1609 
1610 /* NOTE: It *could* be better to moosh all of the BBs together,
1611    creating one larger BB with all the computation and the unexpected
1612    jump at the end.  I.e.
1613 
1614    bool zero3, zero2, zero1, zero;
1615 
1616    zero3 = N32 c3 N31;
1617    count3 = (N32 - N31) /[cl] STEP3;
1618    zero2 = N22 c2 N21;
1619    count2 = (N22 - N21) /[cl] STEP2;
1620    zero1 = N12 c1 N11;
1621    count1 = (N12 - N11) /[cl] STEP1;
1622    zero = zero3 || zero2 || zero1;
1623    count = count1 * count2 * count3;
1624    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1625 
1626    After all, we expect zero to be false, and thus we expect to have to
1627    evaluate all of the comparison expressions, so short-circuiting
1628    oughtn't be a win.  Since the condition isn't protecting a
1629    denominator, we're not concerned about divide-by-zero, so we can
1630    fully evaluate count even if a numerator turned out to be wrong.
1631 
1632    It seems like putting this all together would create much better
1633    scheduling opportunities, and less pressure on the chip's branch
1634    predictor.  */
1635 
1636 static void
1637 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1638 			    basic_block &entry_bb, tree *counts,
1639 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1640 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1641 			    basic_block &l2_dom_bb)
1642 {
1643   tree t, type = TREE_TYPE (fd->loop.v);
1644   edge e, ne;
1645   int i;
1646 
1647   /* Collapsed loops need work for expansion into SSA form.  */
1648   gcc_assert (!gimple_in_ssa_p (cfun));
1649 
1650   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1651       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1652     {
1653       gcc_assert (fd->ordered == 0);
1654       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1655 	 isn't supposed to be handled, as the inner loop doesn't
1656 	 use it.  */
1657       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1658 				     OMP_CLAUSE__LOOPTEMP_);
1659       gcc_assert (innerc);
1660       for (i = 0; i < fd->collapse; i++)
1661 	{
1662 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1663 				    OMP_CLAUSE__LOOPTEMP_);
1664 	  gcc_assert (innerc);
1665 	  if (i)
1666 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1667 	  else
1668 	    counts[0] = NULL_TREE;
1669 	}
1670       return;
1671     }
1672 
1673   for (i = fd->collapse; i < fd->ordered; i++)
1674     {
1675       tree itype = TREE_TYPE (fd->loops[i].v);
1676       counts[i] = NULL_TREE;
1677       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1678 		       fold_convert (itype, fd->loops[i].n1),
1679 		       fold_convert (itype, fd->loops[i].n2));
1680       if (t && integer_zerop (t))
1681 	{
1682 	  for (i = fd->collapse; i < fd->ordered; i++)
1683 	    counts[i] = build_int_cst (type, 0);
1684 	  break;
1685 	}
1686     }
1687   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1688     {
1689       tree itype = TREE_TYPE (fd->loops[i].v);
1690 
1691       if (i >= fd->collapse && counts[i])
1692 	continue;
1693       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1694 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1695 				fold_convert (itype, fd->loops[i].n1),
1696 				fold_convert (itype, fd->loops[i].n2)))
1697 	      == NULL_TREE || !integer_onep (t)))
1698 	{
1699 	  gcond *cond_stmt;
1700 	  tree n1, n2;
1701 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1702 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1703 					 true, GSI_SAME_STMT);
1704 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1705 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1706 					 true, GSI_SAME_STMT);
1707 	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1708 					 NULL_TREE, NULL_TREE);
1709 	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1710 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1711 			 expand_omp_regimplify_p, NULL, NULL)
1712 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1713 			    expand_omp_regimplify_p, NULL, NULL))
1714 	    {
1715 	      *gsi = gsi_for_stmt (cond_stmt);
1716 	      gimple_regimplify_operands (cond_stmt, gsi);
1717 	    }
1718 	  e = split_block (entry_bb, cond_stmt);
1719 	  basic_block &zero_iter_bb
1720 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1721 	  int &first_zero_iter
1722 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1723 	  if (zero_iter_bb == NULL)
1724 	    {
1725 	      gassign *assign_stmt;
1726 	      first_zero_iter = i;
1727 	      zero_iter_bb = create_empty_bb (entry_bb);
1728 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1729 	      *gsi = gsi_after_labels (zero_iter_bb);
1730 	      if (i < fd->collapse)
1731 		assign_stmt = gimple_build_assign (fd->loop.n2,
1732 						   build_zero_cst (type));
1733 	      else
1734 		{
1735 		  counts[i] = create_tmp_reg (type, ".count");
1736 		  assign_stmt
1737 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1738 		}
1739 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1740 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1741 				       entry_bb);
1742 	    }
1743 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1744 	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1745 	  e->flags = EDGE_TRUE_VALUE;
1746 	  e->probability = REG_BR_PROB_BASE - ne->probability;
1747 	  if (l2_dom_bb == NULL)
1748 	    l2_dom_bb = entry_bb;
1749 	  entry_bb = e->dest;
1750 	  *gsi = gsi_last_bb (entry_bb);
1751 	}
1752 
1753       if (POINTER_TYPE_P (itype))
1754 	itype = signed_type_for (itype);
1755       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1756 				 ? -1 : 1));
1757       t = fold_build2 (PLUS_EXPR, itype,
1758 		       fold_convert (itype, fd->loops[i].step), t);
1759       t = fold_build2 (PLUS_EXPR, itype, t,
1760 		       fold_convert (itype, fd->loops[i].n2));
1761       t = fold_build2 (MINUS_EXPR, itype, t,
1762 		       fold_convert (itype, fd->loops[i].n1));
1763       /* ?? We could probably use CEIL_DIV_EXPR instead of
1764 	 TRUNC_DIV_EXPR and adjust by hand.  Unless we can't
1765 	 generate the same code in the end because generically we
1766 	 don't know that the values involved must be negative for
1767 	 GT??  */
1768       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1769 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1770 			 fold_build1 (NEGATE_EXPR, itype, t),
1771 			 fold_build1 (NEGATE_EXPR, itype,
1772 				      fold_convert (itype,
1773 						    fd->loops[i].step)));
1774       else
1775 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1776 			 fold_convert (itype, fd->loops[i].step));
1777       t = fold_convert (type, t);
1778       if (TREE_CODE (t) == INTEGER_CST)
1779 	counts[i] = t;
1780       else
1781 	{
1782 	  if (i < fd->collapse || i != first_zero_iter2)
1783 	    counts[i] = create_tmp_reg (type, ".count");
1784 	  expand_omp_build_assign (gsi, counts[i], t);
1785 	}
1786       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1787 	{
1788 	  if (i == 0)
1789 	    t = counts[0];
1790 	  else
1791 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1792 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1793 	}
1794     }
1795 }
1796 
1797 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1798 	T = V;
1799 	V3 = N31 + (T % count3) * STEP3;
1800 	T = T / count3;
1801 	V2 = N21 + (T % count2) * STEP2;
1802 	T = T / count2;
1803 	V1 = N11 + T * STEP1;
1804    if this loop doesn't have an inner loop construct combined with it.
1805    If it does have an inner loop construct combined with it and the
1806    iteration count isn't known constant, store values from counts array
1807    into its _looptemp_ temporaries instead.  */
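
/* Concretely (an illustrative example only), with count3 == 4 and
   count2 == 5, the logical iteration number V == 37 unpacks as
	T = 37;
	V3 = N31 + (37 % 4) * STEP3;	T = 37 / 4;  i.e. T == 9
	V2 = N21 + (9 % 5) * STEP2;	T = 9 / 5;   i.e. T == 1
	V1 = N11 + 1 * STEP1;
   selecting element (1, 4, 1) of the collapsed iteration space.  */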
1808 
1809 static void
1810 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1811 			  tree *counts, gimple *inner_stmt, tree startvar)
1812 {
1813   int i;
1814   if (gimple_omp_for_combined_p (fd->for_stmt))
1815     {
1816       /* If fd->loop.n2 is constant, then no propagation of the counts
1817 	 is needed; they are constant.  */
1818       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1819 	return;
1820 
1821       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1822 		     ? gimple_omp_taskreg_clauses (inner_stmt)
1823 		     : gimple_omp_for_clauses (inner_stmt);
1824       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1825 	 isn't supposed to be handled, as the inner loop doesn't
1826 	 use it.  */
1827       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1828       gcc_assert (innerc);
1829       for (i = 0; i < fd->collapse; i++)
1830 	{
1831 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1832 				    OMP_CLAUSE__LOOPTEMP_);
1833 	  gcc_assert (innerc);
1834 	  if (i)
1835 	    {
1836 	      tree tem = OMP_CLAUSE_DECL (innerc);
1837 	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1838 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1839 					    false, GSI_CONTINUE_LINKING);
1840 	      gassign *stmt = gimple_build_assign (tem, t);
1841 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1842 	    }
1843 	}
1844       return;
1845     }
1846 
1847   tree type = TREE_TYPE (fd->loop.v);
1848   tree tem = create_tmp_reg (type, ".tem");
1849   gassign *stmt = gimple_build_assign (tem, startvar);
1850   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1851 
1852   for (i = fd->collapse - 1; i >= 0; i--)
1853     {
1854       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1855       itype = vtype;
1856       if (POINTER_TYPE_P (vtype))
1857 	itype = signed_type_for (vtype);
1858       if (i != 0)
1859 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1860       else
1861 	t = tem;
1862       t = fold_convert (itype, t);
1863       t = fold_build2 (MULT_EXPR, itype, t,
1864 		       fold_convert (itype, fd->loops[i].step));
1865       if (POINTER_TYPE_P (vtype))
1866 	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1867       else
1868 	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1869       t = force_gimple_operand_gsi (gsi, t,
1870 				    DECL_P (fd->loops[i].v)
1871 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1872 				    NULL_TREE, false,
1873 				    GSI_CONTINUE_LINKING);
1874       stmt = gimple_build_assign (fd->loops[i].v, t);
1875       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1876       if (i != 0)
1877 	{
1878 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1879 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1880 					false, GSI_CONTINUE_LINKING);
1881 	  stmt = gimple_build_assign (tem, t);
1882 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1883 	}
1884     }
1885 }
1886 
1887 /* Helper function for expand_omp_for_*.  Generate code like:
1888     L10:
1889 	V3 += STEP3;
1890 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
1891     L11:
1892 	V3 = N31;
1893 	V2 += STEP2;
1894 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
1895     L12:
1896 	V2 = N21;
1897 	V1 += STEP1;
1898 	goto BODY_BB;  */
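
/* In effect the collapsed iteration vector is stepped like an odometer:
   the innermost variable is incremented first, and whenever a variable
   exhausts its range it is reset to its lower bound and the carry is
   propagated to the next enclosing variable.  */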
1899 
1900 static basic_block
1901 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1902 			     basic_block body_bb)
1903 {
1904   basic_block last_bb, bb, collapse_bb = NULL;
1905   int i;
1906   gimple_stmt_iterator gsi;
1907   edge e;
1908   tree t;
1909   gimple *stmt;
1910 
1911   last_bb = cont_bb;
1912   for (i = fd->collapse - 1; i >= 0; i--)
1913     {
1914       tree vtype = TREE_TYPE (fd->loops[i].v);
1915 
1916       bb = create_empty_bb (last_bb);
1917       add_bb_to_loop (bb, last_bb->loop_father);
1918       gsi = gsi_start_bb (bb);
1919 
1920       if (i < fd->collapse - 1)
1921 	{
1922 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1923 	  e->probability = REG_BR_PROB_BASE / 8;
1924 
1925 	  t = fd->loops[i + 1].n1;
1926 	  t = force_gimple_operand_gsi (&gsi, t,
1927 					DECL_P (fd->loops[i + 1].v)
1928 					&& TREE_ADDRESSABLE (fd->loops[i
1929 								       + 1].v),
1930 					NULL_TREE, false,
1931 					GSI_CONTINUE_LINKING);
1932 	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1933 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1934 	}
1935       else
1936 	collapse_bb = bb;
1937 
1938       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1939 
1940       if (POINTER_TYPE_P (vtype))
1941 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1942       else
1943 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1944       t = force_gimple_operand_gsi (&gsi, t,
1945 				    DECL_P (fd->loops[i].v)
1946 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1947 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
1948       stmt = gimple_build_assign (fd->loops[i].v, t);
1949       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950 
1951       if (i > 0)
1952 	{
1953 	  t = fd->loops[i].n2;
1954 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1955 					false, GSI_CONTINUE_LINKING);
1956 	  tree v = fd->loops[i].v;
1957 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
1958 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1959 					  false, GSI_CONTINUE_LINKING);
1960 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1961 	  stmt = gimple_build_cond_empty (t);
1962 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1963 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1964 	  e->probability = REG_BR_PROB_BASE * 7 / 8;
1965 	}
1966       else
1967 	make_edge (bb, body_bb, EDGE_FALLTHRU);
1968       last_bb = bb;
1969     }
1970 
1971   return collapse_bb;
1972 }
1973 
1974 /* Expand #pragma omp ordered depend(source).  */
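
/* This expands to a single runtime call that publishes the current
   iteration vector, roughly (a sketch; the argument is the
   counts[fd->ordered] array built by expand_omp_ordered_source_sink):
	GOMP_doacross_post (&.orditera[0]);  */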
1975 
1976 static void
1977 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1978 			   tree *counts, location_t loc)
1979 {
1980   enum built_in_function source_ix
1981     = fd->iter_type == long_integer_type_node
1982       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1983   gimple *g
1984     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1985 			 build_fold_addr_expr (counts[fd->ordered]));
1986   gimple_set_location (g, loc);
1987   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1988 }
1989 
1990 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
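
/* For instance (a sketch, not the exact GIMPLE emitted), for
	#pragma omp for ordered(2)
   with unit steps, a depend(sink: i-1, j+1) clause becomes roughly
	if (i - 1 >= N11 && j + 1 < N22)
	  GOMP_doacross_wait (i - 1 - N11, j + 1 - N21);
   i.e. a guard proving the waited-for iteration actually exists in the
   iteration space, followed by a wait on its per-loop coordinates
   (divided by the loop steps when those aren't 1).  */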
1991 
1992 static void
1993 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1994 			 tree *counts, tree c, location_t loc)
1995 {
1996   auto_vec<tree, 10> args;
1997   enum built_in_function sink_ix
1998     = fd->iter_type == long_integer_type_node
1999       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2000   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2001   int i;
2002   gimple_stmt_iterator gsi2 = *gsi;
2003   bool warned_step = false;
2004 
2005   for (i = 0; i < fd->ordered; i++)
2006     {
2007       tree step = NULL_TREE;
2008       off = TREE_PURPOSE (deps);
2009       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2010 	{
2011 	  step = TREE_OPERAND (off, 1);
2012 	  off = TREE_OPERAND (off, 0);
2013 	}
2014       if (!integer_zerop (off))
2015 	{
2016 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2017 		      || fd->loops[i].cond_code == GT_EXPR);
2018 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2019 	  if (step)
2020 	    {
2021 	      /* Non-simple Fortran DO loops.  If step is variable,
2022 		 we don't know even the direction at compile time, so
2023 		 we can't warn.  */
2024 	      if (TREE_CODE (step) != INTEGER_CST)
2025 		break;
2026 	      forward = tree_int_cst_sgn (step) != -1;
2027 	    }
2028 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2029 	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2030 				"lexically later iteration");
2031 	  break;
2032 	}
2033       deps = TREE_CHAIN (deps);
2034     }
2035   /* If all offsets corresponding to the collapsed loops are zero,
2036      this depend clause can be ignored.  FIXME: but there is still a
2037      flush needed.  We need to emit one __sync_synchronize () for it
2038      though (perhaps conditionally)?  Solve this together with the
2039      conservative dependence folding optimization.
2040   if (i >= fd->collapse)
2041     return;  */
2042 
2043   deps = OMP_CLAUSE_DECL (c);
2044   gsi_prev (&gsi2);
2045   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2046   edge e2 = split_block_after_labels (e1->dest);
2047 
2048   gsi2 = gsi_after_labels (e1->dest);
2049   *gsi = gsi_last_bb (e1->src);
2050   for (i = 0; i < fd->ordered; i++)
2051     {
2052       tree itype = TREE_TYPE (fd->loops[i].v);
2053       tree step = NULL_TREE;
2054       tree orig_off = NULL_TREE;
2055       if (POINTER_TYPE_P (itype))
2056 	itype = sizetype;
2057       if (i)
2058 	deps = TREE_CHAIN (deps);
2059       off = TREE_PURPOSE (deps);
2060       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2061 	{
2062 	  step = TREE_OPERAND (off, 1);
2063 	  off = TREE_OPERAND (off, 0);
2064 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2065 		      && integer_onep (fd->loops[i].step)
2066 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2067 	}
2068       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2069       if (step)
2070 	{
2071 	  off = fold_convert_loc (loc, itype, off);
2072 	  orig_off = off;
2073 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2074 	}
2075 
2076       if (integer_zerop (off))
2077 	t = boolean_true_node;
2078       else
2079 	{
2080 	  tree a;
2081 	  tree co = fold_convert_loc (loc, itype, off);
2082 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2083 	    {
2084 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2085 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2086 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2087 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2088 				   co);
2089 	    }
2090 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2091 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2092 				 fd->loops[i].v, co);
2093 	  else
2094 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2095 				 fd->loops[i].v, co);
2096 	  if (step)
2097 	    {
2098 	      tree t1, t2;
2099 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2100 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2101 				      fd->loops[i].n1);
2102 	      else
2103 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2104 				      fd->loops[i].n2);
2105 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2106 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2107 				      fd->loops[i].n2);
2108 	      else
2109 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2110 				      fd->loops[i].n1);
2111 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2112 				   step, build_int_cst (TREE_TYPE (step), 0));
2113 	      if (TREE_CODE (step) != INTEGER_CST)
2114 		{
2115 		  t1 = unshare_expr (t1);
2116 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2117 						 false, GSI_CONTINUE_LINKING);
2118 		  t2 = unshare_expr (t2);
2119 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2120 						 false, GSI_CONTINUE_LINKING);
2121 		}
2122 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2123 				   t, t2, t1);
2124 	    }
2125 	  else if (fd->loops[i].cond_code == LT_EXPR)
2126 	    {
2127 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2128 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2129 				     fd->loops[i].n1);
2130 	      else
2131 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2132 				     fd->loops[i].n2);
2133 	    }
2134 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2135 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2136 				 fd->loops[i].n2);
2137 	  else
2138 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2139 				 fd->loops[i].n1);
2140 	}
2141       if (cond)
2142 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2143       else
2144 	cond = t;
2145 
2146       off = fold_convert_loc (loc, itype, off);
2147 
2148       if (step
2149 	  || (fd->loops[i].cond_code == LT_EXPR
2150 	      ? !integer_onep (fd->loops[i].step)
2151 	      : !integer_minus_onep (fd->loops[i].step)))
2152 	{
2153 	  if (step == NULL_TREE
2154 	      && TYPE_UNSIGNED (itype)
2155 	      && fd->loops[i].cond_code == GT_EXPR)
2156 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2157 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2158 						  s));
2159 	  else
2160 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2161 				 orig_off ? orig_off : off, s);
2162 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2163 			       build_int_cst (itype, 0));
2164 	  if (integer_zerop (t) && !warned_step)
2165 	    {
2166 	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2167 				  "in the iteration space");
2168 	      warned_step = true;
2169 	    }
2170 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2171 				  cond, t);
2172 	}
2173 
2174       if (i <= fd->collapse - 1 && fd->collapse > 1)
2175 	t = fd->loop.v;
2176       else if (counts[i])
2177 	t = counts[i];
2178       else
2179 	{
2180 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2181 			       fd->loops[i].v, fd->loops[i].n1);
2182 	  t = fold_convert_loc (loc, fd->iter_type, t);
2183 	}
2184       if (step)
2185 	/* We have divided off by step already earlier.  */;
2186       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2187 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2188 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2189 						s));
2190       else
2191 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2192       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2193 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2194       off = fold_convert_loc (loc, fd->iter_type, off);
2195       if (i <= fd->collapse - 1 && fd->collapse > 1)
2196 	{
2197 	  if (i)
2198 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2199 				   off);
2200 	  if (i < fd->collapse - 1)
2201 	    {
2202 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2203 				      counts[i]);
2204 	      continue;
2205 	    }
2206 	}
2207       off = unshare_expr (off);
2208       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2209       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2210 				    true, GSI_SAME_STMT);
2211       args.safe_push (t);
2212     }
2213   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2214   gimple_set_location (g, loc);
2215   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2216 
2217   cond = unshare_expr (cond);
2218   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2219 				   GSI_CONTINUE_LINKING);
2220   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2221   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2222   e3->probability = REG_BR_PROB_BASE / 8;
2223   e1->probability = REG_BR_PROB_BASE - e3->probability;
2224   e1->flags = EDGE_TRUE_VALUE;
2225   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2226 
2227   *gsi = gsi_after_labels (e2->dest);
2228 }
2229 
2230 /* Expand all #pragma omp ordered depend(source) and
2231    #pragma omp ordered depend(sink:...) constructs in the current
2232    #pragma omp for ordered(n) region.  */
2233 
2234 static void
2235 expand_omp_ordered_source_sink (struct omp_region *region,
2236 				struct omp_for_data *fd, tree *counts,
2237 				basic_block cont_bb)
2238 {
2239   struct omp_region *inner;
2240   int i;
2241   for (i = fd->collapse - 1; i < fd->ordered; i++)
2242     if (i == fd->collapse - 1 && fd->collapse > 1)
2243       counts[i] = NULL_TREE;
2244     else if (i >= fd->collapse && !cont_bb)
2245       counts[i] = build_zero_cst (fd->iter_type);
2246     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2247 	     && integer_onep (fd->loops[i].step))
2248       counts[i] = NULL_TREE;
2249     else
2250       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2251   tree atype
2252     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2253   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2254   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2255 
2256   for (inner = region->inner; inner; inner = inner->next)
2257     if (inner->type == GIMPLE_OMP_ORDERED)
2258       {
2259 	gomp_ordered *ord_stmt = inner->ord_stmt;
2260 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2261 	location_t loc = gimple_location (ord_stmt);
2262 	tree c;
2263 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2264 	     c; c = OMP_CLAUSE_CHAIN (c))
2265 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2266 	    break;
2267 	if (c)
2268 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2269 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2270 	     c; c = OMP_CLAUSE_CHAIN (c))
2271 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2272 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2273 	gsi_remove (&gsi, true);
2274       }
2275 }
2276 
2277 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2278    collapsed.  */
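
/* E.g. for ordered(2) without collapse, the body of the workshared
   loop over V1 gets one sequential loop materialized around it,
   roughly (an illustrative sketch)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  {
	    .orditera[1] = <count of V2 iterations done so far>;
	    BODY;
	  }
   so that depend(source)/depend(sink) observe a complete iteration
   vector.  */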
2279 
2280 static basic_block
2281 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2282 			      basic_block cont_bb, basic_block body_bb,
2283 			      bool ordered_lastprivate)
2284 {
2285   if (fd->ordered == fd->collapse)
2286     return cont_bb;
2287 
2288   if (!cont_bb)
2289     {
2290       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2291       for (int i = fd->collapse; i < fd->ordered; i++)
2292 	{
2293 	  tree type = TREE_TYPE (fd->loops[i].v);
2294 	  tree n1 = fold_convert (type, fd->loops[i].n1);
2295 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2296 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2297 			      size_int (i - fd->collapse + 1),
2298 			      NULL_TREE, NULL_TREE);
2299 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2300 	}
2301       return NULL;
2302     }
2303 
2304   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2305     {
2306       tree t, type = TREE_TYPE (fd->loops[i].v);
2307       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2308       expand_omp_build_assign (&gsi, fd->loops[i].v,
2309 			       fold_convert (type, fd->loops[i].n1));
2310       if (counts[i])
2311 	expand_omp_build_assign (&gsi, counts[i],
2312 				 build_zero_cst (fd->iter_type));
2313       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 			  size_int (i - fd->collapse + 1),
2315 			  NULL_TREE, NULL_TREE);
2316       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2317       if (!gsi_end_p (gsi))
2318 	gsi_prev (&gsi);
2319       else
2320 	gsi = gsi_last_bb (body_bb);
2321       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2322       basic_block new_body = e1->dest;
2323       if (body_bb == cont_bb)
2324 	cont_bb = new_body;
2325       edge e2 = NULL;
2326       basic_block new_header;
2327       if (EDGE_COUNT (cont_bb->preds) > 0)
2328 	{
2329 	  gsi = gsi_last_bb (cont_bb);
2330 	  if (POINTER_TYPE_P (type))
2331 	    t = fold_build_pointer_plus (fd->loops[i].v,
2332 					 fold_convert (sizetype,
2333 						       fd->loops[i].step));
2334 	  else
2335 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2336 			     fold_convert (type, fd->loops[i].step));
2337 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2338 	  if (counts[i])
2339 	    {
2340 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2341 			       build_int_cst (fd->iter_type, 1));
2342 	      expand_omp_build_assign (&gsi, counts[i], t);
2343 	      t = counts[i];
2344 	    }
2345 	  else
2346 	    {
2347 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2348 			       fd->loops[i].v, fd->loops[i].n1);
2349 	      t = fold_convert (fd->iter_type, t);
2350 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2351 					    true, GSI_SAME_STMT);
2352 	    }
2353 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2354 			 size_int (i - fd->collapse + 1),
2355 			 NULL_TREE, NULL_TREE);
2356 	  expand_omp_build_assign (&gsi, aref, t);
2357 	  gsi_prev (&gsi);
2358 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2359 	  new_header = e2->dest;
2360 	}
2361       else
2362 	new_header = cont_bb;
2363       gsi = gsi_after_labels (new_header);
2364       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2365 					 true, GSI_SAME_STMT);
2366       tree n2
2367 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2368 				    true, NULL_TREE, true, GSI_SAME_STMT);
2369       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2370       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2371       edge e3 = split_block (new_header, gsi_stmt (gsi));
2372       cont_bb = e3->dest;
2373       remove_edge (e1);
2374       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2375       e3->flags = EDGE_FALSE_VALUE;
2376       e3->probability = REG_BR_PROB_BASE / 8;
2377       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2378       e1->probability = REG_BR_PROB_BASE - e3->probability;
2379 
2380       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2381       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2382 
2383       if (e2)
2384 	{
2385 	  struct loop *loop = alloc_loop ();
2386 	  loop->header = new_header;
2387 	  loop->latch = e2->src;
2388 	  add_loop (loop, body_bb->loop_father);
2389 	}
2390     }
2391 
2392   /* If there are any lastprivate clauses and it is possible some loops
2393      might have zero iterations, ensure all the decls are initialized,
2394      otherwise we could crash evaluating C++ class iterators with lastprivate
2395      clauses.  */
2396   bool need_inits = false;
2397   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2398     if (need_inits)
2399       {
2400 	tree type = TREE_TYPE (fd->loops[i].v);
2401 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2402 	expand_omp_build_assign (&gsi, fd->loops[i].v,
2403 				 fold_convert (type, fd->loops[i].n1));
2404       }
2405     else
2406       {
2407 	tree type = TREE_TYPE (fd->loops[i].v);
2408 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2409 				      boolean_type_node,
2410 				      fold_convert (type, fd->loops[i].n1),
2411 				      fold_convert (type, fd->loops[i].n2));
2412 	if (!integer_onep (this_cond))
2413 	  need_inits = true;
2414       }
2415 
2416   return cont_bb;
2417 }
2418 
2419 /* A subroutine of expand_omp_for.  Generate code for a parallel
2420    loop with any schedule.  Given parameters:
2421 
2422 	for (V = N1; V cond N2; V += STEP) BODY;
2423 
2424    where COND is "<" or ">", we generate pseudocode
2425 
2426 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2427 	if (more) goto L0; else goto L3;
2428     L0:
2429 	V = istart0;
2430 	iend = iend0;
2431     L1:
2432 	BODY;
2433 	V += STEP;
2434 	if (V cond iend) goto L1; else goto L2;
2435     L2:
2436 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2437     L3:
2438 
2439     If this is a combined omp parallel loop, instead of the call to
2440     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2441     If this is a gimple_omp_for_combined_p loop, then instead of assigning
2442     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2443     inner GIMPLE_OMP_FOR and V += STEP; and
2444     if (V cond iend) goto L1; else goto L2; are removed.
2445 
2446     For collapsed loops, given parameters:
2447       collapse(3)
2448       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2449 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2450 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2451 	    BODY;
2452 
2453     we generate pseudocode
2454 
2455 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2456 	if (cond3 is <)
2457 	  adj = STEP3 - 1;
2458 	else
2459 	  adj = STEP3 + 1;
2460 	count3 = (adj + N32 - N31) / STEP3;
2461 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2462 	if (cond2 is <)
2463 	  adj = STEP2 - 1;
2464 	else
2465 	  adj = STEP2 + 1;
2466 	count2 = (adj + N22 - N21) / STEP2;
2467 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2468 	if (cond1 is <)
2469 	  adj = STEP1 - 1;
2470 	else
2471 	  adj = STEP1 + 1;
2472 	count1 = (adj + N12 - N11) / STEP1;
2473 	count = count1 * count2 * count3;
2474 	goto Z1;
2475     Z0:
2476 	count = 0;
2477     Z1:
2478 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2479 	if (more) goto L0; else goto L3;
2480     L0:
2481 	V = istart0;
2482 	T = V;
2483 	V3 = N31 + (T % count3) * STEP3;
2484 	T = T / count3;
2485 	V2 = N21 + (T % count2) * STEP2;
2486 	T = T / count2;
2487 	V1 = N11 + T * STEP1;
2488 	iend = iend0;
2489     L1:
2490 	BODY;
2491 	V += 1;
2492 	if (V < iend) goto L10; else goto L2;
2493     L10:
2494 	V3 += STEP3;
2495 	if (V3 cond3 N32) goto L1; else goto L11;
2496     L11:
2497 	V3 = N31;
2498 	V2 += STEP2;
2499 	if (V2 cond2 N22) goto L1; else goto L12;
2500     L12:
2501 	V2 = N21;
2502 	V1 += STEP1;
2503 	goto L1;
2504     L2:
2505 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2506     L3:
2507 
2508       */
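
/* For instance, for schedule(dynamic, 4) with a long iteration type,
   START_FN and NEXT_FN resolve to the corresponding libgomp entry
   points, so the pseudocode above becomes roughly
	more = GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0);
	...
	more = GOMP_loop_dynamic_next (&istart0, &iend0);
   (illustrative only; the builtins are chosen by the caller,
   expand_omp_for).  */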
2509 
2510 static void
2511 expand_omp_for_generic (struct omp_region *region,
2512 			struct omp_for_data *fd,
2513 			enum built_in_function start_fn,
2514 			enum built_in_function next_fn,
2515 			gimple *inner_stmt)
2516 {
2517   tree type, istart0, iend0, iend;
2518   tree t, vmain, vback, bias = NULL_TREE;
2519   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2520   basic_block l2_bb = NULL, l3_bb = NULL;
2521   gimple_stmt_iterator gsi;
2522   gassign *assign_stmt;
2523   bool in_combined_parallel = is_combined_parallel (region);
2524   bool broken_loop = region->cont == NULL;
2525   edge e, ne;
2526   tree *counts = NULL;
2527   int i;
2528   bool ordered_lastprivate = false;
2529 
2530   gcc_assert (!broken_loop || !in_combined_parallel);
2531   gcc_assert (fd->iter_type == long_integer_type_node
2532 	      || !in_combined_parallel);
2533 
2534   entry_bb = region->entry;
2535   cont_bb = region->cont;
2536   collapse_bb = NULL;
2537   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2538   gcc_assert (broken_loop
2539 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2540   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2541   l1_bb = single_succ (l0_bb);
2542   if (!broken_loop)
2543     {
2544       l2_bb = create_empty_bb (cont_bb);
2545       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2546 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2547 		      == l1_bb));
2548       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2549     }
2550   else
2551     l2_bb = NULL;
2552   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2553   exit_bb = region->exit;
2554 
2555   gsi = gsi_last_bb (entry_bb);
2556 
2557   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2558   if (fd->ordered
2559       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2560 			  OMP_CLAUSE_LASTPRIVATE))
2561     ordered_lastprivate = true;
2562   if (fd->collapse > 1 || fd->ordered)
2563     {
2564       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2565       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2566 
2567       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2568       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2569 				  zero_iter1_bb, first_zero_iter1,
2570 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2571 
2572       if (zero_iter1_bb)
2573 	{
2574 	  /* Some counts[i] vars might be uninitialized if
2575 	     some loop has zero iterations.  But the body shouldn't
2576 	     be executed in that case, so just avoid uninit warnings.  */
2577 	  for (i = first_zero_iter1;
2578 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2579 	    if (SSA_VAR_P (counts[i]))
2580 	      TREE_NO_WARNING (counts[i]) = 1;
2581 	  gsi_prev (&gsi);
2582 	  e = split_block (entry_bb, gsi_stmt (gsi));
2583 	  entry_bb = e->dest;
2584 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2585 	  gsi = gsi_last_bb (entry_bb);
2586 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2587 				   get_immediate_dominator (CDI_DOMINATORS,
2588 							    zero_iter1_bb));
2589 	}
2590       if (zero_iter2_bb)
2591 	{
2592 	  /* Some counts[i] vars might be uninitialized if
2593 	     some loop has zero iterations.  But the body shouldn't
2594 	     be executed in that case, so just avoid uninit warnings.  */
2595 	  for (i = first_zero_iter2; i < fd->ordered; i++)
2596 	    if (SSA_VAR_P (counts[i]))
2597 	      TREE_NO_WARNING (counts[i]) = 1;
2598 	  if (zero_iter1_bb)
2599 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2600 	  else
2601 	    {
2602 	      gsi_prev (&gsi);
2603 	      e = split_block (entry_bb, gsi_stmt (gsi));
2604 	      entry_bb = e->dest;
2605 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2606 	      gsi = gsi_last_bb (entry_bb);
2607 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2608 				       get_immediate_dominator
2609 					 (CDI_DOMINATORS, zero_iter2_bb));
2610 	    }
2611 	}
2612       if (fd->collapse == 1)
2613 	{
2614 	  counts[0] = fd->loop.n2;
2615 	  fd->loop = fd->loops[0];
2616 	}
2617     }
2618 
2619   type = TREE_TYPE (fd->loop.v);
2620   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2621   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2622   TREE_ADDRESSABLE (istart0) = 1;
2623   TREE_ADDRESSABLE (iend0) = 1;
2624 
2625   /* See if we need to bias by LLONG_MIN.  */
2626   if (fd->iter_type == long_long_unsigned_type_node
2627       && TREE_CODE (type) == INTEGER_TYPE
2628       && !TYPE_UNSIGNED (type)
2629       && fd->ordered == 0)
2630     {
2631       tree n1, n2;
2632 
2633       if (fd->loop.cond_code == LT_EXPR)
2634 	{
2635 	  n1 = fd->loop.n1;
2636 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2637 	}
2638       else
2639 	{
2640 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2641 	  n2 = fd->loop.n1;
2642 	}
2643       if (TREE_CODE (n1) != INTEGER_CST
2644 	  || TREE_CODE (n2) != INTEGER_CST
2645 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2646 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2647     }
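
  /* E.g. a signed long long loop from -10 to 10 with an unsigned long
     long ITER_TYPE gets BIAS == LLONG_MIN: adding it maps the signed
     range monotonically into the unsigned one, and the bias is
     subtracted again below when istart0/iend0 are read back.  */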
2648 
2649   gimple_stmt_iterator gsif = gsi;
2650   gsi_prev (&gsif);
2651 
2652   tree arr = NULL_TREE;
2653   if (in_combined_parallel)
2654     {
2655       gcc_assert (fd->ordered == 0);
2656       /* In a combined parallel loop, emit a call to
2657 	 GOMP_loop_foo_next.  */
2658       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2659 			   build_fold_addr_expr (istart0),
2660 			   build_fold_addr_expr (iend0));
2661     }
2662   else
2663     {
2664       tree t0, t1, t2, t3, t4;
2665       /* If this is not a combined parallel loop, emit a call to
2666 	 GOMP_loop_foo_start in ENTRY_BB.  */
2667       t4 = build_fold_addr_expr (iend0);
2668       t3 = build_fold_addr_expr (istart0);
2669       if (fd->ordered)
2670 	{
2671 	  t0 = build_int_cst (unsigned_type_node,
2672 			      fd->ordered - fd->collapse + 1);
2673 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2674 							fd->ordered
2675 							- fd->collapse + 1),
2676 				".omp_counts");
2677 	  DECL_NAMELESS (arr) = 1;
2678 	  TREE_ADDRESSABLE (arr) = 1;
2679 	  TREE_STATIC (arr) = 1;
2680 	  vec<constructor_elt, va_gc> *v;
2681 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2682 	  int idx;
2683 
2684 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2685 	    {
2686 	      tree c;
2687 	      if (idx == 0 && fd->collapse > 1)
2688 		c = fd->loop.n2;
2689 	      else
2690 		c = counts[idx + fd->collapse - 1];
2691 	      tree purpose = size_int (idx);
2692 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2693 	      if (TREE_CODE (c) != INTEGER_CST)
2694 		TREE_STATIC (arr) = 0;
2695 	    }
2696 
2697 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2698 	  if (!TREE_STATIC (arr))
2699 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2700 						    void_type_node, arr),
2701 				      true, NULL_TREE, true, GSI_SAME_STMT);
2702 	  t1 = build_fold_addr_expr (arr);
2703 	  t2 = NULL_TREE;
2704 	}
2705       else
2706 	{
2707 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2708 	  t1 = fd->loop.n2;
2709 	  t0 = fd->loop.n1;
2710 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2711 	    {
2712 	      tree innerc
2713 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2714 				   OMP_CLAUSE__LOOPTEMP_);
2715 	      gcc_assert (innerc);
2716 	      t0 = OMP_CLAUSE_DECL (innerc);
2717 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2718 					OMP_CLAUSE__LOOPTEMP_);
2719 	      gcc_assert (innerc);
2720 	      t1 = OMP_CLAUSE_DECL (innerc);
2721 	    }
2722 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2723 	      && TYPE_PRECISION (TREE_TYPE (t0))
2724 		 != TYPE_PRECISION (fd->iter_type))
2725 	    {
2726 	      /* Avoid casting pointers to integer of a different size.  */
2727 	      tree itype = signed_type_for (type);
2728 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2729 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2730 	    }
2731 	  else
2732 	    {
2733 	      t1 = fold_convert (fd->iter_type, t1);
2734 	      t0 = fold_convert (fd->iter_type, t0);
2735 	    }
2736 	  if (bias)
2737 	    {
2738 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2739 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2740 	    }
2741 	}
2742       if (fd->iter_type == long_integer_type_node || fd->ordered)
2743 	{
2744 	  if (fd->chunk_size)
2745 	    {
2746 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2747 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2748 	      if (fd->ordered)
2749 		t = build_call_expr (builtin_decl_explicit (start_fn),
2750 				     5, t0, t1, t, t3, t4);
2751 	      else
2752 		t = build_call_expr (builtin_decl_explicit (start_fn),
2753 				     6, t0, t1, t2, t, t3, t4);
2754 	    }
2755 	  else if (fd->ordered)
2756 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2757 				 4, t0, t1, t3, t4);
2758 	  else
2759 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2760 				 5, t0, t1, t2, t3, t4);
2761 	}
2762       else
2763 	{
2764 	  tree t5;
2765 	  tree c_bool_type;
2766 	  tree bfn_decl;
2767 
2768 	  /* The GOMP_loop_ull_*start functions have an additional boolean
2769 	     argument, true for < loops and false for > loops.
2770 	     In Fortran, the C bool type can be different from
2771 	     boolean_type_node.  */
2772 	  bfn_decl = builtin_decl_explicit (start_fn);
2773 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2774 	  t5 = build_int_cst (c_bool_type,
2775 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2776 	  if (fd->chunk_size)
2777 	    {
2778 	      tree bfn_decl = builtin_decl_explicit (start_fn);
2779 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2780 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2781 	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2782 	    }
2783 	  else
2784 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2785 				 6, t5, t0, t1, t2, t3, t4);
2786 	}
2787     }
2788   if (TREE_TYPE (t) != boolean_type_node)
2789     t = fold_build2 (NE_EXPR, boolean_type_node,
2790 		     t, build_int_cst (TREE_TYPE (t), 0));
2791   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2792 				true, GSI_SAME_STMT);
2793   if (arr && !TREE_STATIC (arr))
2794     {
2795       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2796       TREE_THIS_VOLATILE (clobber) = 1;
2797       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2798 			 GSI_SAME_STMT);
2799     }
2800   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2801 
2802   /* Remove the GIMPLE_OMP_FOR statement.  */
2803   gsi_remove (&gsi, true);
2804 
2805   if (gsi_end_p (gsif))
2806     gsif = gsi_after_labels (gsi_bb (gsif));
2807   gsi_next (&gsif);
2808 
2809   /* Iteration setup for sequential loop goes in L0_BB.  */
2810   tree startvar = fd->loop.v;
2811   tree endvar = NULL_TREE;
2812 
2813   if (gimple_omp_for_combined_p (fd->for_stmt))
2814     {
2815       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2816 		  && gimple_omp_for_kind (inner_stmt)
2817 		     == GF_OMP_FOR_KIND_SIMD);
2818       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2819 				     OMP_CLAUSE__LOOPTEMP_);
2820       gcc_assert (innerc);
2821       startvar = OMP_CLAUSE_DECL (innerc);
2822       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2823 				OMP_CLAUSE__LOOPTEMP_);
2824       gcc_assert (innerc);
2825       endvar = OMP_CLAUSE_DECL (innerc);
2826     }
2827 
2828   gsi = gsi_start_bb (l0_bb);
2829   t = istart0;
2830   if (fd->ordered && fd->collapse == 1)
2831     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2832 		     fold_convert (fd->iter_type, fd->loop.step));
2833   else if (bias)
2834     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2835   if (fd->ordered && fd->collapse == 1)
2836     {
2837       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2838 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2839 			 fd->loop.n1, fold_convert (sizetype, t));
2840       else
2841 	{
2842 	  t = fold_convert (TREE_TYPE (startvar), t);
2843 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2844 			   fd->loop.n1, t);
2845 	}
2846     }
2847   else
2848     {
2849       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2850 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2851       t = fold_convert (TREE_TYPE (startvar), t);
2852     }
2853   t = force_gimple_operand_gsi (&gsi, t,
2854 				DECL_P (startvar)
2855 				&& TREE_ADDRESSABLE (startvar),
2856 				NULL_TREE, false, GSI_CONTINUE_LINKING);
2857   assign_stmt = gimple_build_assign (startvar, t);
2858   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2859 
2860   t = iend0;
2861   if (fd->ordered && fd->collapse == 1)
2862     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2863 		     fold_convert (fd->iter_type, fd->loop.step));
2864   else if (bias)
2865     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2866   if (fd->ordered && fd->collapse == 1)
2867     {
2868       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2869 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2870 			 fd->loop.n1, fold_convert (sizetype, t));
2871       else
2872 	{
2873 	  t = fold_convert (TREE_TYPE (startvar), t);
2874 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2875 			   fd->loop.n1, t);
2876 	}
2877     }
2878   else
2879     {
2880       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2881 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2882       t = fold_convert (TREE_TYPE (startvar), t);
2883     }
2884   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2885 				   false, GSI_CONTINUE_LINKING);
2886   if (endvar)
2887     {
2888       assign_stmt = gimple_build_assign (endvar, iend);
2889       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2890       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2891 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
2892       else
2893 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2894       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2895     }
2896   /* Handle linear clause adjustments.  */
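  /* E.g. for linear(x:2) with a chunked schedule, each chunk entry
     recomputes the privatized x roughly as
	x = x_orig + ((startvar - N1) / STEP) * 2;
     i.e. the original value plus the logical iteration number times the
     linear step (a sketch; the original value is captured into a
     temporary before the loop).  */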
2897   tree itercnt = NULL_TREE;
2898   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2899     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2900 	 c; c = OMP_CLAUSE_CHAIN (c))
2901       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2902 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2903 	{
2904 	  tree d = OMP_CLAUSE_DECL (c);
2905 	  bool is_ref = omp_is_reference (d);
2906 	  tree t = d, a, dest;
2907 	  if (is_ref)
2908 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2909 	  tree type = TREE_TYPE (t);
2910 	  if (POINTER_TYPE_P (type))
2911 	    type = sizetype;
2912 	  dest = unshare_expr (t);
2913 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
2914 	  expand_omp_build_assign (&gsif, v, t);
2915 	  if (itercnt == NULL_TREE)
2916 	    {
2917 	      itercnt = startvar;
2918 	      tree n1 = fd->loop.n1;
2919 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2920 		{
2921 		  itercnt
2922 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2923 				    itercnt);
2924 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
2925 		}
2926 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2927 				     itercnt, n1);
2928 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2929 				     itercnt, fd->loop.step);
2930 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2931 						  NULL_TREE, false,
2932 						  GSI_CONTINUE_LINKING);
2933 	    }
2934 	  a = fold_build2 (MULT_EXPR, type,
2935 			   fold_convert (type, itercnt),
2936 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2937 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2938 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2939 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2940 					false, GSI_CONTINUE_LINKING);
2941 	  assign_stmt = gimple_build_assign (dest, t);
2942 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2943 	}
2944   if (fd->collapse > 1)
2945     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2946 
2947   if (fd->ordered)
2948     {
2949       /* Until now, the counts array contained the number of iterations
2950 	 (or a variable containing it) for the ith loop.  From now on, we need
2951 	 those counts only for collapsed loops, and only for the 2nd
2952 	 till the last collapsed one.  Move those one element earlier,
2953 	 we'll use counts[fd->collapse - 1] for the first source/sink
2954 	 iteration counter and so on and counts[fd->ordered]
2955 	 as the array holding the current counter values for
2956 	 depend(source).  */
2957       if (fd->collapse > 1)
2958 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2959       if (broken_loop)
2960 	{
2961 	  int i;
2962 	  for (i = fd->collapse; i < fd->ordered; i++)
2963 	    {
2964 	      tree type = TREE_TYPE (fd->loops[i].v);
2965 	      tree this_cond
2966 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2967 			       fold_convert (type, fd->loops[i].n1),
2968 			       fold_convert (type, fd->loops[i].n2));
2969 	      if (!integer_onep (this_cond))
2970 		break;
2971 	    }
2972 	  if (i < fd->ordered)
2973 	    {
2974 	      cont_bb
2975 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2976 	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
2977 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2978 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2979 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2980 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2981 	      make_edge (cont_bb, l1_bb, 0);
2982 	      l2_bb = create_empty_bb (cont_bb);
2983 	      broken_loop = false;
2984 	    }
2985 	}
2986       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2987       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2988 					      ordered_lastprivate);
2989       if (counts[fd->collapse - 1])
2990 	{
2991 	  gcc_assert (fd->collapse == 1);
2992 	  gsi = gsi_last_bb (l0_bb);
2993 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2994 				   istart0, true);
2995 	  gsi = gsi_last_bb (cont_bb);
2996 	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2997 			   build_int_cst (fd->iter_type, 1));
2998 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2999 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3000 			      size_zero_node, NULL_TREE, NULL_TREE);
3001 	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3002 	  t = counts[fd->collapse - 1];
3003 	}
3004       else if (fd->collapse > 1)
3005 	t = fd->loop.v;
3006       else
3007 	{
3008 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3009 			   fd->loops[0].v, fd->loops[0].n1);
3010 	  t = fold_convert (fd->iter_type, t);
3011 	}
3012       gsi = gsi_last_bb (l0_bb);
3013       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3014 			  size_zero_node, NULL_TREE, NULL_TREE);
3015       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3016 				    false, GSI_CONTINUE_LINKING);
3017       expand_omp_build_assign (&gsi, aref, t, true);
3018     }
3019 
3020   if (!broken_loop)
3021     {
3022       /* Code to control the increment and predicate for the sequential
3023 	 loop goes in the CONT_BB.  */
3024       gsi = gsi_last_bb (cont_bb);
3025       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3026       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3027       vmain = gimple_omp_continue_control_use (cont_stmt);
3028       vback = gimple_omp_continue_control_def (cont_stmt);
3029 
3030       if (!gimple_omp_for_combined_p (fd->for_stmt))
3031 	{
3032 	  if (POINTER_TYPE_P (type))
3033 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3034 	  else
3035 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3036 	  t = force_gimple_operand_gsi (&gsi, t,
3037 					DECL_P (vback)
3038 					&& TREE_ADDRESSABLE (vback),
3039 					NULL_TREE, true, GSI_SAME_STMT);
3040 	  assign_stmt = gimple_build_assign (vback, t);
3041 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3042 
3043 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3044 	    {
3045 	      if (fd->collapse > 1)
3046 		t = fd->loop.v;
3047 	      else
3048 		{
3049 		  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3050 				   fd->loops[0].v, fd->loops[0].n1);
3051 		  t = fold_convert (fd->iter_type, t);
3052 		}
3053 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3054 				  counts[fd->ordered], size_zero_node,
3055 				  NULL_TREE, NULL_TREE);
3056 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3057 					    true, GSI_SAME_STMT);
3058 	      expand_omp_build_assign (&gsi, aref, t);
3059 	    }
3060 
3061 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3062 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3063 		      iend);
3064 	  gcond *cond_stmt = gimple_build_cond_empty (t);
3065 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3066 	}
3067 
3068       /* Remove GIMPLE_OMP_CONTINUE.  */
3069       gsi_remove (&gsi, true);
3070 
3071       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3072 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3073 
3074       /* Emit code to get the next parallel iteration in L2_BB.  */
3075       gsi = gsi_start_bb (l2_bb);
3076 
3077       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3078 			   build_fold_addr_expr (istart0),
3079 			   build_fold_addr_expr (iend0));
3080       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3081 				    false, GSI_CONTINUE_LINKING);
3082       if (TREE_TYPE (t) != boolean_type_node)
3083 	t = fold_build2 (NE_EXPR, boolean_type_node,
3084 			 t, build_int_cst (TREE_TYPE (t), 0));
3085       gcond *cond_stmt = gimple_build_cond_empty (t);
3086       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3087     }
3088 
3089   /* Add the loop cleanup function.  */
3090   gsi = gsi_last_bb (exit_bb);
3091   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3092     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3093   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3094     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3095   else
3096     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3097   gcall *call_stmt = gimple_build_call (t, 0);
3098   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3099     gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3100   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3101   if (fd->ordered)
3102     {
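      /* counts[fd->ordered] is dead past this point; emit a clobber (an
	 empty volatile CONSTRUCTOR) so later passes know its storage can
	 be reused.  */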
3103       tree arr = counts[fd->ordered];
3104       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3105       TREE_THIS_VOLATILE (clobber) = 1;
3106       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3107 			GSI_SAME_STMT);
3108     }
3109   gsi_remove (&gsi, true);
3110 
3111   /* Connect the new blocks.  */
3112   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3113   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3114 
3115   if (!broken_loop)
3116     {
3117       gimple_seq phis;
3118 
3119       e = find_edge (cont_bb, l3_bb);
3120       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3121 
3122       phis = phi_nodes (l3_bb);
3123       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3124 	{
3125 	  gimple *phi = gsi_stmt (gsi);
3126 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3127 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3128 	}
3129       remove_edge (e);
3130 
3131       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3132       e = find_edge (cont_bb, l1_bb);
3133       if (e == NULL)
3134 	{
3135 	  e = BRANCH_EDGE (cont_bb);
3136 	  gcc_assert (single_succ (e->dest) == l1_bb);
3137 	}
3138       if (gimple_omp_for_combined_p (fd->for_stmt))
3139 	{
3140 	  remove_edge (e);
3141 	  e = NULL;
3142 	}
3143       else if (fd->collapse > 1)
3144 	{
3145 	  remove_edge (e);
3146 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3147 	}
3148       else
3149 	e->flags = EDGE_TRUE_VALUE;
3150       if (e)
3151 	{
3152 	  e->probability = REG_BR_PROB_BASE * 7 / 8;
3153 	  find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3154 	}
3155       else
3156 	{
3157 	  e = find_edge (cont_bb, l2_bb);
3158 	  e->flags = EDGE_FALLTHRU;
3159 	}
3160       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3161 
3162       if (gimple_in_ssa_p (cfun))
3163 	{
3164 	  /* Add phis to the outer loop that connect to the phis in the inner,
3165 	     original loop, and move the loop entry value of the inner phi to
3166 	     the loop entry value of the outer phi.  */
3167 	  gphi_iterator psi;
3168 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3169 	    {
3170 	      source_location locus;
3171 	      gphi *nphi;
3172 	      gphi *exit_phi = psi.phi ();
3173 
3174 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3175 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3176 
3177 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3178 	      edge latch_to_l1 = find_edge (latch, l1_bb);
3179 	      gphi *inner_phi
3180 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3181 
3182 	      tree t = gimple_phi_result (exit_phi);
3183 	      tree new_res = copy_ssa_name (t, NULL);
3184 	      nphi = create_phi_node (new_res, l0_bb);
3185 
3186 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3187 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3188 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3189 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3190 	      add_phi_arg (nphi, t, entry_to_l0, locus);
3191 
3192 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3193 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3194 
3195 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3196 	    }
3197 	}
3198 
3199       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3200 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3201       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3202 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3203       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3204 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3205       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3206 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3207 
3208       /* We enter expand_omp_for_generic with a loop.  This original loop may
3209 	 have its own loop struct, or it may be part of an outer loop struct
3210 	 (which may be the fake loop).  */
3211       struct loop *outer_loop = entry_bb->loop_father;
3212       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3213 
3214       add_bb_to_loop (l2_bb, outer_loop);
3215 
3216       /* We've added a new loop around the original loop.  Allocate the
3217 	 corresponding loop struct.  */
3218       struct loop *new_loop = alloc_loop ();
3219       new_loop->header = l0_bb;
3220       new_loop->latch = l2_bb;
3221       add_loop (new_loop, outer_loop);
3222 
3223       /* Allocate a loop structure for the original loop unless we already
3224 	 had one.  */
3225       if (!orig_loop_has_loop_struct
3226 	  && !gimple_omp_for_combined_p (fd->for_stmt))
3227 	{
3228 	  struct loop *orig_loop = alloc_loop ();
3229 	  orig_loop->header = l1_bb;
3230 	  /* The loop may have multiple latches.  */
3231 	  add_loop (orig_loop, new_loop);
3232 	}
3233     }
3234 }
3235 
3236 /* A subroutine of expand_omp_for.  Generate code for a parallel
3237    loop with static schedule and no specified chunk size.  Given
3238    parameters:
3239 
3240 	for (V = N1; V cond N2; V += STEP) BODY;
3241 
3242    where COND is "<" or ">", we generate pseudocode
3243 
3244 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3245 	if (cond is <)
3246 	  adj = STEP - 1;
3247 	else
3248 	  adj = STEP + 1;
3249 	if ((__typeof (V)) -1 > 0 && cond is >)
3250 	  n = -(adj + N2 - N1) / -STEP;
3251 	else
3252 	  n = (adj + N2 - N1) / STEP;
3253 	q = n / nthreads;
3254 	tt = n % nthreads;
3255 	if (threadid < tt) goto L3; else goto L4;
3256     L3:
3257 	tt = 0;
3258 	q = q + 1;
3259     L4:
3260 	s0 = q * threadid + tt;
3261 	e0 = s0 + q;
3262 	V = s0 * STEP + N1;
3263 	if (s0 >= e0) goto L2; else goto L0;
3264     L0:
3265 	e = e0 * STEP + N1;
3266     L1:
3267 	BODY;
3268 	V += STEP;
3269 	if (V cond e) goto L1;
3270     L2:
3271 */
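
/* Illustrative example (not part of the original source): with N1 = 0,
   N2 = 103, STEP = 1, cond "<" and nthreads = 4 we get n = 103, q = 25
   and tt = 3, so the three threads with threadid < tt execute q + 1 = 26
   iterations each and the last thread executes 25:
	thread 0: [0, 26)    thread 1: [26, 52)
	thread 2: [52, 78)   thread 3: [78, 103).  */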
3272 
3273 static void
3274 expand_omp_for_static_nochunk (struct omp_region *region,
3275 			       struct omp_for_data *fd,
3276 			       gimple *inner_stmt)
3277 {
3278   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3279   tree type, itype, vmain, vback;
3280   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3281   basic_block body_bb, cont_bb, collapse_bb = NULL;
3282   basic_block fin_bb;
3283   gimple_stmt_iterator gsi;
3284   edge ep;
3285   bool broken_loop = region->cont == NULL;
3286   tree *counts = NULL;
3287   tree n1, n2, step;
3288 
3289   itype = type = TREE_TYPE (fd->loop.v);
3290   if (POINTER_TYPE_P (type))
3291     itype = signed_type_for (type);
3292 
3293   entry_bb = region->entry;
3294   cont_bb = region->cont;
3295   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3296   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3297   gcc_assert (broken_loop
3298 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3299   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3300   body_bb = single_succ (seq_start_bb);
3301   if (!broken_loop)
3302     {
3303       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3304 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3305       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3306     }
3307   exit_bb = region->exit;
3308 
3309   /* Iteration space partitioning goes in ENTRY_BB.  */
3310   gsi = gsi_last_bb (entry_bb);
3311   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3312 
3313   if (fd->collapse > 1)
3314     {
3315       int first_zero_iter = -1, dummy = -1;
3316       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3317 
3318       counts = XALLOCAVEC (tree, fd->collapse);
3319       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3320 				  fin_bb, first_zero_iter,
3321 				  dummy_bb, dummy, l2_dom_bb);
3322       t = NULL_TREE;
3323     }
3324   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3325     t = integer_one_node;
3326   else
3327     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3328 		     fold_convert (type, fd->loop.n1),
3329 		     fold_convert (type, fd->loop.n2));
3330   if (fd->collapse == 1
3331       && TYPE_UNSIGNED (type)
3332       && (t == NULL_TREE || !integer_onep (t)))
3333     {
3334       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3335       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3336 				     true, GSI_SAME_STMT);
3337       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3338       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3339 				     true, GSI_SAME_STMT);
3340       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3341 						 NULL_TREE, NULL_TREE);
3342       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3343       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3344 		     expand_omp_regimplify_p, NULL, NULL)
3345 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3346 			expand_omp_regimplify_p, NULL, NULL))
3347 	{
3348 	  gsi = gsi_for_stmt (cond_stmt);
3349 	  gimple_regimplify_operands (cond_stmt, &gsi);
3350 	}
3351       ep = split_block (entry_bb, cond_stmt);
3352       ep->flags = EDGE_TRUE_VALUE;
3353       entry_bb = ep->dest;
3354       ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3355       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3356       ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3357       if (gimple_in_ssa_p (cfun))
3358 	{
3359 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3360 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3361 	       !gsi_end_p (gpi); gsi_next (&gpi))
3362 	    {
3363 	      gphi *phi = gpi.phi ();
3364 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3365 			   ep, UNKNOWN_LOCATION);
3366 	    }
3367 	}
3368       gsi = gsi_last_bb (entry_bb);
3369     }
3370 
3371   switch (gimple_omp_for_kind (fd->for_stmt))
3372     {
3373     case GF_OMP_FOR_KIND_FOR:
3374       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3375       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3376       break;
3377     case GF_OMP_FOR_KIND_DISTRIBUTE:
3378       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3379       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3380       break;
3381     default:
3382       gcc_unreachable ();
3383     }
3384   nthreads = build_call_expr (nthreads, 0);
3385   nthreads = fold_convert (itype, nthreads);
3386   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3387 				       true, GSI_SAME_STMT);
3388   threadid = build_call_expr (threadid, 0);
3389   threadid = fold_convert (itype, threadid);
3390   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3391 				       true, GSI_SAME_STMT);
3392 
3393   n1 = fd->loop.n1;
3394   n2 = fd->loop.n2;
3395   step = fd->loop.step;
3396   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3397     {
3398       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3399 				     OMP_CLAUSE__LOOPTEMP_);
3400       gcc_assert (innerc);
3401       n1 = OMP_CLAUSE_DECL (innerc);
3402       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3403 				OMP_CLAUSE__LOOPTEMP_);
3404       gcc_assert (innerc);
3405       n2 = OMP_CLAUSE_DECL (innerc);
3406     }
3407   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3408 				 true, NULL_TREE, true, GSI_SAME_STMT);
3409   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3410 				 true, NULL_TREE, true, GSI_SAME_STMT);
3411   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3412 				   true, NULL_TREE, true, GSI_SAME_STMT);
3413 
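  /* Compute the iteration count n = (adj + N2 - N1) / STEP from the
     pseudocode above; for unsigned ITYPE with cond ">" both dividend and
     divisor are negated so the truncating division is done on positive
     values.  */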
3414   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3415   t = fold_build2 (PLUS_EXPR, itype, step, t);
3416   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3417   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3418   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3419     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3420 		     fold_build1 (NEGATE_EXPR, itype, t),
3421 		     fold_build1 (NEGATE_EXPR, itype, step));
3422   else
3423     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3424   t = fold_convert (itype, t);
3425   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3426 
3427   q = create_tmp_reg (itype, "q");
3428   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3429   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3430   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3431 
3432   tt = create_tmp_reg (itype, "tt");
3433   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3434   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3435   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3436 
3437   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3438   gcond *cond_stmt = gimple_build_cond_empty (t);
3439   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3440 
3441   second_bb = split_block (entry_bb, cond_stmt)->dest;
3442   gsi = gsi_last_bb (second_bb);
3443   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3444 
3445   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3446 		     GSI_SAME_STMT);
3447   gassign *assign_stmt
3448     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3449   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3450 
3451   third_bb = split_block (second_bb, assign_stmt)->dest;
3452   gsi = gsi_last_bb (third_bb);
3453   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3454 
3455   t = build2 (MULT_EXPR, itype, q, threadid);
3456   t = build2 (PLUS_EXPR, itype, t, tt);
3457   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3458 
3459   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3460   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3461 
3462   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3463   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3464 
3465   /* Remove the GIMPLE_OMP_FOR statement.  */
3466   gsi_remove (&gsi, true);
3467 
3468   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3469   gsi = gsi_start_bb (seq_start_bb);
3470 
3471   tree startvar = fd->loop.v;
3472   tree endvar = NULL_TREE;
3473 
3474   if (gimple_omp_for_combined_p (fd->for_stmt))
3475     {
3476       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3477 		     ? gimple_omp_parallel_clauses (inner_stmt)
3478 		     : gimple_omp_for_clauses (inner_stmt);
3479       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3480       gcc_assert (innerc);
3481       startvar = OMP_CLAUSE_DECL (innerc);
3482       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 				OMP_CLAUSE__LOOPTEMP_);
3484       gcc_assert (innerc);
3485       endvar = OMP_CLAUSE_DECL (innerc);
3486       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3487 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3488 	{
3489 	  int i;
3490 	  for (i = 1; i < fd->collapse; i++)
3491 	    {
3492 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3493 					OMP_CLAUSE__LOOPTEMP_);
3494 	      gcc_assert (innerc);
3495 	    }
3496 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3497 				    OMP_CLAUSE__LOOPTEMP_);
3498 	  if (innerc)
3499 	    {
3500 	      /* If needed (distribute parallel for with lastprivate),
3501 		 propagate down the total number of iterations.  */
3502 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3503 				     fd->loop.n2);
3504 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3505 					    GSI_CONTINUE_LINKING);
3506 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3507 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3508 	    }
3509 	}
3510     }
3511   t = fold_convert (itype, s0);
3512   t = fold_build2 (MULT_EXPR, itype, t, step);
3513   if (POINTER_TYPE_P (type))
3514     t = fold_build_pointer_plus (n1, t);
3515   else
3516     t = fold_build2 (PLUS_EXPR, type, t, n1);
3517   t = fold_convert (TREE_TYPE (startvar), t);
3518   t = force_gimple_operand_gsi (&gsi, t,
3519 				DECL_P (startvar)
3520 				&& TREE_ADDRESSABLE (startvar),
3521 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3522   assign_stmt = gimple_build_assign (startvar, t);
3523   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3524 
3525   t = fold_convert (itype, e0);
3526   t = fold_build2 (MULT_EXPR, itype, t, step);
3527   if (POINTER_TYPE_P (type))
3528     t = fold_build_pointer_plus (n1, t);
3529   else
3530     t = fold_build2 (PLUS_EXPR, type, t, n1);
3531   t = fold_convert (TREE_TYPE (startvar), t);
3532   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3533 				false, GSI_CONTINUE_LINKING);
3534   if (endvar)
3535     {
3536       assign_stmt = gimple_build_assign (endvar, e);
3537       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3538       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3539 	assign_stmt = gimple_build_assign (fd->loop.v, e);
3540       else
3541 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3542       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3543     }
3544   /* Handle linear clause adjustments.  */
3545   tree itercnt = NULL_TREE;
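  /* Each such variable is advanced by ITERCNT * LINEAR-STEP, where ITERCNT
     is the logical iteration number at which this thread starts.  */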
3546   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3547     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3548 	 c; c = OMP_CLAUSE_CHAIN (c))
3549       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3550 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3551 	{
3552 	  tree d = OMP_CLAUSE_DECL (c);
3553 	  bool is_ref = omp_is_reference (d);
3554 	  tree t = d, a, dest;
3555 	  if (is_ref)
3556 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3557 	  if (itercnt == NULL_TREE)
3558 	    {
3559 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3560 		{
3561 		  itercnt = fold_build2 (MINUS_EXPR, itype,
3562 					 fold_convert (itype, n1),
3563 					 fold_convert (itype, fd->loop.n1));
3564 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3565 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3566 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3567 						      NULL_TREE, false,
3568 						      GSI_CONTINUE_LINKING);
3569 		}
3570 	      else
3571 		itercnt = s0;
3572 	    }
3573 	  tree type = TREE_TYPE (t);
3574 	  if (POINTER_TYPE_P (type))
3575 	    type = sizetype;
3576 	  a = fold_build2 (MULT_EXPR, type,
3577 			   fold_convert (type, itercnt),
3578 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3579 	  dest = unshare_expr (t);
3580 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3581 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3582 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3583 					false, GSI_CONTINUE_LINKING);
3584 	  assign_stmt = gimple_build_assign (dest, t);
3585 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3586 	}
3587   if (fd->collapse > 1)
3588     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3589 
3590   if (!broken_loop)
3591     {
3592       /* The code controlling the sequential loop replaces the
3593 	 GIMPLE_OMP_CONTINUE.  */
3594       gsi = gsi_last_bb (cont_bb);
3595       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3596       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3597       vmain = gimple_omp_continue_control_use (cont_stmt);
3598       vback = gimple_omp_continue_control_def (cont_stmt);
3599 
3600       if (!gimple_omp_for_combined_p (fd->for_stmt))
3601 	{
3602 	  if (POINTER_TYPE_P (type))
3603 	    t = fold_build_pointer_plus (vmain, step);
3604 	  else
3605 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3606 	  t = force_gimple_operand_gsi (&gsi, t,
3607 					DECL_P (vback)
3608 					&& TREE_ADDRESSABLE (vback),
3609 					NULL_TREE, true, GSI_SAME_STMT);
3610 	  assign_stmt = gimple_build_assign (vback, t);
3611 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3612 
3613 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3614 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3615 		      ? t : vback, e);
3616 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3617 	}
3618 
3619       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3620       gsi_remove (&gsi, true);
3621 
3622       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3623 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3624     }
3625 
3626   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3627   gsi = gsi_last_bb (exit_bb);
3628   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3629     {
3630       t = gimple_omp_return_lhs (gsi_stmt (gsi));
3631       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3632     }
3633   gsi_remove (&gsi, true);
3634 
3635   /* Connect all the blocks.  */
3636   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3637   ep->probability = REG_BR_PROB_BASE / 4 * 3;
3638   ep = find_edge (entry_bb, second_bb);
3639   ep->flags = EDGE_TRUE_VALUE;
3640   ep->probability = REG_BR_PROB_BASE / 4;
3641   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3642   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3643 
3644   if (!broken_loop)
3645     {
3646       ep = find_edge (cont_bb, body_bb);
3647       if (ep == NULL)
3648 	{
3649 	  ep = BRANCH_EDGE (cont_bb);
3650 	  gcc_assert (single_succ (ep->dest) == body_bb);
3651 	}
3652       if (gimple_omp_for_combined_p (fd->for_stmt))
3653 	{
3654 	  remove_edge (ep);
3655 	  ep = NULL;
3656 	}
3657       else if (fd->collapse > 1)
3658 	{
3659 	  remove_edge (ep);
3660 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3661 	}
3662       else
3663 	ep->flags = EDGE_TRUE_VALUE;
3664       find_edge (cont_bb, fin_bb)->flags
3665 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3666     }
3667 
3668   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3669   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3670   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3671 
3672   set_immediate_dominator (CDI_DOMINATORS, body_bb,
3673 			   recompute_dominator (CDI_DOMINATORS, body_bb));
3674   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3675 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3676 
3677   struct loop *loop = body_bb->loop_father;
3678   if (loop != entry_bb->loop_father)
3679     {
3680       gcc_assert (broken_loop || loop->header == body_bb);
3681       gcc_assert (broken_loop
3682 		  || loop->latch == region->cont
3683 		  || single_pred (loop->latch) == region->cont);
3684       return;
3685     }
3686 
3687   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3688     {
3689       loop = alloc_loop ();
3690       loop->header = body_bb;
3691       if (collapse_bb == NULL)
3692 	loop->latch = cont_bb;
3693       add_loop (loop, body_bb->loop_father);
3694     }
3695 }
3696 
3697 /* Return phi in E->DEST with ARG on edge E.  */
3698 
3699 static gphi *
3700 find_phi_with_arg_on_edge (tree arg, edge e)
3701 {
3702   basic_block bb = e->dest;
3703 
3704   for (gphi_iterator gpi = gsi_start_phis (bb);
3705        !gsi_end_p (gpi);
3706        gsi_next (&gpi))
3707     {
3708       gphi *phi = gpi.phi ();
3709       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3710 	return phi;
3711     }
3712 
3713   return NULL;
3714 }
3715 
3716 /* A subroutine of expand_omp_for.  Generate code for a parallel
3717    loop with static schedule and a specified chunk size.  Given
3718    parameters:
3719 
3720 	for (V = N1; V cond N2; V += STEP) BODY;
3721 
3722    where COND is "<" or ">", we generate pseudocode
3723 
3724 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3725 	if (cond is <)
3726 	  adj = STEP - 1;
3727 	else
3728 	  adj = STEP + 1;
3729 	if ((__typeof (V)) -1 > 0 && cond is >)
3730 	  n = -(adj + N2 - N1) / -STEP;
3731 	else
3732 	  n = (adj + N2 - N1) / STEP;
3733 	trip = 0;
3734 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3735 					      here so that V is defined
3736 					      if the loop is not entered
3737     L0:
3738 	s0 = (trip * nthreads + threadid) * CHUNK;
3739 	e0 = min (s0 + CHUNK, n);
3740 	if (s0 < n) goto L1; else goto L4;
3741     L1:
3742 	V = s0 * STEP + N1;
3743 	e = e0 * STEP + N1;
3744     L2:
3745 	BODY;
3746 	V += STEP;
3747 	if (V cond e) goto L2; else goto L3;
3748     L3:
3749 	trip += 1;
3750 	goto L0;
3751     L4:
3752 */
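
/* Illustrative example (not part of the original source): with CHUNK = 8,
   nthreads = 4 and n = 50, trip 0 hands threads 0..3 the ranges [0,8),
   [8,16), [16,24) and [24,32); trip 1 hands out [32,40), [40,48) and the
   partial chunk [48,50); all remaining s0 values are >= n, so the threads
   exit.  */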
3753 
3754 static void
3755 expand_omp_for_static_chunk (struct omp_region *region,
3756 			     struct omp_for_data *fd, gimple *inner_stmt)
3757 {
3758   tree n, s0, e0, e, t;
3759   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3760   tree type, itype, vmain, vback, vextra;
3761   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3762   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3763   gimple_stmt_iterator gsi;
3764   edge se;
3765   bool broken_loop = region->cont == NULL;
3766   tree *counts = NULL;
3767   tree n1, n2, step;
3768 
3769   itype = type = TREE_TYPE (fd->loop.v);
3770   if (POINTER_TYPE_P (type))
3771     itype = signed_type_for (type);
3772 
3773   entry_bb = region->entry;
3774   se = split_block (entry_bb, last_stmt (entry_bb));
3775   entry_bb = se->src;
3776   iter_part_bb = se->dest;
3777   cont_bb = region->cont;
3778   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3779   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3780   gcc_assert (broken_loop
3781 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3782   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3783   body_bb = single_succ (seq_start_bb);
3784   if (!broken_loop)
3785     {
3786       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3787 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3788       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3789       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3790     }
3791   exit_bb = region->exit;
3792 
3793   /* Trip and adjustment setup goes in ENTRY_BB.  */
3794   gsi = gsi_last_bb (entry_bb);
3795   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3796 
3797   if (fd->collapse > 1)
3798     {
3799       int first_zero_iter = -1, dummy = -1;
3800       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3801 
3802       counts = XALLOCAVEC (tree, fd->collapse);
3803       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3804 				  fin_bb, first_zero_iter,
3805 				  dummy_bb, dummy, l2_dom_bb);
3806       t = NULL_TREE;
3807     }
3808   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3809     t = integer_one_node;
3810   else
3811     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3812 		     fold_convert (type, fd->loop.n1),
3813 		     fold_convert (type, fd->loop.n2));
3814   if (fd->collapse == 1
3815       && TYPE_UNSIGNED (type)
3816       && (t == NULL_TREE || !integer_onep (t)))
3817     {
3818       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3819       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3820 				     true, GSI_SAME_STMT);
3821       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3822       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3823 				     true, GSI_SAME_STMT);
3824       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3825 						 NULL_TREE, NULL_TREE);
3826       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3827       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3828 		     expand_omp_regimplify_p, NULL, NULL)
3829 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3830 			expand_omp_regimplify_p, NULL, NULL))
3831 	{
3832 	  gsi = gsi_for_stmt (cond_stmt);
3833 	  gimple_regimplify_operands (cond_stmt, &gsi);
3834 	}
3835       se = split_block (entry_bb, cond_stmt);
3836       se->flags = EDGE_TRUE_VALUE;
3837       entry_bb = se->dest;
3838       se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3839       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3840       se->probability = REG_BR_PROB_BASE / 2000 - 1;
3841       if (gimple_in_ssa_p (cfun))
3842 	{
3843 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3844 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3845 	       !gsi_end_p (gpi); gsi_next (&gpi))
3846 	    {
3847 	      gphi *phi = gpi.phi ();
3848 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3849 			   se, UNKNOWN_LOCATION);
3850 	    }
3851 	}
3852       gsi = gsi_last_bb (entry_bb);
3853     }
3854 
3855   switch (gimple_omp_for_kind (fd->for_stmt))
3856     {
3857     case GF_OMP_FOR_KIND_FOR:
3858       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3859       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3860       break;
3861     case GF_OMP_FOR_KIND_DISTRIBUTE:
3862       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3863       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3864       break;
3865     default:
3866       gcc_unreachable ();
3867     }
3868   nthreads = build_call_expr (nthreads, 0);
3869   nthreads = fold_convert (itype, nthreads);
3870   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3871 				       true, GSI_SAME_STMT);
3872   threadid = build_call_expr (threadid, 0);
3873   threadid = fold_convert (itype, threadid);
3874   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3875 				       true, GSI_SAME_STMT);
3876 
3877   n1 = fd->loop.n1;
3878   n2 = fd->loop.n2;
3879   step = fd->loop.step;
3880   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3881     {
3882       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3883 				     OMP_CLAUSE__LOOPTEMP_);
3884       gcc_assert (innerc);
3885       n1 = OMP_CLAUSE_DECL (innerc);
3886       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3887 				OMP_CLAUSE__LOOPTEMP_);
3888       gcc_assert (innerc);
3889       n2 = OMP_CLAUSE_DECL (innerc);
3890     }
3891   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3892 				 true, NULL_TREE, true, GSI_SAME_STMT);
3893   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3894 				 true, NULL_TREE, true, GSI_SAME_STMT);
3895   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3896 				   true, NULL_TREE, true, GSI_SAME_STMT);
3897   tree chunk_size = fold_convert (itype, fd->chunk_size);
3898   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3899   chunk_size
3900     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3901 				GSI_SAME_STMT);
3902 
3903   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3904   t = fold_build2 (PLUS_EXPR, itype, step, t);
3905   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3906   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3907   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3908     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3909 		     fold_build1 (NEGATE_EXPR, itype, t),
3910 		     fold_build1 (NEGATE_EXPR, itype, step));
3911   else
3912     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3913   t = fold_convert (itype, t);
3914   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3915 				true, GSI_SAME_STMT);
3916 
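  /* TRIP counts how many chunk rounds this thread has completed.  In SSA
     form we need separate names for its value on entry (trip_init), at the
     head of the iteration-partitioning block (trip_main) and after the
     increment (trip_back).  */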
3917   trip_var = create_tmp_reg (itype, ".trip");
3918   if (gimple_in_ssa_p (cfun))
3919     {
3920       trip_init = make_ssa_name (trip_var);
3921       trip_main = make_ssa_name (trip_var);
3922       trip_back = make_ssa_name (trip_var);
3923     }
3924   else
3925     {
3926       trip_init = trip_var;
3927       trip_main = trip_var;
3928       trip_back = trip_var;
3929     }
3930 
3931   gassign *assign_stmt
3932     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3933   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3934 
3935   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3936   t = fold_build2 (MULT_EXPR, itype, t, step);
3937   if (POINTER_TYPE_P (type))
3938     t = fold_build_pointer_plus (n1, t);
3939   else
3940     t = fold_build2 (PLUS_EXPR, type, t, n1);
3941   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3942 				     true, GSI_SAME_STMT);
3943 
3944   /* Remove the GIMPLE_OMP_FOR.  */
3945   gsi_remove (&gsi, true);
3946 
3947   gimple_stmt_iterator gsif = gsi;
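  /* Remember this spot in ENTRY_BB; the linear clause handling below uses
     it to emit per-thread setup ahead of the chunking loop.  */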
3948 
3949   /* Iteration space partitioning goes in ITER_PART_BB.  */
3950   gsi = gsi_last_bb (iter_part_bb);
3951 
3952   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3953   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3954   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3955   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3956 				 false, GSI_CONTINUE_LINKING);
3957 
3958   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3959   t = fold_build2 (MIN_EXPR, itype, t, n);
3960   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3961 				 false, GSI_CONTINUE_LINKING);
3962 
3963   t = build2 (LT_EXPR, boolean_type_node, s0, n);
3964   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3965 
3966   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3967   gsi = gsi_start_bb (seq_start_bb);
3968 
3969   tree startvar = fd->loop.v;
3970   tree endvar = NULL_TREE;
3971 
3972   if (gimple_omp_for_combined_p (fd->for_stmt))
3973     {
3974       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3975 		     ? gimple_omp_parallel_clauses (inner_stmt)
3976 		     : gimple_omp_for_clauses (inner_stmt);
3977       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3978       gcc_assert (innerc);
3979       startvar = OMP_CLAUSE_DECL (innerc);
3980       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 				OMP_CLAUSE__LOOPTEMP_);
3982       gcc_assert (innerc);
3983       endvar = OMP_CLAUSE_DECL (innerc);
3984       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3985 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3986 	{
3987 	  int i;
3988 	  for (i = 1; i < fd->collapse; i++)
3989 	    {
3990 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3991 					OMP_CLAUSE__LOOPTEMP_);
3992 	      gcc_assert (innerc);
3993 	    }
3994 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3995 				    OMP_CLAUSE__LOOPTEMP_);
3996 	  if (innerc)
3997 	    {
3998 	      /* If needed (distribute parallel for with lastprivate),
3999 		 propagate down the total number of iterations.  */
4000 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4001 				     fd->loop.n2);
4002 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4003 					    GSI_CONTINUE_LINKING);
4004 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4005 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4006 	    }
4007 	}
4008     }
4009 
4010   t = fold_convert (itype, s0);
4011   t = fold_build2 (MULT_EXPR, itype, t, step);
4012   if (POINTER_TYPE_P (type))
4013     t = fold_build_pointer_plus (n1, t);
4014   else
4015     t = fold_build2 (PLUS_EXPR, type, t, n1);
4016   t = fold_convert (TREE_TYPE (startvar), t);
4017   t = force_gimple_operand_gsi (&gsi, t,
4018 				DECL_P (startvar)
4019 				&& TREE_ADDRESSABLE (startvar),
4020 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4021   assign_stmt = gimple_build_assign (startvar, t);
4022   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4023 
4024   t = fold_convert (itype, e0);
4025   t = fold_build2 (MULT_EXPR, itype, t, step);
4026   if (POINTER_TYPE_P (type))
4027     t = fold_build_pointer_plus (n1, t);
4028   else
4029     t = fold_build2 (PLUS_EXPR, type, t, n1);
4030   t = fold_convert (TREE_TYPE (startvar), t);
4031   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4032 				false, GSI_CONTINUE_LINKING);
4033   if (endvar)
4034     {
4035       assign_stmt = gimple_build_assign (endvar, e);
4036       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4037       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4038 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4039       else
4040 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4041       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4042     }
4043   /* Handle linear clause adjustments.  */
4044   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4045   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4046     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4047 	 c; c = OMP_CLAUSE_CHAIN (c))
4048       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4049 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4050 	{
4051 	  tree d = OMP_CLAUSE_DECL (c);
4052 	  bool is_ref = omp_is_reference (d);
4053 	  tree t = d, a, dest;
4054 	  if (is_ref)
4055 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4056 	  tree type = TREE_TYPE (t);
4057 	  if (POINTER_TYPE_P (type))
4058 	    type = sizetype;
4059 	  dest = unshare_expr (t);
4060 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4061 	  expand_omp_build_assign (&gsif, v, t);
4062 	  if (itercnt == NULL_TREE)
4063 	    {
4064 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4065 		{
4066 		  itercntbias
4067 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4068 				   fold_convert (itype, fd->loop.n1));
4069 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4070 					     itercntbias, step);
4071 		  itercntbias
4072 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4073 						NULL_TREE, true,
4074 						GSI_SAME_STMT);
4075 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4076 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4077 						      NULL_TREE, false,
4078 						      GSI_CONTINUE_LINKING);
4079 		}
4080 	      else
4081 		itercnt = s0;
4082 	    }
4083 	  a = fold_build2 (MULT_EXPR, type,
4084 			   fold_convert (type, itercnt),
4085 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4086 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4087 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4088 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4089 					false, GSI_CONTINUE_LINKING);
4090 	  assign_stmt = gimple_build_assign (dest, t);
4091 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4092 	}
4093   if (fd->collapse > 1)
4094     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4095 
4096   if (!broken_loop)
4097     {
4098       /* The code controlling the sequential loop goes in CONT_BB,
4099 	 replacing the GIMPLE_OMP_CONTINUE.  */
4100       gsi = gsi_last_bb (cont_bb);
4101       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4102       vmain = gimple_omp_continue_control_use (cont_stmt);
4103       vback = gimple_omp_continue_control_def (cont_stmt);
4104 
4105       if (!gimple_omp_for_combined_p (fd->for_stmt))
4106 	{
4107 	  if (POINTER_TYPE_P (type))
4108 	    t = fold_build_pointer_plus (vmain, step);
4109 	  else
4110 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4111 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4112 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4113 					  true, GSI_SAME_STMT);
4114 	  assign_stmt = gimple_build_assign (vback, t);
4115 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4116 
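	  /* A chunk size of 1 means each chunk covers exactly one
	     iteration, so the sequential loop never takes its back edge;
	     emit a constant-false condition.  */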
4117 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4118 	    t = build2 (EQ_EXPR, boolean_type_node,
4119 			build_int_cst (itype, 0),
4120 			build_int_cst (itype, 1));
4121 	  else
4122 	    t = build2 (fd->loop.cond_code, boolean_type_node,
4123 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4124 			? t : vback, e);
4125 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4126 	}
4127 
4128       /* Remove GIMPLE_OMP_CONTINUE.  */
4129       gsi_remove (&gsi, true);
4130 
4131       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4132 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4133 
4134       /* Trip update code goes into TRIP_UPDATE_BB.  */
4135       gsi = gsi_start_bb (trip_update_bb);
4136 
4137       t = build_int_cst (itype, 1);
4138       t = build2 (PLUS_EXPR, itype, trip_main, t);
4139       assign_stmt = gimple_build_assign (trip_back, t);
4140       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4141     }
4142 
4143   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4144   gsi = gsi_last_bb (exit_bb);
4145   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4146     {
4147       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4148       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4149     }
4150   gsi_remove (&gsi, true);
4151 
4152   /* Connect the new blocks.  */
4153   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4154   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4155 
4156   if (!broken_loop)
4157     {
4158       se = find_edge (cont_bb, body_bb);
4159       if (se == NULL)
4160 	{
4161 	  se = BRANCH_EDGE (cont_bb);
4162 	  gcc_assert (single_succ (se->dest) == body_bb);
4163 	}
4164       if (gimple_omp_for_combined_p (fd->for_stmt))
4165 	{
4166 	  remove_edge (se);
4167 	  se = NULL;
4168 	}
4169       else if (fd->collapse > 1)
4170 	{
4171 	  remove_edge (se);
4172 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4173 	}
4174       else
4175 	se->flags = EDGE_TRUE_VALUE;
4176       find_edge (cont_bb, trip_update_bb)->flags
4177 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4178 
4179       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4180 				iter_part_bb);
4181     }
4182 
4183   if (gimple_in_ssa_p (cfun))
4184     {
4185       gphi_iterator psi;
4186       gphi *phi;
4187       edge re, ene;
4188       edge_var_map *vm;
4189       size_t i;
4190 
4191       gcc_assert (fd->collapse == 1 && !broken_loop);
4192 
4193       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4194 	 remove arguments of the phi nodes in fin_bb.  We need to create
4195 	 appropriate phi nodes in iter_part_bb instead.  */
4196       se = find_edge (iter_part_bb, fin_bb);
4197       re = single_succ_edge (trip_update_bb);
4198       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4199       ene = single_succ_edge (entry_bb);
4200 
4201       psi = gsi_start_phis (fin_bb);
4202       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4203 	   gsi_next (&psi), ++i)
4204 	{
4205 	  gphi *nphi;
4206 	  source_location locus;
4207 
4208 	  phi = psi.phi ();
4209 	  t = gimple_phi_result (phi);
4210 	  gcc_assert (t == redirect_edge_var_map_result (vm));
4211 
4212 	  if (!single_pred_p (fin_bb))
4213 	    t = copy_ssa_name (t, phi);
4214 
4215 	  nphi = create_phi_node (t, iter_part_bb);
4216 
4217 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4218 	  locus = gimple_phi_arg_location_from_edge (phi, se);
4219 
4220 	  /* A special case -- fd->loop.v is not yet computed in
4221 	     iter_part_bb, so we need to use vextra instead.  */
4222 	  if (t == fd->loop.v)
4223 	    t = vextra;
4224 	  add_phi_arg (nphi, t, ene, locus);
4225 	  locus = redirect_edge_var_map_location (vm);
4226 	  tree back_arg = redirect_edge_var_map_def (vm);
4227 	  add_phi_arg (nphi, back_arg, re, locus);
4228 	  edge ce = find_edge (cont_bb, body_bb);
4229 	  if (ce == NULL)
4230 	    {
4231 	      ce = BRANCH_EDGE (cont_bb);
4232 	      gcc_assert (single_succ (ce->dest) == body_bb);
4233 	      ce = single_succ_edge (ce->dest);
4234 	    }
4235 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4236 	  gcc_assert (inner_loop_phi != NULL);
4237 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4238 		       find_edge (seq_start_bb, body_bb), locus);
4239 
4240 	  if (!single_pred_p (fin_bb))
4241 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4242 	}
4243       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4244       redirect_edge_var_map_clear (re);
4245       if (single_pred_p (fin_bb))
4246 	while (1)
4247 	  {
4248 	    psi = gsi_start_phis (fin_bb);
4249 	    if (gsi_end_p (psi))
4250 	      break;
4251 	    remove_phi_node (&psi, false);
4252 	  }
4253 
4254       /* Make phi node for trip.  */
4255       phi = create_phi_node (trip_main, iter_part_bb);
4256       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4257 		   UNKNOWN_LOCATION);
4258       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4259 		   UNKNOWN_LOCATION);
4260     }
4261 
4262   if (!broken_loop)
4263     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4264   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4265 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4266   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4267 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4268   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4269 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4270   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4271 			   recompute_dominator (CDI_DOMINATORS, body_bb));
4272 
4273   if (!broken_loop)
4274     {
4275       struct loop *loop = body_bb->loop_father;
4276       struct loop *trip_loop = alloc_loop ();
4277       trip_loop->header = iter_part_bb;
4278       trip_loop->latch = trip_update_bb;
4279       add_loop (trip_loop, iter_part_bb->loop_father);
4280 
4281       if (loop != entry_bb->loop_father)
4282 	{
4283 	  gcc_assert (loop->header == body_bb);
4284 	  gcc_assert (loop->latch == region->cont
4285 		      || single_pred (loop->latch) == region->cont);
4286 	  trip_loop->inner = loop;
4287 	  return;
4288 	}
4289 
4290       if (!gimple_omp_for_combined_p (fd->for_stmt))
4291 	{
4292 	  loop = alloc_loop ();
4293 	  loop->header = body_bb;
4294 	  if (collapse_bb == NULL)
4295 	    loop->latch = cont_bb;
4296 	  add_loop (loop, trip_loop);
4297 	}
4298     }
4299 }
4300 
4301 /* A subroutine of expand_omp_for.  Generate code for _Cilk_for loop.
4302    Given parameters:
4303    for (V = N1; V cond N2; V += STEP) BODY;
4304 
4305    where COND is "<" or ">" or "!=", we generate pseudocode
4306 
4307    for (ind_var = low; ind_var < high; ind_var++)
4308      {
4309        V = n1 + (ind_var * STEP)
4310 
4311        <BODY>
4312      }
4313 
4314    In the above pseudocode, low and high are function parameters of the
4315    child function.  In the function below, we insert a temporary
4316    variable and calls to two OMP functions; these calls cannot occur in
4317    the body of _Cilk_for itself (since OMP_FOR cannot be mixed with
4318    _Cilk_for), and the function that handles taskreg later replaces
4319    them with the low and high parameters.  */
4320 
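/* As an illustrative sketch (an assumed shape, not code from this pass),
   the child function eventually invoked by the Cilk runtime behaves like

	void child_fn (void *data, count_t __low, count_t __high)
	{
	  for (count_t ind_var = __low; ind_var < __high; ind_var++)
	    {
	      V = N1 + ind_var * STEP;
	      BODY;
	    }
	}

   where count_t stands for the 32- or 64-bit count type matching the
   library flavor, and __libcilkrts_cilk_for_32/64 picks the
   [__low, __high) subranges handed to each worker.  */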
4321 
4322 static void
4323 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4324 {
4325   bool broken_loop = region->cont == NULL;
4326   basic_block entry_bb = region->entry;
4327   basic_block cont_bb = region->cont;
4328 
4329   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4330   gcc_assert (broken_loop
4331 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4332   basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4333   basic_block l1_bb, l2_bb;
4334 
4335   if (!broken_loop)
4336     {
4337       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4338       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4339       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4340       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4341     }
4342   else
4343     {
4344       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4345       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4346       l2_bb = single_succ (l1_bb);
4347     }
4348   basic_block exit_bb = region->exit;
4349   basic_block l2_dom_bb = NULL;
4350 
4351   gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4352 
4353   /* The statements below, up to "tree high_val = ...", are pseudo
4354      statements used to pass information to expand_omp_taskreg.
4355      low_val and high_val will be replaced by the __low and __high
4356      parameters of the child function.
4357 
4358      The call_exprs part is a place-holder; it mainly serves to
4359      identify distinctly to the top-level part that this is where
4360      low and high should be put (reasoning given in the header
4361      comment).  */
4362 
4363   gomp_parallel *par_stmt
4364     = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4365   tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4366   tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4367   for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4368     {
4369       if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4370 	high_val = t;
4371       else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4372 	low_val = t;
4373     }
4374   gcc_assert (low_val && high_val);
4375 
4376   tree type = TREE_TYPE (low_val);
4377   tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4378   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4379 
4380   /* Not needed in SSA form right now.  */
4381   gcc_assert (!gimple_in_ssa_p (cfun));
4382   if (l2_dom_bb == NULL)
4383     l2_dom_bb = l1_bb;
4384 
4385   tree n1 = low_val;
4386   tree n2 = high_val;
4387 
4388   gimple *stmt = gimple_build_assign (ind_var, n1);
4389 
4390   /* Replace the GIMPLE_OMP_FOR statement.  */
4391   gsi_replace (&gsi, stmt, true);
4392 
4393   if (!broken_loop)
4394     {
4395       /* Code to control the increment goes in the CONT_BB.  */
4396       gsi = gsi_last_bb (cont_bb);
4397       stmt = gsi_stmt (gsi);
4398       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4399       stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4400 				  build_one_cst (type));
4401 
4402       /* Replace GIMPLE_OMP_CONTINUE.  */
4403       gsi_replace (&gsi, stmt, true);
4404     }
4405 
4406   /* Emit the condition in L1_BB.  */
4407   gsi = gsi_after_labels (l1_bb);
4408   t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4409 		   fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4410 		   fd->loop.step);
4411   if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4412     t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4413 		     fd->loop.n1, fold_convert (sizetype, t));
4414   else
4415     t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4416 		     fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4417   t = fold_convert (TREE_TYPE (fd->loop.v), t);
4418   expand_omp_build_assign (&gsi, fd->loop.v, t);
4419 
4420   /* The condition is always '<' since the runtime will fill in the low
4421      and high values.  */
4422   stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4423   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4424 
4425   /* Remove GIMPLE_OMP_RETURN.  */
4426   gsi = gsi_last_bb (exit_bb);
4427   gsi_remove (&gsi, true);
4428 
4429   /* Connect the new blocks.  */
4430   remove_edge (FALLTHRU_EDGE (entry_bb));
4431 
4432   edge e, ne;
4433   if (!broken_loop)
4434     {
4435       remove_edge (BRANCH_EDGE (entry_bb));
4436       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4437 
4438       e = BRANCH_EDGE (l1_bb);
4439       ne = FALLTHRU_EDGE (l1_bb);
4440       e->flags = EDGE_TRUE_VALUE;
4441     }
4442   else
4443     {
4444       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4445 
4446       ne = single_succ_edge (l1_bb);
4447       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4449     }
4450   ne->flags = EDGE_FALSE_VALUE;
4451   e->probability = REG_BR_PROB_BASE * 7 / 8;
4452   ne->probability = REG_BR_PROB_BASE / 8;
4453 
4454   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4455   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4456   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4457 
4458   if (!broken_loop)
4459     {
4460       struct loop *loop = alloc_loop ();
4461       loop->header = l1_bb;
4462       loop->latch = cont_bb;
4463       add_loop (loop, l1_bb->loop_father);
4464       loop->safelen = INT_MAX;
4465     }
4466 
4467   /* Pick the correct library function based on the precision of the
4468      induction variable type.  */
4469   tree lib_fun = NULL_TREE;
4470   if (TYPE_PRECISION (type) == 32)
4471     lib_fun = cilk_for_32_fndecl;
4472   else if (TYPE_PRECISION (type) == 64)
4473     lib_fun = cilk_for_64_fndecl;
4474   else
4475     gcc_unreachable ();
4476 
4477   gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4478 
4479   /* WS_ARGS contains the library function flavor to call
4480      (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4481      user-defined grain value.  If the user does not define one, then zero
4482      is passed in by the parser.  */
4483   vec_alloc (region->ws_args, 2);
4484   region->ws_args->quick_push (lib_fun);
4485   region->ws_args->quick_push (fd->chunk_size);
4486 }
4487 
4488 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4489    loop.  Given parameters:
4490 
4491 	for (V = N1; V cond N2; V += STEP) BODY;
4492 
4493    where COND is "<" or ">", we generate pseudocode
4494 
4495 	V = N1;
4496 	goto L1;
4497     L0:
4498 	BODY;
4499 	V += STEP;
4500     L1:
4501 	if (V cond N2) goto L0; else goto L2;
4502     L2:
4503 
4504     For collapsed loops, given parameters:
4505       collapse(3)
4506       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4507 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4508 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4509 	    BODY;
4510 
4511     we generate pseudocode
4512 
4513 	if (cond3 is <)
4514 	  adj = STEP3 - 1;
4515 	else
4516 	  adj = STEP3 + 1;
4517 	count3 = (adj + N32 - N31) / STEP3;
4518 	if (cond2 is <)
4519 	  adj = STEP2 - 1;
4520 	else
4521 	  adj = STEP2 + 1;
4522 	count2 = (adj + N22 - N21) / STEP2;
4523 	if (cond1 is <)
4524 	  adj = STEP1 - 1;
4525 	else
4526 	  adj = STEP1 + 1;
4527 	count1 = (adj + N12 - N11) / STEP1;
4528 	count = count1 * count2 * count3;
4529 	V = 0;
4530 	V1 = N11;
4531 	V2 = N21;
4532 	V3 = N31;
4533 	goto L1;
4534     L0:
4535 	BODY;
4536 	V += 1;
4537 	V3 += STEP3;
4538 	V2 += (V3 cond3 N32) ? 0 : STEP2;
4539 	V3 = (V3 cond3 N32) ? V3 : N31;
4540 	V1 += (V2 cond2 N22) ? 0 : STEP1;
4541 	V2 = (V2 cond2 N22) ? V2 : N21;
4542     L1:
4543 	if (V < count) goto L0; else goto L2;
4544     L2:
4545 
4546       */
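
/* Illustrative example (not part of the original source): for collapse(3)
   with count1 = 2, count2 = 3 and count3 = 4, count = 24 and V enumerates
   the iterations in row-major order: iteration V corresponds to the index
   triple (V / 12, (V / 4) % 3, V % 4), scaled by the respective STEPs and
   offset by N11/N21/N31.  */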
4547 
4548 static void
4549 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4550 {
4551   tree type, t;
4552   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4553   gimple_stmt_iterator gsi;
4554   gimple *stmt;
4555   gcond *cond_stmt;
4556   bool broken_loop = region->cont == NULL;
4557   edge e, ne;
4558   tree *counts = NULL;
4559   int i;
4560   int safelen_int = INT_MAX;
4561   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4562 				  OMP_CLAUSE_SAFELEN);
4563   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4564 				  OMP_CLAUSE__SIMDUID_);
4565   tree n1, n2;
4566 
4567   if (safelen)
4568     {
4569       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4570       if (TREE_CODE (safelen) != INTEGER_CST)
4571 	safelen_int = 0;
4572       else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4573 	safelen_int = tree_to_uhwi (safelen);
4574       if (safelen_int == 1)
4575 	safelen_int = 0;
4576     }
4577   type = TREE_TYPE (fd->loop.v);
4578   entry_bb = region->entry;
4579   cont_bb = region->cont;
4580   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4581   gcc_assert (broken_loop
4582 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4583   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4584   if (!broken_loop)
4585     {
4586       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4587       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4588       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4589       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4590     }
4591   else
4592     {
4593       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4594       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4595       l2_bb = single_succ (l1_bb);
4596     }
4597   exit_bb = region->exit;
4598   l2_dom_bb = NULL;
4599 
4600   gsi = gsi_last_bb (entry_bb);
4601 
4602   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4603   /* Not needed in SSA form right now.  */
4604   gcc_assert (!gimple_in_ssa_p (cfun));
4605   if (fd->collapse > 1)
4606     {
4607       int first_zero_iter = -1, dummy = -1;
4608       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4609 
4610       counts = XALLOCAVEC (tree, fd->collapse);
4611       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4612 				  zero_iter_bb, first_zero_iter,
4613 				  dummy_bb, dummy, l2_dom_bb);
4614     }
4615   if (l2_dom_bb == NULL)
4616     l2_dom_bb = l1_bb;
4617 
4618   n1 = fd->loop.n1;
4619   n2 = fd->loop.n2;
4620   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4621     {
4622       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4623 				     OMP_CLAUSE__LOOPTEMP_);
4624       gcc_assert (innerc);
4625       n1 = OMP_CLAUSE_DECL (innerc);
4626       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4627 				OMP_CLAUSE__LOOPTEMP_);
4628       gcc_assert (innerc);
4629       n2 = OMP_CLAUSE_DECL (innerc);
4630     }
4631   tree step = fd->loop.step;
4632 
4633   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4634 				  OMP_CLAUSE__SIMT_);
4635   if (is_simt)
4636     {
4637       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4638       is_simt = safelen_int > 1;
4639     }
4640   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4641   if (is_simt)
4642     {
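      /* Distribute iterations across SIMT lanes: lane L starts at
	 N1 + L * STEP and advances by STEP * VF, where VF is the number
	 of participating lanes (IFN_GOMP_SIMT_VF).  */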
4643       simt_lane = create_tmp_var (unsigned_type_node);
4644       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4645       gimple_call_set_lhs (g, simt_lane);
4646       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4647       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4648 				 fold_convert (TREE_TYPE (step), simt_lane));
4649       n1 = fold_convert (type, n1);
4650       if (POINTER_TYPE_P (type))
4651 	n1 = fold_build_pointer_plus (n1, offset);
4652       else
4653 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4654 
4655       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4656       if (fd->collapse > 1)
4657 	simt_maxlane = build_one_cst (unsigned_type_node);
4658       else if (safelen_int < omp_max_simt_vf ())
4659 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4660       tree vf
4661 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4662 					unsigned_type_node, 0);
4663       if (simt_maxlane)
4664 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4665       vf = fold_convert (TREE_TYPE (step), vf);
4666       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4667     }
4668 
4669   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4670   if (fd->collapse > 1)
4671     {
4672       if (gimple_omp_for_combined_into_p (fd->for_stmt))
4673 	{
4674 	  gsi_prev (&gsi);
4675 	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4676 	  gsi_next (&gsi);
4677 	}
4678       else
4679 	for (i = 0; i < fd->collapse; i++)
4680 	  {
4681 	    tree itype = TREE_TYPE (fd->loops[i].v);
4682 	    if (POINTER_TYPE_P (itype))
4683 	      itype = signed_type_for (itype);
4684 	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4685 	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4686 	  }
4687     }
4688 
4689   /* Remove the GIMPLE_OMP_FOR statement.  */
4690   gsi_remove (&gsi, true);
4691 
4692   if (!broken_loop)
4693     {
4694       /* Code to control the increment goes in the CONT_BB.  */
4695       gsi = gsi_last_bb (cont_bb);
4696       stmt = gsi_stmt (gsi);
4697       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4698 
4699       if (POINTER_TYPE_P (type))
4700 	t = fold_build_pointer_plus (fd->loop.v, step);
4701       else
4702 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4703       expand_omp_build_assign (&gsi, fd->loop.v, t);
4704 
4705       if (fd->collapse > 1)
4706 	{
4707 	  i = fd->collapse - 1;
4708 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4709 	    {
4710 	      t = fold_convert (sizetype, fd->loops[i].step);
4711 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4712 	    }
4713 	  else
4714 	    {
4715 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4716 				fd->loops[i].step);
4717 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4718 			       fd->loops[i].v, t);
4719 	    }
4720 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4721 
4722 	  for (i = fd->collapse - 1; i > 0; i--)
4723 	    {
4724 	      tree itype = TREE_TYPE (fd->loops[i].v);
4725 	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4726 	      if (POINTER_TYPE_P (itype2))
4727 		itype2 = signed_type_for (itype2);
4728 	      t = fold_convert (itype2, fd->loops[i - 1].step);
4729 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4730 					    GSI_SAME_STMT);
4731 	      t = build3 (COND_EXPR, itype2,
4732 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4733 				  fd->loops[i].v,
4734 				  fold_convert (itype, fd->loops[i].n2)),
4735 			  build_int_cst (itype2, 0), t);
4736 	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4737 		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4738 	      else
4739 		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4740 	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4741 
4742 	      t = fold_convert (itype, fd->loops[i].n1);
4743 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4744 					    GSI_SAME_STMT);
4745 	      t = build3 (COND_EXPR, itype,
4746 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4747 				  fd->loops[i].v,
4748 				  fold_convert (itype, fd->loops[i].n2)),
4749 			  fd->loops[i].v, t);
4750 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4751 	    }
4752 	}
4753 
4754       /* Remove GIMPLE_OMP_CONTINUE.  */
4755       gsi_remove (&gsi, true);
4756     }
4757 
4758   /* Emit the condition in L1_BB.  */
4759   gsi = gsi_start_bb (l1_bb);
4760 
4761   t = fold_convert (type, n2);
4762   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4763 				false, GSI_CONTINUE_LINKING);
4764   tree v = fd->loop.v;
4765   if (DECL_P (v) && TREE_ADDRESSABLE (v))
4766     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4767 				  false, GSI_CONTINUE_LINKING);
4768   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4769   cond_stmt = gimple_build_cond_empty (t);
4770   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4771   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4772 		 NULL, NULL)
4773       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4774 		    NULL, NULL))
4775     {
4776       gsi = gsi_for_stmt (cond_stmt);
4777       gimple_regimplify_operands (cond_stmt, &gsi);
4778     }
4779 
4780   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4781   if (is_simt)
4782     {
4783       gsi = gsi_start_bb (l2_bb);
4784       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4785       if (POINTER_TYPE_P (type))
4786 	t = fold_build_pointer_plus (fd->loop.v, step);
4787       else
4788 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4789       expand_omp_build_assign (&gsi, fd->loop.v, t);
4790     }
4791 
4792   /* Remove GIMPLE_OMP_RETURN.  */
4793   gsi = gsi_last_bb (exit_bb);
4794   gsi_remove (&gsi, true);
4795 
4796   /* Connect the new blocks.  */
4797   remove_edge (FALLTHRU_EDGE (entry_bb));
4798 
4799   if (!broken_loop)
4800     {
4801       remove_edge (BRANCH_EDGE (entry_bb));
4802       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4803 
4804       e = BRANCH_EDGE (l1_bb);
4805       ne = FALLTHRU_EDGE (l1_bb);
4806       e->flags = EDGE_TRUE_VALUE;
4807     }
4808   else
4809     {
4810       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4811 
4812       ne = single_succ_edge (l1_bb);
4813       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4815     }
4816   ne->flags = EDGE_FALSE_VALUE;
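  /* Guess that the loop back edge is taken 7 times out of 8.  */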
4817   e->probability = REG_BR_PROB_BASE * 7 / 8;
4818   ne->probability = REG_BR_PROB_BASE / 8;
4819 
4820   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4821   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4822 
4823   if (simt_maxlane)
4824     {
4825       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4826 				     NULL_TREE, NULL_TREE);
4827       gsi = gsi_last_bb (entry_bb);
4828       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4829       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4830       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4831       FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4832       BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4833       l2_dom_bb = entry_bb;
4834     }
4835   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4836 
4837   if (!broken_loop)
4838     {
4839       struct loop *loop = alloc_loop ();
4840       loop->header = l1_bb;
4841       loop->latch = cont_bb;
4842       add_loop (loop, l1_bb->loop_father);
4843       loop->safelen = safelen_int;
4844       if (simduid)
4845 	{
4846 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4847 	  cfun->has_simduid_loops = true;
4848 	}
4849       /* Unless loop vectorization was explicitly disabled with
4850 	 -fno-tree-loop-vectorize, hint that we want to vectorize the loop.  */
4851       if ((flag_tree_loop_vectorize
4852 	   || (!global_options_set.x_flag_tree_loop_vectorize
4853 	       && !global_options_set.x_flag_tree_vectorize))
4854 	  && flag_tree_loop_optimize
4855 	  && loop->safelen > 1)
4856 	{
4857 	  loop->force_vectorize = true;
4858 	  cfun->has_force_vectorize_loops = true;
4859 	}
4860     }
4861   else if (simduid)
4862     cfun->has_simduid_loops = true;
4863 }
4864 
4865 /* The taskloop construct is represented after gimplification as
4866    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4867    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4868    which should just compute all the needed loop temporaries
4869    for the GIMPLE_OMP_TASK.  */
4870 
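/* Schematically (an illustration, not quoted from the sources):

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       BODY;

   arrives here as

     GIMPLE_OMP_FOR		<-- outer, expanded by this routine
       GIMPLE_OMP_TASK
	 GIMPLE_OMP_FOR		<-- inner, see expand_omp_taskloop_for_inner

   where the outer loop merely evaluates N1/N2/STEP into _LOOPTEMP_
   temporaries consumed by the GOMP_taskloop{,_ull} runtime call.  */
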
4871 static void
4872 expand_omp_taskloop_for_outer (struct omp_region *region,
4873 			       struct omp_for_data *fd,
4874 			       gimple *inner_stmt)
4875 {
4876   tree type, bias = NULL_TREE;
4877   basic_block entry_bb, cont_bb, exit_bb;
4878   gimple_stmt_iterator gsi;
4879   gassign *assign_stmt;
4880   tree *counts = NULL;
4881   int i;
4882 
4883   gcc_assert (inner_stmt);
4884   gcc_assert (region->cont);
4885   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4886 	      && gimple_omp_task_taskloop_p (inner_stmt));
4887   type = TREE_TYPE (fd->loop.v);
4888 
4889   /* See if we need to bias by LLONG_MIN.  */
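  /* Adding TYPE_MIN_VALUE flips the sign bit, mapping the signed
     iteration range order-preservingly onto the unsigned iterator type
     the runtime works with.  */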
4890   if (fd->iter_type == long_long_unsigned_type_node
4891       && TREE_CODE (type) == INTEGER_TYPE
4892       && !TYPE_UNSIGNED (type))
4893     {
4894       tree n1, n2;
4895 
4896       if (fd->loop.cond_code == LT_EXPR)
4897 	{
4898 	  n1 = fd->loop.n1;
4899 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4900 	}
4901       else
4902 	{
4903 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4904 	  n2 = fd->loop.n1;
4905 	}
4906       if (TREE_CODE (n1) != INTEGER_CST
4907 	  || TREE_CODE (n2) != INTEGER_CST
4908 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4909 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4910     }
4911 
4912   entry_bb = region->entry;
4913   cont_bb = region->cont;
4914   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4915   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4916   exit_bb = region->exit;
4917 
4918   gsi = gsi_last_bb (entry_bb);
4919   gimple *for_stmt = gsi_stmt (gsi);
4920   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4921   if (fd->collapse > 1)
4922     {
4923       int first_zero_iter = -1, dummy = -1;
4924       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4925 
4926       counts = XALLOCAVEC (tree, fd->collapse);
4927       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4928 				  zero_iter_bb, first_zero_iter,
4929 				  dummy_bb, dummy, l2_dom_bb);
4930 
4931       if (zero_iter_bb)
4932 	{
4933 	  /* Some counts[i] vars might be uninitialized if
4934 	     some loop has zero iterations.  But the body shouldn't
4935 	     be executed in that case, so just avoid uninit warnings.  */
4936 	  for (i = first_zero_iter; i < fd->collapse; i++)
4937 	    if (SSA_VAR_P (counts[i]))
4938 	      TREE_NO_WARNING (counts[i]) = 1;
4939 	  gsi_prev (&gsi);
4940 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
4941 	  entry_bb = e->dest;
4942 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4943 	  gsi = gsi_last_bb (entry_bb);
4944 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4945 				   get_immediate_dominator (CDI_DOMINATORS,
4946 							    zero_iter_bb));
4947 	}
4948     }
4949 
4950   tree t0, t1;
4951   t1 = fd->loop.n2;
4952   t0 = fd->loop.n1;
4953   if (POINTER_TYPE_P (TREE_TYPE (t0))
4954       && TYPE_PRECISION (TREE_TYPE (t0))
4955 	 != TYPE_PRECISION (fd->iter_type))
4956     {
4957       /* Avoid casting pointers to an integer of a different size.  */
4958       tree itype = signed_type_for (type);
4959       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4960       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4961     }
4962   else
4963     {
4964       t1 = fold_convert (fd->iter_type, t1);
4965       t0 = fold_convert (fd->iter_type, t0);
4966     }
4967   if (bias)
4968     {
4969       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4970       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4971     }
4972 
4973   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4974 				 OMP_CLAUSE__LOOPTEMP_);
4975   gcc_assert (innerc);
4976   tree startvar = OMP_CLAUSE_DECL (innerc);
4977   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4978   gcc_assert (innerc);
4979   tree endvar = OMP_CLAUSE_DECL (innerc);
4980   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4981     {
4982       gcc_assert (innerc);
4983       for (i = 1; i < fd->collapse; i++)
4984 	{
4985 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4986 				    OMP_CLAUSE__LOOPTEMP_);
4987 	  gcc_assert (innerc);
4988 	}
4989       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4990 				OMP_CLAUSE__LOOPTEMP_);
4991       if (innerc)
4992 	{
4993 	  /* If needed (inner taskloop has lastprivate clause), propagate
4994 	     down the total number of iterations.  */
4995 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4996 					     NULL_TREE, false,
4997 					     GSI_CONTINUE_LINKING);
4998 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4999 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5000 	}
5001     }
5002 
5003   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5004 				 GSI_CONTINUE_LINKING);
5005   assign_stmt = gimple_build_assign (startvar, t0);
5006   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5007 
5008   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5009 				 GSI_CONTINUE_LINKING);
5010   assign_stmt = gimple_build_assign (endvar, t1);
5011   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5012   if (fd->collapse > 1)
5013     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5014 
5015   /* Remove the GIMPLE_OMP_FOR statement.  */
5016   gsi = gsi_for_stmt (for_stmt);
5017   gsi_remove (&gsi, true);
5018 
5019   gsi = gsi_last_bb (cont_bb);
5020   gsi_remove (&gsi, true);
5021 
5022   gsi = gsi_last_bb (exit_bb);
5023   gsi_remove (&gsi, true);
5024 
5025   FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5026   remove_edge (BRANCH_EDGE (entry_bb));
5027   FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
5028   remove_edge (BRANCH_EDGE (cont_bb));
5029   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5030   set_immediate_dominator (CDI_DOMINATORS, region->entry,
5031 			   recompute_dominator (CDI_DOMINATORS, region->entry));
5032 }
5033 
5034 /* The taskloop construct is represented after gimplification as
5035    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5036    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
5037    The GOMP_taskloop{,_ull} function arranges for each task to be
5038    given just a single range of iterations.  */
5039 
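/* In effect each generated task runs (illustrative sketch only)

     for (V = start; V cond end; V += STEP) BODY;

   where START and END arrive through the two _LOOPTEMP_ clause
   temporaries filled in by the runtime for that particular task.  */
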
5040 static void
5041 expand_omp_taskloop_for_inner (struct omp_region *region,
5042 			       struct omp_for_data *fd,
5043 			       gimple *inner_stmt)
5044 {
5045   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5046   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5047   basic_block fin_bb;
5048   gimple_stmt_iterator gsi;
5049   edge ep;
5050   bool broken_loop = region->cont == NULL;
5051   tree *counts = NULL;
5052   tree n1, n2, step;
5053 
5054   itype = type = TREE_TYPE (fd->loop.v);
5055   if (POINTER_TYPE_P (type))
5056     itype = signed_type_for (type);
5057 
5058   /* See if we need to bias by LLONG_MIN.  */
5059   if (fd->iter_type == long_long_unsigned_type_node
5060       && TREE_CODE (type) == INTEGER_TYPE
5061       && !TYPE_UNSIGNED (type))
5062     {
5063       tree n1, n2;
5064 
5065       if (fd->loop.cond_code == LT_EXPR)
5066 	{
5067 	  n1 = fd->loop.n1;
5068 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5069 	}
5070       else
5071 	{
5072 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5073 	  n2 = fd->loop.n1;
5074 	}
5075       if (TREE_CODE (n1) != INTEGER_CST
5076 	  || TREE_CODE (n2) != INTEGER_CST
5077 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5078 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5079     }
5080 
5081   entry_bb = region->entry;
5082   cont_bb = region->cont;
5083   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5084   fin_bb = BRANCH_EDGE (entry_bb)->dest;
5085   gcc_assert (broken_loop
5086 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5087   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5088   if (!broken_loop)
5089     {
5090       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5091       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5092     }
5093   exit_bb = region->exit;
5094 
5095   /* Iteration space partitioning goes in ENTRY_BB.  */
5096   gsi = gsi_last_bb (entry_bb);
5097   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5098 
5099   if (fd->collapse > 1)
5100     {
5101       int first_zero_iter = -1, dummy = -1;
5102       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5103 
5104       counts = XALLOCAVEC (tree, fd->collapse);
5105       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5106 				  fin_bb, first_zero_iter,
5107 				  dummy_bb, dummy, l2_dom_bb);
5108       t = NULL_TREE;
5109     }
5110   else
5111     t = integer_one_node;
5112 
5113   step = fd->loop.step;
5114   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5115 				 OMP_CLAUSE__LOOPTEMP_);
5116   gcc_assert (innerc);
5117   n1 = OMP_CLAUSE_DECL (innerc);
5118   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5119   gcc_assert (innerc);
5120   n2 = OMP_CLAUSE_DECL (innerc);
5121   if (bias)
5122     {
5123       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5124       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5125     }
5126   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5127 				 true, NULL_TREE, true, GSI_SAME_STMT);
5128   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5129 				 true, NULL_TREE, true, GSI_SAME_STMT);
5130   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5131 				   true, NULL_TREE, true, GSI_SAME_STMT);
5132 
5133   tree startvar = fd->loop.v;
5134   tree endvar = NULL_TREE;
5135 
5136   if (gimple_omp_for_combined_p (fd->for_stmt))
5137     {
5138       tree clauses = gimple_omp_for_clauses (inner_stmt);
5139       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5140       gcc_assert (innerc);
5141       startvar = OMP_CLAUSE_DECL (innerc);
5142       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5143 				OMP_CLAUSE__LOOPTEMP_);
5144       gcc_assert (innerc);
5145       endvar = OMP_CLAUSE_DECL (innerc);
5146     }
5147   t = fold_convert (TREE_TYPE (startvar), n1);
5148   t = force_gimple_operand_gsi (&gsi, t,
5149 				DECL_P (startvar)
5150 				&& TREE_ADDRESSABLE (startvar),
5151 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5152   gimple *assign_stmt = gimple_build_assign (startvar, t);
5153   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5154 
5155   t = fold_convert (TREE_TYPE (startvar), n2);
5156   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5157 				false, GSI_CONTINUE_LINKING);
5158   if (endvar)
5159     {
5160       assign_stmt = gimple_build_assign (endvar, e);
5161       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5162       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5163 	assign_stmt = gimple_build_assign (fd->loop.v, e);
5164       else
5165 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5166       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5167     }
5168   if (fd->collapse > 1)
5169     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5170 
5171   if (!broken_loop)
5172     {
5173       /* The code controlling the sequential loop replaces the
5174 	 GIMPLE_OMP_CONTINUE.  */
5175       gsi = gsi_last_bb (cont_bb);
5176       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5177       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5178       vmain = gimple_omp_continue_control_use (cont_stmt);
5179       vback = gimple_omp_continue_control_def (cont_stmt);
5180 
5181       if (!gimple_omp_for_combined_p (fd->for_stmt))
5182 	{
5183 	  if (POINTER_TYPE_P (type))
5184 	    t = fold_build_pointer_plus (vmain, step);
5185 	  else
5186 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5187 	  t = force_gimple_operand_gsi (&gsi, t,
5188 					DECL_P (vback)
5189 					&& TREE_ADDRESSABLE (vback),
5190 					NULL_TREE, true, GSI_SAME_STMT);
5191 	  assign_stmt = gimple_build_assign (vback, t);
5192 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5193 
5194 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5195 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5196 		      ? t : vback, e);
5197 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5198 	}
5199 
5200       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5201       gsi_remove (&gsi, true);
5202 
5203       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5204 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5205     }
5206 
5207   /* Remove the GIMPLE_OMP_FOR statement.  */
5208   gsi = gsi_for_stmt (fd->for_stmt);
5209   gsi_remove (&gsi, true);
5210 
5211   /* Remove the GIMPLE_OMP_RETURN statement.  */
5212   gsi = gsi_last_bb (exit_bb);
5213   gsi_remove (&gsi, true);
5214 
5215   FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5216   if (!broken_loop)
5217     remove_edge (BRANCH_EDGE (entry_bb));
5218   else
5219     {
5220       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5221       region->outer->cont = NULL;
5222     }
5223 
5224   /* Connect all the blocks.  */
5225   if (!broken_loop)
5226     {
5227       ep = find_edge (cont_bb, body_bb);
5228       if (gimple_omp_for_combined_p (fd->for_stmt))
5229 	{
5230 	  remove_edge (ep);
5231 	  ep = NULL;
5232 	}
5233       else if (fd->collapse > 1)
5234 	{
5235 	  remove_edge (ep);
5236 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5237 	}
5238       else
5239 	ep->flags = EDGE_TRUE_VALUE;
5240       find_edge (cont_bb, fin_bb)->flags
5241 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5242     }
5243 
5244   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5245 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5246   if (!broken_loop)
5247     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5248 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5249 
5250   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5251     {
5252       struct loop *loop = alloc_loop ();
5253       loop->header = body_bb;
5254       if (collapse_bb == NULL)
5255 	loop->latch = cont_bb;
5256       add_loop (loop, body_bb->loop_father);
5257     }
5258 }
5259 
5260 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5261    partitioned loop.  The lowering here is abstracted, in that the
5262    loop parameters are passed through internal functions, which are
5263    further lowered by oacc_device_lower, once we get to the target
5264    compiler.  The loop is of the form:
5265 
5266    for (V = B; V LTGT E; V += S) {BODY}
5267 
5268    where LTGT is < or >.  We may have a specified chunking size,
5269    CHUNK_SIZE (constant 0 for no chunking), and we will have a GWV
5270    partitioning mask, specifying dimensions over which the loop is to be
5271    partitioned (see note below).  We generate code that looks like
5272    (this ignores tiling):
5273 
5274    <entry_bb> [incoming FALL->body, BRANCH->exit]
5275      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5276      T range = E - B;
5277      T chunk_no = 0;
5278      T DIR = LTGT == '<' ? +1 : -1;
5279      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5280      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5281 
5282    <head_bb> [created by splitting end of entry_bb]
5283      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5284      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5285      if (!(offset LTGT bound)) goto bottom_bb;
5286 
5287    <body_bb> [incoming]
5288      V = B + offset;
5289      {BODY}
5290 
5291    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5292      offset += step;
5293      if (offset LTGT bound) goto body_bb; [*]
5294 
5295    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5296      chunk_no++;
5297      if (chunk_no < chunk_max) goto head_bb;
5298 
5299    <exit_bb> [incoming]
5300      V = B + ((range -/+ 1) / S +/- 1) * S [*]
5301 
5302    [*] Needed if V is live at the end of the loop.  */
5303 
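/* As an illustration (not quoted from the sources), a loop such as

     #pragma acc loop gang vector
     for (i = 0; i < n; i++)
       a[i] += 1;

   is expanded here into the IFN_GOACC_LOOP skeleton above; only
   oacc_device_lower, which knows the target, turns those internal
   function calls into concrete offsets and strides for each
   gang/worker/vector dimension.  */
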
5304 static void
5305 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5306 {
5307   tree v = fd->loop.v;
5308   enum tree_code cond_code = fd->loop.cond_code;
5309   enum tree_code plus_code = PLUS_EXPR;
5310 
5311   tree chunk_size = integer_minus_one_node;
5312   tree gwv = integer_zero_node;
5313   tree iter_type = TREE_TYPE (v);
5314   tree diff_type = iter_type;
5315   tree plus_type = iter_type;
5316   struct oacc_collapse *counts = NULL;
5317 
5318   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5319 		       == GF_OMP_FOR_KIND_OACC_LOOP);
5320   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5321   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5322 
5323   if (POINTER_TYPE_P (iter_type))
5324     {
5325       plus_code = POINTER_PLUS_EXPR;
5326       plus_type = sizetype;
5327     }
5328   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5329     diff_type = signed_type_for (diff_type);
5330 
5331   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5332   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5333   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5334   basic_block bottom_bb = NULL;
5335 
5336   /* entry_bb has two successors; the branch edge is to the exit
5337      block, the fallthrough edge to the body.  */
5338   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5339 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5340 
5341   /* If cont_bb is non-NULL, it has two successors.  The branch
5342      successor is body_bb, or a block whose only successor is body_bb.
5343      Its fallthrough successor is the final block (the same as the
5344      branch successor of entry_bb).  */
5345   if (cont_bb)
5346     {
5347       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5348       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5349 
5350       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5351       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5352     }
5353   else
5354     gcc_assert (!gimple_in_ssa_p (cfun));
5355 
5356   /* The exit block only has entry_bb and cont_bb as predecessors.  */
5357   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5358 
5359   tree chunk_no;
5360   tree chunk_max = NULL_TREE;
5361   tree bound, offset;
5362   tree step = create_tmp_var (diff_type, ".step");
5363   bool up = cond_code == LT_EXPR;
5364   tree dir = build_int_cst (diff_type, up ? +1 : -1);
5365   bool chunking = !gimple_in_ssa_p (cfun);
5366   bool negating;
5367 
5368   /* Tiling vars.  */
5369   tree tile_size = NULL_TREE;
5370   tree element_s = NULL_TREE;
5371   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5372   basic_block elem_body_bb = NULL;
5373   basic_block elem_cont_bb = NULL;
5374 
5375   /* SSA instances.  */
5376   tree offset_incr = NULL_TREE;
5377   tree offset_init = NULL_TREE;
5378 
5379   gimple_stmt_iterator gsi;
5380   gassign *ass;
5381   gcall *call;
5382   gimple *stmt;
5383   tree expr;
5384   location_t loc;
5385   edge split, be, fte;
5386 
5387   /* Split the end of entry_bb to create head_bb.  */
5388   split = split_block (entry_bb, last_stmt (entry_bb));
5389   basic_block head_bb = split->dest;
5390   entry_bb = split->src;
5391 
5392   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5393   gsi = gsi_last_bb (entry_bb);
5394   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5395   loc = gimple_location (for_stmt);
5396 
5397   if (gimple_in_ssa_p (cfun))
5398     {
5399       offset_init = gimple_omp_for_index (for_stmt, 0);
5400       gcc_assert (integer_zerop (fd->loop.n1));
5401       /* The SSA parallelizer does gang parallelism.  */
5402       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5403     }
5404 
5405   if (fd->collapse > 1 || fd->tiling)
5406     {
5407       gcc_assert (!gimple_in_ssa_p (cfun) && up);
5408       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5409       tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5410 					      TREE_TYPE (fd->loop.n2), loc);
5411 
5412       if (SSA_VAR_P (fd->loop.n2))
5413 	{
5414 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5415 					    true, GSI_SAME_STMT);
5416 	  ass = gimple_build_assign (fd->loop.n2, total);
5417 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5418 	}
5419     }
5420 
5421   tree b = fd->loop.n1;
5422   tree e = fd->loop.n2;
5423   tree s = fd->loop.step;
5424 
5425   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5426   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5427 
5428   /* Convert the step, avoiding possible unsigned->signed overflow.  */
5429   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5430   if (negating)
5431     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5432   s = fold_convert (diff_type, s);
5433   if (negating)
5434     s = fold_build1 (NEGATE_EXPR, diff_type, s);
5435   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5436 
5437   if (!chunking)
5438     chunk_size = integer_zero_node;
5439   expr = fold_convert (diff_type, chunk_size);
5440   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5441 					 NULL_TREE, true, GSI_SAME_STMT);
5442 
5443   if (fd->tiling)
5444     {
5445       /* Determine the tile size and element step, and scale the
5446 	 outer loop step size by the tile size.  */
5447       tile_size = create_tmp_var (diff_type, ".tile_size");
5448       expr = build_int_cst (diff_type, 1);
5449       for (int ix = 0; ix < fd->collapse; ix++)
5450 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5451       expr = force_gimple_operand_gsi (&gsi, expr, true,
5452 				       NULL_TREE, true, GSI_SAME_STMT);
5453       ass = gimple_build_assign (tile_size, expr);
5454       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5455 
5456       element_s = create_tmp_var (diff_type, ".element_s");
5457       ass = gimple_build_assign (element_s, s);
5458       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5459 
5460       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5461       s = force_gimple_operand_gsi (&gsi, expr, true,
5462 				    NULL_TREE, true, GSI_SAME_STMT);
5463     }
5464 
5465   /* Determine the range, avoiding possible unsigned->signed overflow.  */
5466   negating = !up && TYPE_UNSIGNED (iter_type);
5467   expr = fold_build2 (MINUS_EXPR, plus_type,
5468 		      fold_convert (plus_type, negating ? b : e),
5469 		      fold_convert (plus_type, negating ? e : b));
5470   expr = fold_convert (diff_type, expr);
5471   if (negating)
5472     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5473   tree range = force_gimple_operand_gsi (&gsi, expr, true,
5474 					 NULL_TREE, true, GSI_SAME_STMT);
5475 
5476   chunk_no = build_int_cst (diff_type, 0);
5477   if (chunking)
5478     {
5479       gcc_assert (!gimple_in_ssa_p (cfun));
5480 
5481       expr = chunk_no;
5482       chunk_max = create_tmp_var (diff_type, ".chunk_max");
5483       chunk_no = create_tmp_var (diff_type, ".chunk_no");
5484 
5485       ass = gimple_build_assign (chunk_no, expr);
5486       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5487 
5488       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5489 					 build_int_cst (integer_type_node,
5490 							IFN_GOACC_LOOP_CHUNKS),
5491 					 dir, range, s, chunk_size, gwv);
5492       gimple_call_set_lhs (call, chunk_max);
5493       gimple_set_location (call, loc);
5494       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5495     }
5496   else
5497     chunk_size = chunk_no;
5498 
5499   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5500 				     build_int_cst (integer_type_node,
5501 						    IFN_GOACC_LOOP_STEP),
5502 				     dir, range, s, chunk_size, gwv);
5503   gimple_call_set_lhs (call, step);
5504   gimple_set_location (call, loc);
5505   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5506 
5507   /* Remove the GIMPLE_OMP_FOR.  */
5508   gsi_remove (&gsi, true);
5509 
5510   /* Fixup edges from head_bb.  */
5511   be = BRANCH_EDGE (head_bb);
5512   fte = FALLTHRU_EDGE (head_bb);
5513   be->flags |= EDGE_FALSE_VALUE;
5514   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5515 
5516   basic_block body_bb = fte->dest;
5517 
5518   if (gimple_in_ssa_p (cfun))
5519     {
5520       gsi = gsi_last_bb (cont_bb);
5521       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5522 
5523       offset = gimple_omp_continue_control_use (cont_stmt);
5524       offset_incr = gimple_omp_continue_control_def (cont_stmt);
5525     }
5526   else
5527     {
5528       offset = create_tmp_var (diff_type, ".offset");
5529       offset_init = offset_incr = offset;
5530     }
5531   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5532 
5533   /* Loop offset & bound go into head_bb.  */
5534   gsi = gsi_start_bb (head_bb);
5535 
5536   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5537 				     build_int_cst (integer_type_node,
5538 						    IFN_GOACC_LOOP_OFFSET),
5539 				     dir, range, s,
5540 				     chunk_size, gwv, chunk_no);
5541   gimple_call_set_lhs (call, offset_init);
5542   gimple_set_location (call, loc);
5543   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5544 
5545   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5546 				     build_int_cst (integer_type_node,
5547 						    IFN_GOACC_LOOP_BOUND),
5548 				     dir, range, s,
5549 				     chunk_size, gwv, offset_init);
5550   gimple_call_set_lhs (call, bound);
5551   gimple_set_location (call, loc);
5552   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5553 
5554   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5555   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5556 		    GSI_CONTINUE_LINKING);
5557 
5558   /* V assignment goes into body_bb.  */
5559   if (!gimple_in_ssa_p (cfun))
5560     {
5561       gsi = gsi_start_bb (body_bb);
5562 
5563       expr = build2 (plus_code, iter_type, b,
5564 		     fold_convert (plus_type, offset));
5565       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5566 				       true, GSI_SAME_STMT);
5567       ass = gimple_build_assign (v, expr);
5568       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5569 
5570       if (fd->collapse > 1 || fd->tiling)
5571 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5572 
5573       if (fd->tiling)
5574 	{
5575 	  /* Determine the range of the element loop -- usually simply
5576 	     the tile_size, but could be smaller if the final
5577 	     iteration of the outer loop is a partial tile.  */
5578 	  tree e_range = create_tmp_var (diff_type, ".e_range");
5579 
5580 	  expr = build2 (MIN_EXPR, diff_type,
5581 			 build2 (MINUS_EXPR, diff_type, bound, offset),
5582 			 build2 (MULT_EXPR, diff_type, tile_size,
5583 				 element_s));
5584 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5585 					   true, GSI_SAME_STMT);
5586 	  ass = gimple_build_assign (e_range, expr);
5587 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5588 
5589 	  /* Determine bound, offset & step of the inner loop.  */
5590 	  e_bound = create_tmp_var (diff_type, ".e_bound");
5591 	  e_offset = create_tmp_var (diff_type, ".e_offset");
5592 	  e_step = create_tmp_var (diff_type, ".e_step");
5593 
5594 	  /* Mark these as element loops.  */
5595 	  tree t, e_gwv = integer_minus_one_node;
5596 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5597 
5598 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5599 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5600 					     element_s, chunk, e_gwv, chunk);
5601 	  gimple_call_set_lhs (call, e_offset);
5602 	  gimple_set_location (call, loc);
5603 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5604 
5605 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5606 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5607 					     element_s, chunk, e_gwv, e_offset);
5608 	  gimple_call_set_lhs (call, e_bound);
5609 	  gimple_set_location (call, loc);
5610 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5611 
5612 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5613 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5614 					     element_s, chunk, e_gwv);
5615 	  gimple_call_set_lhs (call, e_step);
5616 	  gimple_set_location (call, loc);
5617 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5618 
5619 	  /* Add test and split block.  */
5620 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5621 	  stmt = gimple_build_cond_empty (expr);
5622 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5623 	  split = split_block (body_bb, stmt);
5624 	  elem_body_bb = split->dest;
5625 	  if (cont_bb == body_bb)
5626 	    cont_bb = elem_body_bb;
5627 	  body_bb = split->src;
5628 
5629 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5630 
5631 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5632 	  if (cont_bb == NULL)
5633 	    {
5634 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5635 	      e->probability = PROB_EVEN;
5636 	      split->probability = PROB_EVEN;
5637 	    }
5638 
5639 	  /* Initialize the user's loop vars.  */
5640 	  gsi = gsi_start_bb (elem_body_bb);
5641 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5642 	}
5643     }
5644 
5645   /* Loop increment goes into cont_bb.  If this is not a loop, we
5646      will have spawned threads as if it were, and each one will
5647      execute one iteration.  The specification is not explicit about
5648      whether such constructs are ill-formed or not, and they can
5649      occur, especially when noreturn routines are involved.  */
5650   if (cont_bb)
5651     {
5652       gsi = gsi_last_bb (cont_bb);
5653       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5654       loc = gimple_location (cont_stmt);
5655 
5656       if (fd->tiling)
5657 	{
5658 	  /* Insert element loop increment and test.  */
5659 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5660 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5661 					   true, GSI_SAME_STMT);
5662 	  ass = gimple_build_assign (e_offset, expr);
5663 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5664 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5665 
5666 	  stmt = gimple_build_cond_empty (expr);
5667 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5668 	  split = split_block (cont_bb, stmt);
5669 	  elem_cont_bb = split->src;
5670 	  cont_bb = split->dest;
5671 
5672 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5673 	  make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5674 
5675 	  make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5676 
5677 	  gsi = gsi_for_stmt (cont_stmt);
5678 	}
5679 
5680       /* Increment offset.  */
5681       if (gimple_in_ssa_p (cfun))
5682 	expr = build2 (plus_code, iter_type, offset,
5683 		       fold_convert (plus_type, step));
5684       else
5685 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5686       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5687 				       true, GSI_SAME_STMT);
5688       ass = gimple_build_assign (offset_incr, expr);
5689       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5690       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5691       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5692 
5693       /* Remove the GIMPLE_OMP_CONTINUE.  */
5694       gsi_remove (&gsi, true);
5695 
5696       /* Fixup edges from cont_bb.  */
5697       be = BRANCH_EDGE (cont_bb);
5698       fte = FALLTHRU_EDGE (cont_bb);
5699       be->flags |= EDGE_TRUE_VALUE;
5700       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5701 
5702       if (chunking)
5703 	{
5704 	  /* Split the beginning of exit_bb to make bottom_bb.  We
5705 	     need to insert a nop at the start, because splitting is
5706 	     after a stmt, not before.  */
5707 	  gsi = gsi_start_bb (exit_bb);
5708 	  stmt = gimple_build_nop ();
5709 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5710 	  split = split_block (exit_bb, stmt);
5711 	  bottom_bb = split->src;
5712 	  exit_bb = split->dest;
5713 	  gsi = gsi_last_bb (bottom_bb);
5714 
5715 	  /* Chunk increment and test goes into bottom_bb.  */
5716 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5717 			 build_int_cst (diff_type, 1));
5718 	  ass = gimple_build_assign (chunk_no, expr);
5719 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5720 
5721 	  /* Chunk test at end of bottom_bb.  */
5722 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5723 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5724 			    GSI_CONTINUE_LINKING);
5725 
5726 	  /* Fixup edges from bottom_bb.  */
5727 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5728 	  make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5729 	}
5730     }
5731 
5732   gsi = gsi_last_bb (exit_bb);
5733   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5734   loc = gimple_location (gsi_stmt (gsi));
5735 
5736   if (!gimple_in_ssa_p (cfun))
5737     {
5738       /* Insert the final value of V, in case it is live.  This is the
5739 	 value for the only thread that survives past the join.  */
5740       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5741       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5742       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5743       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5744       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5745       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5746 				       true, GSI_SAME_STMT);
5747       ass = gimple_build_assign (v, expr);
5748       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5749     }
5750 
5751   /* Remove the OMP_RETURN.  */
5752   gsi_remove (&gsi, true);
5753 
5754   if (cont_bb)
5755     {
5756       /* We now have one, two or three nested loops.  Update the loop
5757 	 structures.  */
5758       struct loop *parent = entry_bb->loop_father;
5759       struct loop *body = body_bb->loop_father;
5760 
5761       if (chunking)
5762 	{
5763 	  struct loop *chunk_loop = alloc_loop ();
5764 	  chunk_loop->header = head_bb;
5765 	  chunk_loop->latch = bottom_bb;
5766 	  add_loop (chunk_loop, parent);
5767 	  parent = chunk_loop;
5768 	}
5769       else if (parent != body)
5770 	{
5771 	  gcc_assert (body->header == body_bb);
5772 	  gcc_assert (body->latch == cont_bb
5773 		      || single_pred (body->latch) == cont_bb);
5774 	  parent = NULL;
5775 	}
5776 
5777       if (parent)
5778 	{
5779 	  struct loop *body_loop = alloc_loop ();
5780 	  body_loop->header = body_bb;
5781 	  body_loop->latch = cont_bb;
5782 	  add_loop (body_loop, parent);
5783 
5784 	  if (fd->tiling)
5785 	    {
5786 	      /* Insert tiling's element loop.  */
5787 	      struct loop *inner_loop = alloc_loop ();
5788 	      inner_loop->header = elem_body_bb;
5789 	      inner_loop->latch = elem_cont_bb;
5790 	      add_loop (inner_loop, body_loop);
5791 	    }
5792 	}
5793     }
5794 }
5795 
5796 /* Expand the OMP loop defined by REGION.  */
5797 
5798 static void
5799 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5800 {
5801   struct omp_for_data fd;
5802   struct omp_for_data_loop *loops;
5803 
5804   loops
5805     = (struct omp_for_data_loop *)
5806       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5807 	      * sizeof (struct omp_for_data_loop));
5808   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5809 			&fd, loops);
5810   region->sched_kind = fd.sched_kind;
5811   region->sched_modifiers = fd.sched_modifiers;
5812 
5813   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5814   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5815   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5816   if (region->cont)
5817     {
5818       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5819       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5820       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5821     }
5822   else
5823     /* If there isn't a continue then this is a degenerate case where
5824        the introduction of abnormal edges during lowering will prevent
5825        original loops from being detected.  Fix that up.  */
5826     loops_state_set (LOOPS_NEED_FIXUP);
5827 
5828   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5829     expand_omp_simd (region, &fd);
5830   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5831     expand_cilk_for (region, &fd);
5832   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5833     {
5834       gcc_assert (!inner_stmt);
5835       expand_oacc_for (region, &fd);
5836     }
5837   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5838     {
5839       if (gimple_omp_for_combined_into_p (fd.for_stmt))
5840 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5841       else
5842 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5843     }
5844   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5845 	   && !fd.have_ordered)
5846     {
5847       if (fd.chunk_size == NULL)
5848 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5849       else
5850 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5851     }
5852   else
5853     {
5854       int fn_index, start_ix, next_ix;
5855 
5856       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5857 		  == GF_OMP_FOR_KIND_FOR);
5858       if (fd.chunk_size == NULL
5859 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5860 	fd.chunk_size = integer_zero_node;
5861       gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
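      /* FN_INDEX selects among the GOMP_loop_*_start/next entry points:
	 0-2 static/dynamic/guided, 3 runtime, 4-5 the nonmonotonic
	 dynamic/guided variants; the ordered flavors sit 6 entries
	 further on.  */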
5862       switch (fd.sched_kind)
5863 	{
5864 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5865 	  fn_index = 3;
5866 	  break;
5867 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5868 	case OMP_CLAUSE_SCHEDULE_GUIDED:
5869 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5870 	      && !fd.ordered
5871 	      && !fd.have_ordered)
5872 	    {
5873 	      fn_index = 3 + fd.sched_kind;
5874 	      break;
5875 	    }
5876 	  /* FALLTHRU */
5877 	default:
5878 	  fn_index = fd.sched_kind;
5879 	  break;
5880 	}
5881       if (!fd.ordered)
5882 	fn_index += fd.have_ordered * 6;
5883       if (fd.ordered)
5884 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5885       else
5886 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5887       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5888       if (fd.iter_type == long_long_unsigned_type_node)
5889 	{
5890 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5891 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5892 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5893 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5894 	}
5895       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5896 			      (enum built_in_function) next_ix, inner_stmt);
5897     }
5898 
5899   if (gimple_in_ssa_p (cfun))
5900     update_ssa (TODO_update_ssa_only_virtuals);
5901 }
5902 
5903 /* Expand code for an OpenMP sections directive.  In pseudocode, we generate
5904 
5905 	v = GOMP_sections_start (n);
5906     L0:
5907 	switch (v)
5908 	  {
5909 	  case 0:
5910 	    goto L2;
5911 	  case 1:
5912 	    section 1;
5913 	    goto L1;
5914 	  case 2:
5915 	    ...
5916 	  case n:
5917 	    ...
5918 	  default:
5919 	    abort ();
5920 	  }
5921     L1:
5922 	v = GOMP_sections_next ();
5923 	goto L0;
5924     L2:
5925 	reduction;
5926 
5927     If this is a combined parallel sections region, replace the call to
5928     GOMP_sections_start with a call to GOMP_sections_next.  */
5929 
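/* For example (illustrative only), the construct

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   becomes the switch above with cases 1 and 2 dispatching to the two
   section bodies, driven by GOMP_sections_start/GOMP_sections_next.  */
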
5930 static void
5931 expand_omp_sections (struct omp_region *region)
5932 {
5933   tree t, u, vin = NULL, vmain, vnext, l2;
5934   unsigned len;
5935   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5936   gimple_stmt_iterator si, switch_si;
5937   gomp_sections *sections_stmt;
5938   gimple *stmt;
5939   gomp_continue *cont;
5940   edge_iterator ei;
5941   edge e;
5942   struct omp_region *inner;
5943   unsigned i, casei;
5944   bool exit_reachable = region->cont != NULL;
5945 
5946   gcc_assert (region->exit != NULL);
5947   entry_bb = region->entry;
5948   l0_bb = single_succ (entry_bb);
5949   l1_bb = region->cont;
5950   l2_bb = region->exit;
5951   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5952     l2 = gimple_block_label (l2_bb);
5953   else
5954     {
5955       /* This can happen if there are reductions.  */
5956       len = EDGE_COUNT (l0_bb->succs);
5957       gcc_assert (len > 0);
5958       e = EDGE_SUCC (l0_bb, len - 1);
5959       si = gsi_last_bb (e->dest);
5960       l2 = NULL_TREE;
5961       if (gsi_end_p (si)
5962 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5963 	l2 = gimple_block_label (e->dest);
5964       else
5965 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
5966 	  {
5967 	    si = gsi_last_bb (e->dest);
5968 	    if (gsi_end_p (si)
5969 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5970 	      {
5971 		l2 = gimple_block_label (e->dest);
5972 		break;
5973 	      }
5974 	  }
5975     }
5976   if (exit_reachable)
5977     default_bb = create_empty_bb (l1_bb->prev_bb);
5978   else
5979     default_bb = create_empty_bb (l0_bb);
5980 
5981   /* We will build a switch() with enough cases for all the
5982      GIMPLE_OMP_SECTION regions, a '0' case taken when no more work
5983      remains, and a default case to abort if something goes wrong.  */
5984   len = EDGE_COUNT (l0_bb->succs);
5985 
5986   /* Use vec::quick_push on label_vec throughout, since we know the size
5987      in advance.  */
5988   auto_vec<tree> label_vec (len);
5989 
5990   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5991      GIMPLE_OMP_SECTIONS statement.  */
5992   si = gsi_last_bb (entry_bb);
5993   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5994   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5995   vin = gimple_omp_sections_control (sections_stmt);
5996   if (!is_combined_parallel (region))
5997     {
5998       /* If we are not inside a combined parallel+sections region,
5999 	 call GOMP_sections_start.  */
6000       t = build_int_cst (unsigned_type_node, len - 1);
6001       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6002       stmt = gimple_build_call (u, 1, t);
6003     }
6004   else
6005     {
6006       /* Otherwise, call GOMP_sections_next.  */
6007       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6008       stmt = gimple_build_call (u, 0);
6009     }
6010   gimple_call_set_lhs (stmt, vin);
6011   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6012   gsi_remove (&si, true);
6013 
6014   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6015      L0_BB.  */
6016   switch_si = gsi_last_bb (l0_bb);
6017   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6018   if (exit_reachable)
6019     {
6020       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6021       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6022       vmain = gimple_omp_continue_control_use (cont);
6023       vnext = gimple_omp_continue_control_def (cont);
6024     }
6025   else
6026     {
6027       vmain = vin;
6028       vnext = NULL_TREE;
6029     }
6030 
6031   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6032   label_vec.quick_push (t);
6033   i = 1;
6034 
6035   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
6036   for (inner = region->inner, casei = 1;
6037        inner;
6038        inner = inner->next, i++, casei++)
6039     {
6040       basic_block s_entry_bb, s_exit_bb;
6041 
6042       /* Skip optional reduction region.  */
6043       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6044 	{
6045 	  --i;
6046 	  --casei;
6047 	  continue;
6048 	}
6049 
6050       s_entry_bb = inner->entry;
6051       s_exit_bb = inner->exit;
6052 
6053       t = gimple_block_label (s_entry_bb);
6054       u = build_int_cst (unsigned_type_node, casei);
6055       u = build_case_label (u, NULL, t);
6056       label_vec.quick_push (u);
6057 
6058       si = gsi_last_bb (s_entry_bb);
6059       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6060       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6061       gsi_remove (&si, true);
6062       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6063 
6064       if (s_exit_bb == NULL)
6065 	continue;
6066 
6067       si = gsi_last_bb (s_exit_bb);
6068       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6069       gsi_remove (&si, true);
6070 
6071       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6072     }
6073 
6074   /* Error handling code goes in DEFAULT_BB.  */
6075   t = gimple_block_label (default_bb);
6076   u = build_case_label (NULL, NULL, t);
6077   make_edge (l0_bb, default_bb, 0);
6078   add_bb_to_loop (default_bb, current_loops->tree_root);
6079 
6080   stmt = gimple_build_switch (vmain, u, label_vec);
6081   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6082   gsi_remove (&switch_si, true);
6083 
6084   si = gsi_start_bb (default_bb);
6085   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6086   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6087 
6088   if (exit_reachable)
6089     {
6090       tree bfn_decl;
6091 
6092       /* Code to get the next section goes in L1_BB.  */
6093       si = gsi_last_bb (l1_bb);
6094       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6095 
6096       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6097       stmt = gimple_build_call (bfn_decl, 0);
6098       gimple_call_set_lhs (stmt, vnext);
6099       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6100       gsi_remove (&si, true);
6101 
6102       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6103     }
6104 
6105   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
6106   si = gsi_last_bb (l2_bb);
6107   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6108     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6109   else if (gimple_omp_return_lhs (gsi_stmt (si)))
6110     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6111   else
6112     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6113   stmt = gimple_build_call (t, 0);
6114   if (gimple_omp_return_lhs (gsi_stmt (si)))
6115     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6116   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6117   gsi_remove (&si, true);
6118 
6119   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6120 }
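
/* For illustration only (a sketch, not actual compiler output): given

       #pragma omp sections
       {
         #pragma omp section
           body1 ();
         #pragma omp section
           body2 ();
       }

   the blocks stitched together above behave roughly as

       for (i = GOMP_sections_start (2); i != 0; i = GOMP_sections_next ())
         switch (i)
           {
           case 1: body1 (); break;
           case 2: body2 (); break;
           default: __builtin_trap ();
           }
       GOMP_sections_end ();

   (the real switch also has a case 0 that jumps straight to the exit, and
   the final call has _nowait and _cancel variants, as selected above).  */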
6121 
6122 /* Expand code for an OpenMP single directive.  We've already expanded
6123    much of the code; here we simply place the GOMP_barrier call.  */
6124 
6125 static void
6126 expand_omp_single (struct omp_region *region)
6127 {
6128   basic_block entry_bb, exit_bb;
6129   gimple_stmt_iterator si;
6130 
6131   entry_bb = region->entry;
6132   exit_bb = region->exit;
6133 
6134   si = gsi_last_bb (entry_bb);
6135   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6136   gsi_remove (&si, true);
6137   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6138 
6139   si = gsi_last_bb (exit_bb);
6140   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6141     {
6142       tree t = gimple_omp_return_lhs (gsi_stmt (si));
6143       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6144     }
6145   gsi_remove (&si, true);
6146   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6147 }
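
/* Illustrative sketch (assuming the usual libgomp lowering, most of which
   was already produced during omp-lowering):

       #pragma omp single
         body ();

   ends up behaving roughly as

       if (GOMP_single_start ())
         body ();
       GOMP_barrier ();

   where the barrier is the only part placed here, and it is omitted when
   a nowait clause is present.  */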
6148 
6149 /* Generic expansion for OpenMP synchronization directives: master,
6150    taskgroup, ordered, critical and teams.  All we need to do here is
6151    remove the entry and exit markers for REGION.  */
6152 
6153 static void
6154 expand_omp_synch (struct omp_region *region)
6155 {
6156   basic_block entry_bb, exit_bb;
6157   gimple_stmt_iterator si;
6158 
6159   entry_bb = region->entry;
6160   exit_bb = region->exit;
6161 
6162   si = gsi_last_bb (entry_bb);
6163   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6164 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6165 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6166 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6167 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6168 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6169   gsi_remove (&si, true);
6170   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6171 
6172   if (exit_bb)
6173     {
6174       si = gsi_last_bb (exit_bb);
6175       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6176       gsi_remove (&si, true);
6177       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6178     }
6179 }
6180 
6181 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6182    operation as a normal volatile load.  */
6183 
6184 static bool
6185 expand_omp_atomic_load (basic_block load_bb, tree addr,
6186 			tree loaded_val, int index)
6187 {
6188   enum built_in_function tmpbase;
6189   gimple_stmt_iterator gsi;
6190   basic_block store_bb;
6191   location_t loc;
6192   gimple *stmt;
6193   tree decl, call, type, itype;
6194 
6195   gsi = gsi_last_bb (load_bb);
6196   stmt = gsi_stmt (gsi);
6197   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6198   loc = gimple_location (stmt);
6199 
6200   /* ??? If the target does not implement atomic_load_optab[mode], and mode
6201      is smaller than word size, then expand_atomic_load assumes that the load
6202      is atomic.  We could avoid the builtin entirely in this case.  */
6203 
6204   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6205   decl = builtin_decl_explicit (tmpbase);
6206   if (decl == NULL_TREE)
6207     return false;
6208 
6209   type = TREE_TYPE (loaded_val);
6210   itype = TREE_TYPE (TREE_TYPE (decl));
6211 
6212   call = build_call_expr_loc (loc, decl, 2, addr,
6213 			      build_int_cst (NULL,
6214 					     gimple_omp_atomic_seq_cst_p (stmt)
6215 					     ? MEMMODEL_SEQ_CST
6216 					     : MEMMODEL_RELAXED));
6217   if (!useless_type_conversion_p (type, itype))
6218     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6219   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6220 
6221   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6222   gsi_remove (&gsi, true);
6223 
6224   store_bb = single_succ (load_bb);
6225   gsi = gsi_last_bb (store_bb);
6226   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6227   gsi_remove (&gsi, true);
6228 
6229   if (gimple_in_ssa_p (cfun))
6230     update_ssa (TODO_update_ssa_no_phi);
6231 
6232   return true;
6233 }
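
/* For example (a sketch, not actual output): with an 8-byte type, INDEX
   is 3 and

       #pragma omp atomic read
         v = *addr;

   becomes roughly

       v = __atomic_load_8 (addr, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST used instead when the seq_cst clause is
   present.  */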
6234 
6235 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6236    operation as a normal volatile store.  */
6237 
6238 static bool
6239 expand_omp_atomic_store (basic_block load_bb, tree addr,
6240 			 tree loaded_val, tree stored_val, int index)
6241 {
6242   enum built_in_function tmpbase;
6243   gimple_stmt_iterator gsi;
6244   basic_block store_bb = single_succ (load_bb);
6245   location_t loc;
6246   gimple *stmt;
6247   tree decl, call, type, itype;
6248   machine_mode imode;
6249   bool exchange;
6250 
6251   gsi = gsi_last_bb (load_bb);
6252   stmt = gsi_stmt (gsi);
6253   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6254 
6255   /* If the load value is needed, then this isn't a store but an exchange.  */
6256   exchange = gimple_omp_atomic_need_value_p (stmt);
6257 
6258   gsi = gsi_last_bb (store_bb);
6259   stmt = gsi_stmt (gsi);
6260   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6261   loc = gimple_location (stmt);
6262 
6263   /* ??? If the target does not implement atomic_store_optab[mode], and mode
6264      is smaller than word size, then expand_atomic_store assumes that the store
6265      is atomic.  We could avoid the builtin entirely in this case.  */
6266 
6267   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6268   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6269   decl = builtin_decl_explicit (tmpbase);
6270   if (decl == NULL_TREE)
6271     return false;
6272 
6273   type = TREE_TYPE (stored_val);
6274 
6275   /* Dig out the type of the function's second argument.  */
6276   itype = TREE_TYPE (decl);
6277   itype = TYPE_ARG_TYPES (itype);
6278   itype = TREE_CHAIN (itype);
6279   itype = TREE_VALUE (itype);
6280   imode = TYPE_MODE (itype);
6281 
6282   if (exchange && !can_atomic_exchange_p (imode, true))
6283     return false;
6284 
6285   if (!useless_type_conversion_p (itype, type))
6286     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6287   call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6288 			      build_int_cst (NULL,
6289 					     gimple_omp_atomic_seq_cst_p (stmt)
6290 					     ? MEMMODEL_SEQ_CST
6291 					     : MEMMODEL_RELAXED));
6292   if (exchange)
6293     {
6294       if (!useless_type_conversion_p (type, itype))
6295 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6296       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6297     }
6298 
6299   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6300   gsi_remove (&gsi, true);
6301 
6302   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6303   gsi = gsi_last_bb (load_bb);
6304   gsi_remove (&gsi, true);
6305 
6306   if (gimple_in_ssa_p (cfun))
6307     update_ssa (TODO_update_ssa_no_phi);
6308 
6309   return true;
6310 }
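
/* For example (a sketch): with a 4-byte type, INDEX is 2 and

       #pragma omp atomic write
         *addr = val;

   becomes roughly

       __atomic_store_4 (addr, val, MEMMODEL_RELAXED);

   while a capture that also needs the old value uses the exchange form

       v = __atomic_exchange_4 (addr, val, MEMMODEL_RELAXED);  */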
6311 
6312 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6313    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6314    size of the data type, and thus usable to find the index of the builtin
6315    decl.  Returns false if the expression is not of the proper form.  */
6316 
6317 static bool
6318 expand_omp_atomic_fetch_op (basic_block load_bb,
6319 			    tree addr, tree loaded_val,
6320 			    tree stored_val, int index)
6321 {
6322   enum built_in_function oldbase, newbase, tmpbase;
6323   tree decl, itype, call;
6324   tree lhs, rhs;
6325   basic_block store_bb = single_succ (load_bb);
6326   gimple_stmt_iterator gsi;
6327   gimple *stmt;
6328   location_t loc;
6329   enum tree_code code;
6330   bool need_old, need_new;
6331   machine_mode imode;
6332   bool seq_cst;
6333 
6334   /* We expect to find the following sequences:
6335 
6336    load_bb:
6337        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6338 
6339    store_bb:
6340        val = tmp OP something; (or: something OP tmp)
6341        GIMPLE_OMP_STORE (val)
6342 
6343   ???FIXME: Allow a more flexible sequence.
6344   Perhaps use data flow to pick the statements.
6345 
6346   */
6347 
6348   gsi = gsi_after_labels (store_bb);
6349   stmt = gsi_stmt (gsi);
6350   loc = gimple_location (stmt);
6351   if (!is_gimple_assign (stmt))
6352     return false;
6353   gsi_next (&gsi);
6354   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6355     return false;
6356   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6357   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6358   seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6359   gcc_checking_assert (!need_old || !need_new);
6360 
6361   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6362     return false;
6363 
6364   /* Check for one of the supported fetch-op operations.  */
6365   code = gimple_assign_rhs_code (stmt);
6366   switch (code)
6367     {
6368     case PLUS_EXPR:
6369     case POINTER_PLUS_EXPR:
6370       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6371       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6372       break;
6373     case MINUS_EXPR:
6374       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6375       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6376       break;
6377     case BIT_AND_EXPR:
6378       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6379       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6380       break;
6381     case BIT_IOR_EXPR:
6382       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6383       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6384       break;
6385     case BIT_XOR_EXPR:
6386       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6387       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6388       break;
6389     default:
6390       return false;
6391     }
6392 
6393   /* Make sure the expression is of the proper form.  */
6394   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6395     rhs = gimple_assign_rhs2 (stmt);
6396   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6397 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6398     rhs = gimple_assign_rhs1 (stmt);
6399   else
6400     return false;
6401 
6402   tmpbase = ((enum built_in_function)
6403 	     ((need_new ? newbase : oldbase) + index + 1));
6404   decl = builtin_decl_explicit (tmpbase);
6405   if (decl == NULL_TREE)
6406     return false;
6407   itype = TREE_TYPE (TREE_TYPE (decl));
6408   imode = TYPE_MODE (itype);
6409 
6410   /* We could test all of the various optabs involved, but the fact of the
6411      matter is that (with the exception of i486 vs i586 and xadd) all targets
6412      that support any atomic operation optab also implement compare-and-swap.
6413      Let optabs.c take care of expanding any compare-and-swap loop.  */
6414   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6415     return false;
6416 
6417   gsi = gsi_last_bb (load_bb);
6418   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6419 
6420   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6421      It only requires that the operation happen atomically.  Thus we can
6422      use the RELAXED memory model.  */
6423   call = build_call_expr_loc (loc, decl, 3, addr,
6424 			      fold_convert_loc (loc, itype, rhs),
6425 			      build_int_cst (NULL,
6426 					     seq_cst ? MEMMODEL_SEQ_CST
6427 						     : MEMMODEL_RELAXED));
6428 
6429   if (need_old || need_new)
6430     {
6431       lhs = need_old ? loaded_val : stored_val;
6432       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6433       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6434     }
6435   else
6436     call = fold_convert_loc (loc, void_type_node, call);
6437   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6438   gsi_remove (&gsi, true);
6439 
6440   gsi = gsi_last_bb (store_bb);
6441   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6442   gsi_remove (&gsi, true);
6443   gsi = gsi_last_bb (store_bb);
6444   stmt = gsi_stmt (gsi);
6445   gsi_remove (&gsi, true);
6446 
6447   if (gimple_in_ssa_p (cfun))
6448     {
6449       release_defs (stmt);
6450       update_ssa (TODO_update_ssa_no_phi);
6451     }
6452 
6453   return true;
6454 }
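
/* For example (a sketch): with a 4-byte integer, INDEX is 2 and

       #pragma omp atomic
         *addr += val;

   matches the PLUS_EXPR case above and becomes roughly

       __atomic_fetch_add_4 (addr, val, MEMMODEL_RELAXED);

   with the corresponding ADD_FETCH variant chosen instead when a capture
   clause needs the updated value.  */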
6455 
6456 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6457 
6458       oldval = *addr;
6459       repeat:
6460 	newval = rhs;	 // with oldval replacing *addr in rhs
6461 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6462 	if (oldval != newval)
6463 	  goto repeat;
6464 
6465    INDEX is log2 of the size of the data type, and thus usable to find the
6466    index of the builtin decl.  */
6467 
6468 static bool
6469 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6470 			    tree addr, tree loaded_val, tree stored_val,
6471 			    int index)
6472 {
6473   tree loadedi, storedi, initial, new_storedi, old_vali;
6474   tree type, itype, cmpxchg, iaddr;
6475   gimple_stmt_iterator si;
6476   basic_block loop_header = single_succ (load_bb);
6477   gimple *phi, *stmt;
6478   edge e;
6479   enum built_in_function fncode;
6480 
6481   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6482      order to use the RELAXED memory model effectively.  */
6483   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6484 				    + index + 1);
6485   cmpxchg = builtin_decl_explicit (fncode);
6486   if (cmpxchg == NULL_TREE)
6487     return false;
6488   type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6489   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6490 
6491   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6492       || !can_atomic_load_p (TYPE_MODE (itype)))
6493     return false;
6494 
6495   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6496   si = gsi_last_bb (load_bb);
6497   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6498 
6499   /* For floating-point values, we'll need to view-convert them to integers
6500      so that we can perform the atomic compare and swap.  Simplify the
6501      following code by always setting up the "i"ntegral variables.  */
6502   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6503     {
6504       tree iaddr_val;
6505 
6506       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6507 							   true));
6508       iaddr_val
6509 	= force_gimple_operand_gsi (&si,
6510 				    fold_convert (TREE_TYPE (iaddr), addr),
6511 				    false, NULL_TREE, true, GSI_SAME_STMT);
6512       stmt = gimple_build_assign (iaddr, iaddr_val);
6513       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6514       loadedi = create_tmp_var (itype);
6515       if (gimple_in_ssa_p (cfun))
6516 	loadedi = make_ssa_name (loadedi);
6517     }
6518   else
6519     {
6520       iaddr = addr;
6521       loadedi = loaded_val;
6522     }
6523 
6524   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6525   tree loaddecl = builtin_decl_explicit (fncode);
6526   if (loaddecl)
6527     initial
6528       = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6529 		      build_call_expr (loaddecl, 2, iaddr,
6530 				       build_int_cst (NULL_TREE,
6531 						      MEMMODEL_RELAXED)));
6532   else
6533     initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6534 		      build_int_cst (TREE_TYPE (iaddr), 0));
6535 
6536   initial
6537     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6538 				GSI_SAME_STMT);
6539 
6540   /* Move the value to the LOADEDI temporary.  */
6541   if (gimple_in_ssa_p (cfun))
6542     {
6543       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6544       phi = create_phi_node (loadedi, loop_header);
6545       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6546 	       initial);
6547     }
6548   else
6549     gsi_insert_before (&si,
6550 		       gimple_build_assign (loadedi, initial),
6551 		       GSI_SAME_STMT);
6552   if (loadedi != loaded_val)
6553     {
6554       gimple_stmt_iterator gsi2;
6555       tree x;
6556 
6557       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6558       gsi2 = gsi_start_bb (loop_header);
6559       if (gimple_in_ssa_p (cfun))
6560 	{
6561 	  gassign *stmt;
6562 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6563 					true, GSI_SAME_STMT);
6564 	  stmt = gimple_build_assign (loaded_val, x);
6565 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6566 	}
6567       else
6568 	{
6569 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6570 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6571 				    true, GSI_SAME_STMT);
6572 	}
6573     }
6574   gsi_remove (&si, true);
6575 
6576   si = gsi_last_bb (store_bb);
6577   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6578 
6579   if (iaddr == addr)
6580     storedi = stored_val;
6581   else
6582     storedi
6583       = force_gimple_operand_gsi (&si,
6584 				  build1 (VIEW_CONVERT_EXPR, itype,
6585 					  stored_val), true, NULL_TREE, true,
6586 				  GSI_SAME_STMT);
6587 
6588   /* Build the compare&swap statement.  */
6589   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6590   new_storedi = force_gimple_operand_gsi (&si,
6591 					  fold_convert (TREE_TYPE (loadedi),
6592 							new_storedi),
6593 					  true, NULL_TREE,
6594 					  true, GSI_SAME_STMT);
6595 
6596   if (gimple_in_ssa_p (cfun))
6597     old_vali = loadedi;
6598   else
6599     {
6600       old_vali = create_tmp_var (TREE_TYPE (loadedi));
6601       stmt = gimple_build_assign (old_vali, loadedi);
6602       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6603 
6604       stmt = gimple_build_assign (loadedi, new_storedi);
6605       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6606     }
6607 
6608   /* Note that we always perform the comparison as an integer, even for
6609      floating point.  This allows the atomic operation to properly
6610      succeed even with NaNs and -0.0.  */
6611   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6612   stmt = gimple_build_cond_empty (ne);
6613   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6614 
6615   /* Update cfg.  */
6616   e = single_succ_edge (store_bb);
6617   e->flags &= ~EDGE_FALLTHRU;
6618   e->flags |= EDGE_FALSE_VALUE;
6619 
6620   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6621 
6622   /* Copy the new value to loadedi (we already did that before the condition
6623      if we are not in SSA).  */
6624   if (gimple_in_ssa_p (cfun))
6625     {
6626       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6627       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6628     }
6629 
6630   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6631   gsi_remove (&si, true);
6632 
6633   struct loop *loop = alloc_loop ();
6634   loop->header = loop_header;
6635   loop->latch = store_bb;
6636   add_loop (loop, loop_header->loop_father);
6637 
6638   if (gimple_in_ssa_p (cfun))
6639     update_ssa (TODO_update_ssa_no_phi);
6640 
6641   return true;
6642 }
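
/* For example (a sketch of the floating-point case): for a double, the
   value is view-converted to a 64-bit integer so that the loop built
   above compares bit patterns rather than values, roughly

       loadedi = *(int64_t *) addr;
       do
         {
           oldi = loadedi;
           newi = VIEW_CONVERT (int64_t, rhs);  /* rhs computed with oldi  */
           loadedi = __sync_val_compare_and_swap_8 (addr, oldi, newi);
         }
       while (loadedi != oldi);

   which, as noted above, succeeds properly even with NaNs and -0.0.  */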
6643 
6644 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6645 
6646 				  GOMP_atomic_start ();
6647 				  *addr = rhs;
6648 				  GOMP_atomic_end ();
6649 
6650    The result is not globally atomic, but works so long as all parallel
6651    references are within #pragma omp atomic directives.  According to
6652    responses received from omp@openmp.org, this appears to be within spec.
6653    That makes sense, since it is how several other compilers handle this
6654    situation as well.
6655    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6656    expanding.  STORED_VAL is the operand of the matching
6657    GIMPLE_OMP_ATOMIC_STORE.
6658 
6659    We replace
6660    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6661    loaded_val = *addr;
6662 
6663    and replace
6664    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6665    *addr = stored_val;
6666 */
6667 
6668 static bool
6669 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6670 			 tree addr, tree loaded_val, tree stored_val)
6671 {
6672   gimple_stmt_iterator si;
6673   gassign *stmt;
6674   tree t;
6675 
6676   si = gsi_last_bb (load_bb);
6677   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6678 
6679   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6680   t = build_call_expr (t, 0);
6681   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6682 
6683   stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6684   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6685   gsi_remove (&si, true);
6686 
6687   si = gsi_last_bb (store_bb);
6688   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6689 
6690   stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6691 			      stored_val);
6692   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6693 
6694   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6695   t = build_call_expr (t, 0);
6696   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6697   gsi_remove (&si, true);
6698 
6699   if (gimple_in_ssa_p (cfun))
6700     update_ssa (TODO_update_ssa_no_phi);
6701   return true;
6702 }
6703 
6704 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
6705    using expand_omp_atomic_fetch_op.  If that fails, we try
6706    expand_omp_atomic_pipeline, and if that fails too, the
6707    ultimate fallback is wrapping the operation in a mutex
6708    (expand_omp_atomic_mutex).  REGION is the atomic region built
6709    by build_omp_regions_1().  */
6710 
6711 static void
6712 expand_omp_atomic (struct omp_region *region)
6713 {
6714   basic_block load_bb = region->entry, store_bb = region->exit;
6715   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6716   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6717   tree loaded_val = gimple_omp_atomic_load_lhs (load);
6718   tree addr = gimple_omp_atomic_load_rhs (load);
6719   tree stored_val = gimple_omp_atomic_store_val (store);
6720   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6721   HOST_WIDE_INT index;
6722 
6723   /* Make sure the type is one of the supported sizes.  */
6724   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6725   index = exact_log2 (index);
6726   if (index >= 0 && index <= 4)
6727     {
6728       unsigned int align = TYPE_ALIGN_UNIT (type);
6729 
6730       /* __sync builtins require strict data alignment.  */
6731       if (exact_log2 (align) >= index)
6732 	{
6733 	  /* Atomic load.  */
6734 	  if (loaded_val == stored_val
6735 	      && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6736 		  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6737 	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6738 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6739 	    return;
6740 
6741 	  /* Atomic store.  */
6742 	  if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6743 	       || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6744 	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6745 	      && store_bb == single_succ (load_bb)
6746 	      && first_stmt (store_bb) == store
6747 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6748 					  stored_val, index))
6749 	    return;
6750 
6751 	  /* When possible, use specialized atomic update functions.  */
6752 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6753 	      && store_bb == single_succ (load_bb)
6754 	      && expand_omp_atomic_fetch_op (load_bb, addr,
6755 					     loaded_val, stored_val, index))
6756 	    return;
6757 
6758 	  /* If we don't have specialized __sync builtins, try and implement
6759 	     as a compare and swap loop.  */
6760 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6761 					  loaded_val, stored_val, index))
6762 	    return;
6763 	}
6764     }
6765 
6766   /* The ultimate fallback is wrapping the operation in a mutex.  */
6767   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6768 }
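
/* For example: for a 4-byte int, TYPE_SIZE_UNIT is 4, so INDEX becomes
   exact_log2 (4) == 2 and the __atomic_*_4 family of builtins is tried.
   A type whose size is not a power of two in [1, 16], or one that is
   insufficiently aligned, goes straight to the mutex fallback.  */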
6769 
6770 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6771    at REGION_EXIT.  */
6772 
6773 static void
6774 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6775 				   basic_block region_exit)
6776 {
6777   struct loop *outer = region_entry->loop_father;
6778   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6779 
6780   /* Don't parallelize the kernels region if it contains more than one outer
6781      loop.  */
6782   unsigned int nr_outer_loops = 0;
6783   struct loop *single_outer = NULL;
6784   for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6785     {
6786       gcc_assert (loop_outer (loop) == outer);
6787 
6788       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6789 	continue;
6790 
6791       if (region_exit != NULL
6792 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6793 	continue;
6794 
6795       nr_outer_loops++;
6796       single_outer = loop;
6797     }
6798   if (nr_outer_loops != 1)
6799     return;
6800 
6801   for (struct loop *loop = single_outer->inner;
6802        loop != NULL;
6803        loop = loop->inner)
6804     if (loop->next)
6805       return;
6806 
6807   /* Mark the loops in the region.  */
6808   for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6809     loop->in_oacc_kernels_region = true;
6810 }
6811 
6812 /* Types used to pass grid and work-group sizes to a kernel invocation.  */
6813 
6814 struct GTY(()) grid_launch_attributes_trees
6815 {
6816   tree kernel_dim_array_type;
6817   tree kernel_lattrs_dimnum_decl;
6818   tree kernel_lattrs_grid_decl;
6819   tree kernel_lattrs_group_decl;
6820   tree kernel_launch_attributes_type;
6821 };
6822 
6823 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6824 
6825 /* Create types used to pass kernel launch attributes to target.  */
6826 
6827 static void
6828 grid_create_kernel_launch_attr_types (void)
6829 {
6830   if (grid_attr_trees)
6831     return;
6832   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6833 
6834   tree dim_arr_index_type
6835     = build_index_type (build_int_cst (integer_type_node, 2));
6836   grid_attr_trees->kernel_dim_array_type
6837     = build_array_type (uint32_type_node, dim_arr_index_type);
6838 
6839   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6840   grid_attr_trees->kernel_lattrs_dimnum_decl
6841     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6842 		  uint32_type_node);
6843   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6844 
6845   grid_attr_trees->kernel_lattrs_grid_decl
6846     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6847 		  grid_attr_trees->kernel_dim_array_type);
6848   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6849     = grid_attr_trees->kernel_lattrs_dimnum_decl;
6850   grid_attr_trees->kernel_lattrs_group_decl
6851     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6852 		  grid_attr_trees->kernel_dim_array_type);
6853   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6854     = grid_attr_trees->kernel_lattrs_grid_decl;
6855   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6856 			 "__gomp_kernel_launch_attributes",
6857 			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6858 }
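
/* The record built above corresponds roughly to the following C
   structure (a sketch; the precise layout is whatever
   finish_builtin_struct produces):

       struct __gomp_kernel_launch_attributes
       {
         uint32_t ndim;
         uint32_t grid_size[3];
         uint32_t group_size[3];
       };  */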
6859 
6860 /* Insert before the current statement in GSI a store of VALUE into element
6861    INDEX of the array field FLD_DECL (of type kernel_dim_array_type) of
6862    RANGE_VAR.  VALUE must be of type uint32_type_node.  */
6863 
6864 static void
6865 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6866 			     tree fld_decl, int index, tree value)
6867 {
6868   tree ref = build4 (ARRAY_REF, uint32_type_node,
6869 		     build3 (COMPONENT_REF,
6870 			     grid_attr_trees->kernel_dim_array_type,
6871 			     range_var, fld_decl, NULL_TREE),
6872 		     build_int_cst (integer_type_node, index),
6873 		     NULL_TREE, NULL_TREE);
6874   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6875 }
6876 
6877 /* Return a tree representation of a pointer to a structure with grid and
6878    work-group size information.  Statements filling that information will be
6879    inserted before GSI; TGT_STMT is the target statement which has the
6880    necessary information in it.  */
6881 
6882 static tree
6883 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6884 				       gomp_target *tgt_stmt)
6885 {
6886   grid_create_kernel_launch_attr_types ();
6887   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6888 				"__kernel_launch_attrs");
6889 
6890   unsigned max_dim = 0;
6891   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6892        clause;
6893        clause = OMP_CLAUSE_CHAIN (clause))
6894     {
6895       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6896 	continue;
6897 
6898       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6899       max_dim = MAX (dim, max_dim);
6900 
6901       grid_insert_store_range_dim (gsi, lattrs,
6902 				   grid_attr_trees->kernel_lattrs_grid_decl,
6903 				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6904       grid_insert_store_range_dim (gsi, lattrs,
6905 				   grid_attr_trees->kernel_lattrs_group_decl,
6906 				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6907     }
6908 
6909   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6910 			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6911   gcc_checking_assert (max_dim <= 2);
6912   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6913   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6914 		     GSI_SAME_STMT);
6915   TREE_ADDRESSABLE (lattrs) = 1;
6916   return build_fold_addr_expr (lattrs);
6917 }
6918 
6919 /* Build target argument identifier from the DEVICE identifier, value
6920    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6921 
6922 static tree
6923 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6924 {
6925   tree t = build_int_cst (integer_type_node, device);
6926   if (subsequent_param)
6927     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6928 		     build_int_cst (integer_type_node,
6929 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6930   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6931 		   build_int_cst (integer_type_node, id));
6932   return t;
6933 }
6934 
6935 /* Like above but return it in a type that can be directly stored as an element
6936    of the argument array.  */
6937 
6938 static tree
6939 get_target_argument_identifier (int device, bool subsequent_param, int id)
6940 {
6941   tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6942   return fold_convert (ptr_type_node, t);
6943 }
6944 
6945 /* Return a target argument consisting of DEVICE identifier, value identifier
6946    ID, and the actual VALUE.  */
6947 
6948 static tree
6949 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6950 			   tree value)
6951 {
6952   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6953 			fold_convert (integer_type_node, value),
6954 			build_int_cst (unsigned_type_node,
6955 				       GOMP_TARGET_ARG_VALUE_SHIFT));
6956   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6957 		   get_target_argument_identifier_1 (device, false, id));
6958   t = fold_convert (ptr_type_node, t);
6959   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6960 }
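
/* Illustrative sketch of the encoding built above (assuming the
   GOMP_TARGET_ARG_* constants from gomp-constants.h): the identifier word
   is  device | id, plus GOMP_TARGET_ARG_SUBSEQUENT_PARAM when the value
   travels in a separate array slot, and a small value is packed into the
   same word as

       arg = (value << GOMP_TARGET_ARG_VALUE_SHIFT) | device | id;

   before being converted to ptr_type_node for the argument array.  */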
6961 
6962 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6963    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6964    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6965    arguments.  */
6966 
6967 static void
6968 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6969 					 int id, tree value, vec <tree> *args)
6970 {
6971   if (tree_fits_shwi_p (value)
6972       && tree_to_shwi (value) > -(1 << 15)
6973       && tree_to_shwi (value) < (1 << 15))
6974     args->quick_push (get_target_argument_value (gsi, device, id, value));
6975   else
6976     {
6977       args->quick_push (get_target_argument_identifier (device, true, id));
6978       value = fold_convert (ptr_type_node, value);
6979       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6980 					GSI_SAME_STMT);
6981       args->quick_push (value);
6982     }
6983 }
6984 
6985 /* Create an array of arguments that is then passed to GOMP_target.  */
6986 
6987 static tree
6988 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6989 {
6990   auto_vec <tree, 6> args;
6991   tree clauses = gimple_omp_target_clauses (tgt_stmt);
6992   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6993   if (c)
6994     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6995   else
6996     t = integer_minus_one_node;
6997   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6998 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6999 
7000   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7001   if (c)
7002     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7003   else
7004     t = integer_minus_one_node;
7005   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7006 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
7007 					   &args);
7008 
7009   /* Add HSA-specific grid sizes, if available.  */
7010   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7011 		       OMP_CLAUSE__GRIDDIM_))
7012     {
7013       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7014       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7015       args.quick_push (t);
7016       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7017     }
7018 
7019   /* Produce more, perhaps device-specific, arguments here.  */
7020 
7021   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7022 							  args.length () + 1),
7023 				  ".omp_target_args");
7024   for (unsigned i = 0; i < args.length (); i++)
7025     {
7026       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7027 			 build_int_cst (integer_type_node, i),
7028 			 NULL_TREE, NULL_TREE);
7029       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7030 			 GSI_SAME_STMT);
7031     }
7032   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7033 		     build_int_cst (integer_type_node, args.length ()),
7034 		     NULL_TREE, NULL_TREE);
7035   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7036 		     GSI_SAME_STMT);
7037   TREE_ADDRESSABLE (argarray) = 1;
7038   return build_fold_addr_expr (argarray);
7039 }
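
/* For a plain  #pragma omp target  without num_teams or thread_limit
   clauses, the array built above would contain, schematically,

       .omp_target_args = { encode (DEVICE_ALL, NUM_TEAMS, -1),
                            encode (DEVICE_ALL, THREAD_LIMIT, -1),
                            NULL };

   where "encode" is only shorthand for the packing sketched after
   get_target_argument_value above, not a real function.  */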
7040 
7041 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
7042 
7043 static void
7044 expand_omp_target (struct omp_region *region)
7045 {
7046   basic_block entry_bb, exit_bb, new_bb;
7047   struct function *child_cfun;
7048   tree child_fn, block, t;
7049   gimple_stmt_iterator gsi;
7050   gomp_target *entry_stmt;
7051   gimple *stmt;
7052   edge e;
7053   bool offloaded, data_region;
7054 
7055   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7056   new_bb = region->entry;
7057 
7058   offloaded = is_gimple_omp_offloaded (entry_stmt);
7059   switch (gimple_omp_target_kind (entry_stmt))
7060     {
7061     case GF_OMP_TARGET_KIND_REGION:
7062     case GF_OMP_TARGET_KIND_UPDATE:
7063     case GF_OMP_TARGET_KIND_ENTER_DATA:
7064     case GF_OMP_TARGET_KIND_EXIT_DATA:
7065     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7066     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7067     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7068     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7069     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7070       data_region = false;
7071       break;
7072     case GF_OMP_TARGET_KIND_DATA:
7073     case GF_OMP_TARGET_KIND_OACC_DATA:
7074     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7075       data_region = true;
7076       break;
7077     default:
7078       gcc_unreachable ();
7079     }
7080 
7081   child_fn = NULL_TREE;
7082   child_cfun = NULL;
7083   if (offloaded)
7084     {
7085       child_fn = gimple_omp_target_child_fn (entry_stmt);
7086       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7087     }
7088 
7089   /* Supported by expand_omp_taskreg, but not here.  */
7090   if (child_cfun != NULL)
7091     gcc_checking_assert (!child_cfun->cfg);
7092   gcc_checking_assert (!gimple_in_ssa_p (cfun));
7093 
7094   entry_bb = region->entry;
7095   exit_bb = region->exit;
7096 
7097   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7098     mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7099 
7100   if (offloaded)
7101     {
7102       unsigned srcidx, dstidx, num;
7103 
7104       /* If the offloading region needs data sent from the parent
7105 	 function, then the very first statement (except possible
7106 	 tree profile counter updates) of the offloading body
7107 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
7108 	 &.OMP_DATA_O is passed as an argument to the child function,
7109 	 we need to replace it with the argument as seen by the child
7110 	 function.
7111 
7112 	 In most cases, this will end up being the identity assignment
7113 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
7114 	 a function call that has been inlined, the original PARM_DECL
7115 	 .OMP_DATA_I may have been converted into a different local
7116 	 variable.  In which case, we need to keep the assignment.  */
7117       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7118       if (data_arg)
7119 	{
7120 	  basic_block entry_succ_bb = single_succ (entry_bb);
7121 	  gimple_stmt_iterator gsi;
7122 	  tree arg;
7123 	  gimple *tgtcopy_stmt = NULL;
7124 	  tree sender = TREE_VEC_ELT (data_arg, 0);
7125 
7126 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7127 	    {
7128 	      gcc_assert (!gsi_end_p (gsi));
7129 	      stmt = gsi_stmt (gsi);
7130 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
7131 		continue;
7132 
7133 	      if (gimple_num_ops (stmt) == 2)
7134 		{
7135 		  tree arg = gimple_assign_rhs1 (stmt);
7136 
7137 		  /* We're ignoring the subcode because we're
7138 		     effectively doing a STRIP_NOPS.  */
7139 
7140 		  if (TREE_CODE (arg) == ADDR_EXPR
7141 		      && TREE_OPERAND (arg, 0) == sender)
7142 		    {
7143 		      tgtcopy_stmt = stmt;
7144 		      break;
7145 		    }
7146 		}
7147 	    }
7148 
7149 	  gcc_assert (tgtcopy_stmt != NULL);
7150 	  arg = DECL_ARGUMENTS (child_fn);
7151 
7152 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7153 	  gsi_remove (&gsi, true);
7154 	}
7155 
7156       /* Declare local variables needed in CHILD_CFUN.  */
7157       block = DECL_INITIAL (child_fn);
7158       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7159       /* The gimplifier could record temporaries in the offloading block
7160 	 rather than in containing function's local_decls chain,
7161 	 which would mean cgraph missed finalizing them.  Do it now.  */
7162       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7163 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7164 	  varpool_node::finalize_decl (t);
7165       DECL_SAVED_TREE (child_fn) = NULL;
7166       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7167       gimple_set_body (child_fn, NULL);
7168       TREE_USED (block) = 1;
7169 
7170       /* Reset DECL_CONTEXT on function arguments.  */
7171       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7172 	DECL_CONTEXT (t) = child_fn;
7173 
7174       /* Split ENTRY_BB at GIMPLE_*,
7175 	 so that it can be moved to the child function.  */
7176       gsi = gsi_last_bb (entry_bb);
7177       stmt = gsi_stmt (gsi);
7178       gcc_assert (stmt
7179 		  && gimple_code (stmt) == gimple_code (entry_stmt));
7180       e = split_block (entry_bb, stmt);
7181       gsi_remove (&gsi, true);
7182       entry_bb = e->dest;
7183       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7184 
7185       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7186       if (exit_bb)
7187 	{
7188 	  gsi = gsi_last_bb (exit_bb);
7189 	  gcc_assert (!gsi_end_p (gsi)
7190 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7191 	  stmt = gimple_build_return (NULL);
7192 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7193 	  gsi_remove (&gsi, true);
7194 	}
7195 
7196       /* Make sure to generate early debug for the function before
7197          outlining anything.  */
7198       if (! gimple_in_ssa_p (cfun))
7199 	(*debug_hooks->early_global_decl) (cfun->decl);
7200 
7201       /* Move the offloading region into CHILD_CFUN.  */
7202 
7203       block = gimple_block (entry_stmt);
7204 
7205       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7206       if (exit_bb)
7207 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7208       /* When the OMP expansion process cannot guarantee an up-to-date
7209 	 loop tree, arrange for the child function to fix up loops.  */
7210       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7211 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7212 
7213       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7214       num = vec_safe_length (child_cfun->local_decls);
7215       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7216 	{
7217 	  t = (*child_cfun->local_decls)[srcidx];
7218 	  if (DECL_CONTEXT (t) == cfun->decl)
7219 	    continue;
7220 	  if (srcidx != dstidx)
7221 	    (*child_cfun->local_decls)[dstidx] = t;
7222 	  dstidx++;
7223 	}
7224       if (dstidx != num)
7225 	vec_safe_truncate (child_cfun->local_decls, dstidx);
7226 
7227       /* Inform the callgraph about the new function.  */
7228       child_cfun->curr_properties = cfun->curr_properties;
7229       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7230       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7231       cgraph_node *node = cgraph_node::get_create (child_fn);
7232       node->parallelized_function = 1;
7233       cgraph_node::add_new_function (child_fn, true);
7234 
7235       /* Add the new function to the offload table.  */
7236       if (ENABLE_OFFLOADING)
7237 	vec_safe_push (offload_funcs, child_fn);
7238 
7239       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7240 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7241 
7242       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7243 	 fixed in a following pass.  */
7244       push_cfun (child_cfun);
7245       if (need_asm)
7246 	assign_assembler_name_if_needed (child_fn);
7247       cgraph_edge::rebuild_edges ();
7248 
7249       /* Some EH regions might become dead, see PR34608.  If
7250 	 pass_cleanup_cfg isn't the first pass to happen with the
7251 	 new child, these dead EH edges might cause problems.
7252 	 Clean them up now.  */
7253       if (flag_exceptions)
7254 	{
7255 	  basic_block bb;
7256 	  bool changed = false;
7257 
7258 	  FOR_EACH_BB_FN (bb, cfun)
7259 	    changed |= gimple_purge_dead_eh_edges (bb);
7260 	  if (changed)
7261 	    cleanup_tree_cfg ();
7262 	}
7263       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7264 	verify_loop_structure ();
7265       pop_cfun ();
7266 
7267       if (dump_file && !gimple_in_ssa_p (cfun))
7268 	{
7269 	  omp_any_child_fn_dumped = true;
7270 	  dump_function_header (dump_file, child_fn, dump_flags);
7271 	  dump_function_to_file (child_fn, dump_file, dump_flags);
7272 	}
7273     }
7274 
7275   /* Emit a library call to launch the offloading region, or do data
7276      transfers.  */
7277   tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7278   enum built_in_function start_ix;
7279   location_t clause_loc;
7280   unsigned int flags_i = 0;
7281   bool oacc_kernels_p = false;
7282 
7283   switch (gimple_omp_target_kind (entry_stmt))
7284     {
7285     case GF_OMP_TARGET_KIND_REGION:
7286       start_ix = BUILT_IN_GOMP_TARGET;
7287       break;
7288     case GF_OMP_TARGET_KIND_DATA:
7289       start_ix = BUILT_IN_GOMP_TARGET_DATA;
7290       break;
7291     case GF_OMP_TARGET_KIND_UPDATE:
7292       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7293       break;
7294     case GF_OMP_TARGET_KIND_ENTER_DATA:
7295       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7296       break;
7297     case GF_OMP_TARGET_KIND_EXIT_DATA:
7298       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7299       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7300       break;
7301     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7302       oacc_kernels_p = true;
7303       /* FALLTHROUGH */
7304     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7305       start_ix = BUILT_IN_GOACC_PARALLEL;
7306       break;
7307     case GF_OMP_TARGET_KIND_OACC_DATA:
7308     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7309       start_ix = BUILT_IN_GOACC_DATA_START;
7310       break;
7311     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7312       start_ix = BUILT_IN_GOACC_UPDATE;
7313       break;
7314     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7315       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7316       break;
7317     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7318       start_ix = BUILT_IN_GOACC_DECLARE;
7319       break;
7320     default:
7321       gcc_unreachable ();
7322     }
7323 
7324   clauses = gimple_omp_target_clauses (entry_stmt);
7325 
7326   /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7327      library choose) and there is no conditional.  */
7328   cond = NULL_TREE;
7329   device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7330 
7331   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7332   if (c)
7333     cond = OMP_CLAUSE_IF_EXPR (c);
7334 
7335   c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7336   if (c)
7337     {
7338       /* Even if we pass it to all library function calls, it is currently only
7339 	 defined/used for the OpenMP target ones.  */
7340       gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7341 			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
7342 			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7343 			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7344 
7345       device = OMP_CLAUSE_DEVICE_ID (c);
7346       clause_loc = OMP_CLAUSE_LOCATION (c);
7347     }
7348   else
7349     clause_loc = gimple_location (entry_stmt);
7350 
7351   c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7352   if (c)
7353     flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7354 
7355   /* Ensure 'device' is of the correct type.  */
7356   device = fold_convert_loc (clause_loc, integer_type_node, device);
7357 
7358   /* If we found the clause 'if (cond)', build
7359      (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7360   if (cond)
7361     {
7362       cond = gimple_boolify (cond);
7363 
7364       basic_block cond_bb, then_bb, else_bb;
7365       edge e;
7366       tree tmp_var;
7367 
7368       tmp_var = create_tmp_var (TREE_TYPE (device));
7369       if (offloaded)
7370 	e = split_block_after_labels (new_bb);
7371       else
7372 	{
7373 	  gsi = gsi_last_bb (new_bb);
7374 	  gsi_prev (&gsi);
7375 	  e = split_block (new_bb, gsi_stmt (gsi));
7376 	}
7377       cond_bb = e->src;
7378       new_bb = e->dest;
7379       remove_edge (e);
7380 
7381       then_bb = create_empty_bb (cond_bb);
7382       else_bb = create_empty_bb (then_bb);
7383       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7384       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7385 
7386       stmt = gimple_build_cond_empty (cond);
7387       gsi = gsi_last_bb (cond_bb);
7388       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7389 
7390       gsi = gsi_start_bb (then_bb);
7391       stmt = gimple_build_assign (tmp_var, device);
7392       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7393 
7394       gsi = gsi_start_bb (else_bb);
7395       stmt = gimple_build_assign (tmp_var,
7396 				  build_int_cst (integer_type_node,
7397 						 GOMP_DEVICE_HOST_FALLBACK));
7398       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7399 
7400       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7401       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7402       add_bb_to_loop (then_bb, cond_bb->loop_father);
7403       add_bb_to_loop (else_bb, cond_bb->loop_father);
7404       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7405       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7406 
7407       device = tmp_var;
7408       gsi = gsi_last_bb (new_bb);
7409     }
7410   else
7411     {
7412       gsi = gsi_last_bb (new_bb);
7413       device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7414 					 true, GSI_SAME_STMT);
7415     }
7416 
7417   t = gimple_omp_target_data_arg (entry_stmt);
7418   if (t == NULL)
7419     {
7420       t1 = size_zero_node;
7421       t2 = build_zero_cst (ptr_type_node);
7422       t3 = t2;
7423       t4 = t2;
7424     }
7425   else
7426     {
7427       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7428       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7429       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7430       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7431       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7432     }
7433 
7434   gimple *g;
7435   bool tagging = false;
7436   /* The maximum number of arguments used by any start_ix, without varargs.  */
7437   auto_vec<tree, 11> args;
7438   args.quick_push (device);
7439   if (offloaded)
7440     args.quick_push (build_fold_addr_expr (child_fn));
7441   args.quick_push (t1);
7442   args.quick_push (t2);
7443   args.quick_push (t3);
7444   args.quick_push (t4);
7445   switch (start_ix)
7446     {
7447     case BUILT_IN_GOACC_DATA_START:
7448     case BUILT_IN_GOACC_DECLARE:
7449     case BUILT_IN_GOMP_TARGET_DATA:
7450       break;
7451     case BUILT_IN_GOMP_TARGET:
7452     case BUILT_IN_GOMP_TARGET_UPDATE:
7453     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7454       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7455       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7456       if (c)
7457 	depend = OMP_CLAUSE_DECL (c);
7458       else
7459 	depend = build_int_cst (ptr_type_node, 0);
7460       args.quick_push (depend);
7461       if (start_ix == BUILT_IN_GOMP_TARGET)
7462 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7463       break;
7464     case BUILT_IN_GOACC_PARALLEL:
7465       {
7466 	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7467 	tagging = true;
7468       }
7469       /* FALLTHRU */
7470     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7471     case BUILT_IN_GOACC_UPDATE:
7472       {
7473 	tree t_async = NULL_TREE;
7474 
7475 	/* If present, use the value specified by the respective
7476 	   clause, making sure that is of the correct type.  */
7477 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7478 	if (c)
7479 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7480 				      integer_type_node,
7481 				      OMP_CLAUSE_ASYNC_EXPR (c));
7482 	else if (!tagging)
7483 	  /* Default values for t_async.  */
7484 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7485 				      integer_type_node,
7486 				      build_int_cst (integer_type_node,
7487 						     GOMP_ASYNC_SYNC));
7488 	if (tagging && t_async)
7489 	  {
7490 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7491 
7492 	    if (TREE_CODE (t_async) == INTEGER_CST)
7493 	      {
7494 		/* See if we can pack the async arg in to the tag's
7495 		   operand.  */
7496 		i_async = TREE_INT_CST_LOW (t_async);
7497 		if (i_async < GOMP_LAUNCH_OP_MAX)
7498 		  t_async = NULL_TREE;
7499 		else
7500 		  i_async = GOMP_LAUNCH_OP_MAX;
7501 	      }
7502 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7503 					      i_async));
7504 	  }
7505 	if (t_async)
7506 	  args.safe_push (t_async);
7507 
7508 	/* Save the argument index, and ... */
7509 	unsigned t_wait_idx = args.length ();
7510 	unsigned num_waits = 0;
7511 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7512 	if (!tagging || c)
7513 	  /* ... push a placeholder.  */
7514 	  args.safe_push (integer_zero_node);
7515 
7516 	for (; c; c = OMP_CLAUSE_CHAIN (c))
7517 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7518 	    {
7519 	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7520 						integer_type_node,
7521 						OMP_CLAUSE_WAIT_EXPR (c)));
7522 	      num_waits++;
7523 	    }
7524 
7525 	if (!tagging || num_waits)
7526 	  {
7527 	    tree len;
7528 
7529 	    /* Now that we know the number, update the placeholder.  */
7530 	    if (tagging)
7531 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7532 	    else
7533 	      len = build_int_cst (integer_type_node, num_waits);
7534 	    len = fold_convert_loc (gimple_location (entry_stmt),
7535 				    unsigned_type_node, len);
7536 	    args[t_wait_idx] = len;
7537 	  }
7538       }
7539       break;
7540     default:
7541       gcc_unreachable ();
7542     }
7543   if (tagging)
7544     /*  Push terminal marker - zero.  */
7545     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7546 
7547   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7548   gimple_set_location (g, gimple_location (entry_stmt));
7549   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7550   if (!offloaded)
7551     {
7552       g = gsi_stmt (gsi);
7553       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7554       gsi_remove (&gsi, true);
7555     }
7556   if (data_region && region->exit)
7557     {
7558       gsi = gsi_last_bb (region->exit);
7559       g = gsi_stmt (gsi);
7560       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7561       gsi_remove (&gsi, true);
7562     }
7563 }
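
/* Schematically (a sketch, not the exact libgomp signature): for an
   offloaded  #pragma omp target  region the code emitted above amounts to

       GOMP_target_... (device, child_fn, num_maps,
                        hostaddrs, sizes, kinds, flags, depend, args);

   preceded, when an if clause is present, by

       device = cond ? device : GOMP_DEVICE_HOST_FALLBACK;

   while data regions keep their body in place, with the library call
   emitted in front of it.  */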
7564 
7565 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7566    the iteration variable derived from the thread number.  INTRA_GROUP means this
7567    is an expansion of a loop iterating over work-items within a separate
7568    iteration over groups.  */
7569 
7570 static void
7571 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7572 {
7573   gimple_stmt_iterator gsi;
7574   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7575   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7576 		       == GF_OMP_FOR_KIND_GRID_LOOP);
7577   size_t collapse = gimple_omp_for_collapse (for_stmt);
7578   struct omp_for_data_loop *loops
7579     = XALLOCAVEC (struct omp_for_data_loop,
7580 		  gimple_omp_for_collapse (for_stmt));
7581   struct omp_for_data fd;
7582 
7583   remove_edge (BRANCH_EDGE (kfor->entry));
7584   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7585 
7586   gcc_assert (kfor->cont);
7587   omp_extract_for_data (for_stmt, &fd, loops);
7588 
7589   gsi = gsi_start_bb (body_bb);
7590 
7591   for (size_t dim = 0; dim < collapse; dim++)
7592     {
7593       tree type, itype;
7594       itype = type = TREE_TYPE (fd.loops[dim].v);
7595       if (POINTER_TYPE_P (type))
7596 	itype = signed_type_for (type);
7597 
7598       tree n1 = fd.loops[dim].n1;
7599       tree step = fd.loops[dim].step;
7600       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7601 				     true, NULL_TREE, true, GSI_SAME_STMT);
7602       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7603 				       true, NULL_TREE, true, GSI_SAME_STMT);
7604       tree threadid;
7605       if (gimple_omp_for_grid_group_iter (for_stmt))
7606 	{
7607 	  gcc_checking_assert (!intra_group);
7608 	  threadid = build_call_expr (builtin_decl_explicit
7609 				      (BUILT_IN_HSA_WORKGROUPID), 1,
7610 				      build_int_cstu (unsigned_type_node, dim));
7611 	}
7612       else if (intra_group)
7613 	threadid = build_call_expr (builtin_decl_explicit
7614 				    (BUILT_IN_HSA_WORKITEMID), 1,
7615 				    build_int_cstu (unsigned_type_node, dim));
7616       else
7617 	threadid = build_call_expr (builtin_decl_explicit
7618 				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7619 				    build_int_cstu (unsigned_type_node, dim));
7620       threadid = fold_convert (itype, threadid);
7621       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7622 					   true, GSI_SAME_STMT);
7623 
7624       tree startvar = fd.loops[dim].v;
7625       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7626       if (POINTER_TYPE_P (type))
7627 	t = fold_build_pointer_plus (n1, t);
7628       else
7629 	t = fold_build2 (PLUS_EXPR, type, t, n1);
7630       t = fold_convert (type, t);
7631       t = force_gimple_operand_gsi (&gsi, t,
7632 				    DECL_P (startvar)
7633 				    && TREE_ADDRESSABLE (startvar),
7634 				    NULL_TREE, true, GSI_SAME_STMT);
7635       gassign *assign_stmt = gimple_build_assign (startvar, t);
7636       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7637     }
7638   /* Remove the omp for statement.  */
7639   gsi = gsi_last_bb (kfor->entry);
7640   gsi_remove (&gsi, true);
7641 
7642   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7643   gsi = gsi_last_bb (kfor->cont);
7644   gcc_assert (!gsi_end_p (gsi)
7645 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7646   gsi_remove (&gsi, true);
7647 
7648   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7649   gsi = gsi_last_bb (kfor->exit);
7650   gcc_assert (!gsi_end_p (gsi)
7651 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7652   if (intra_group)
7653     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7654   gsi_remove (&gsi, true);
7655 
7656   /* Fix up the now much simpler CFG.  */
7657   remove_edge (find_edge (kfor->cont, body_bb));
7658 
7659   if (kfor->cont != body_bb)
7660     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7661   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7662 }
7663 
7664 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7665    uses of the old argument PARM_DECL to the new one.  */
7666 
7667 struct grid_arg_decl_map
7668 {
7669   tree old_arg;
7670   tree new_arg;
7671 };
7672 
7673 /* Invoked through walk_gimple_op; remaps all uses of the old PARM_DECL to
7674    the one pertaining to the kernel function.  */
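
/* A minimal usage sketch, mirroring the loop at the end of
   grid_expand_target_grid_body below: zero-initialize a walk_stmt_info,
   point its info field at a grid_arg_decl_map holding the old and new
   PARM_DECLs, and call
   walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi)
   for every statement of the kernel body.  */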
7675 
7676 static tree
7677 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7678 {
7679   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7680   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7681   tree t = *tp;
7682 
7683   if (t == adm->old_arg)
7684     *tp = adm->new_arg;
7685   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7686   return NULL_TREE;
7687 }
7688 
7689 /* If the TARGET region contains a kernel body for-loop, remove its region
7690    from TARGET and expand it in HSA gridified kernel fashion.  */
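
/* Schematically (a sketch, not exact GIMPLE), a target region of the form

     GIMPLE_OMP_TARGET
       GIMPLE_OMP_GRID_BODY
         GIMPLE_OMP_FOR (GF_OMP_FOR_KIND_GRID_LOOP)
           body

   has the grid body outlined into a fresh copy of the target's child
   function, named via clone_function_name with the "kernel" suffix, which
   is then registered as an HSA kernel alongside the original child
   function.  */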
7691 
7692 static void
7693 grid_expand_target_grid_body (struct omp_region *target)
7694 {
7695   if (!hsa_gen_requested_p ())
7696     return;
7697 
7698   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7699   struct omp_region **pp;
7700 
7701   for (pp = &target->inner; *pp; pp = &(*pp)->next)
7702     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7703       break;
7704 
7705   struct omp_region *gpukernel = *pp;
7706 
7707   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7708   if (!gpukernel)
7709     {
7710       /* HSA cannot handle OpenACC constructs.  */
7711       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7712 	return;
7713       gcc_checking_assert (orig_child_fndecl);
7714       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7715 				    OMP_CLAUSE__GRIDDIM_));
7716       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7717 
7718       hsa_register_kernel (n);
7719       return;
7720     }
7721 
7722   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7723 			       OMP_CLAUSE__GRIDDIM_));
7724   tree inside_block
7725     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7726   *pp = gpukernel->next;
7727   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7728     if ((*pp)->type == GIMPLE_OMP_FOR)
7729       break;
7730 
7731   struct omp_region *kfor = *pp;
7732   gcc_assert (kfor);
7733   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7734   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7735   *pp = kfor->next;
7736   if (kfor->inner)
7737     {
7738       if (gimple_omp_for_grid_group_iter (for_stmt))
7739 	{
7740 	  struct omp_region **next_pp;
7741 	  for (pp = &kfor->inner; *pp; pp = next_pp)
7742 	    {
7743 	      next_pp = &(*pp)->next;
7744 	      if ((*pp)->type != GIMPLE_OMP_FOR)
7745 		continue;
7746 	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7747 	      gcc_assert (gimple_omp_for_kind (inner)
7748 			  == GF_OMP_FOR_KIND_GRID_LOOP);
7749 	      grid_expand_omp_for_loop (*pp, true);
7750 	      *pp = (*pp)->next;
7751 	      next_pp = pp;
7752 	    }
7753 	}
7754       expand_omp (kfor->inner);
7755     }
7756   if (gpukernel->inner)
7757     expand_omp (gpukernel->inner);
7758 
7759   tree kern_fndecl = copy_node (orig_child_fndecl);
7760   DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7761   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7762   tree tgtblock = gimple_block (tgt_stmt);
7763   tree fniniblock = make_node (BLOCK);
7764   BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7765   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7766   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7767   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7768   DECL_INITIAL (kern_fndecl) = fniniblock;
7769   push_struct_function (kern_fndecl);
7770   cfun->function_end_locus = gimple_location (tgt_stmt);
7771   init_tree_ssa (cfun);
7772   pop_cfun ();
7773 
7774   /* Make sure to generate early debug for the function before
7775      outlining anything.  */
7776   if (! gimple_in_ssa_p (cfun))
7777     (*debug_hooks->early_global_decl) (cfun->decl);
7778 
7779   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7780   gcc_assert (!DECL_CHAIN (old_parm_decl));
7781   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7782   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7783   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7784   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7785   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7786   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7787   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7788   kern_cfun->curr_properties = cfun->curr_properties;
7789 
7790   grid_expand_omp_for_loop (kfor, false);
7791 
7792   /* Remove the omp for statement.  */
7793   gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7794   gsi_remove (&gsi, true);
7795   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7796      return.  */
7797   gsi = gsi_last_bb (gpukernel->exit);
7798   gcc_assert (!gsi_end_p (gsi)
7799 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7800   gimple *ret_stmt = gimple_build_return (NULL);
7801   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7802   gsi_remove (&gsi, true);
7803 
7804   /* Statements in the first BB in the target construct have been produced by
7805      target lowering and must be copied inside the GPUKERNEL, with the two
7806      exceptions of the first OMP statement and the OMP_DATA assignment
7807      statement.  */
7808   gsi = gsi_start_bb (single_succ (gpukernel->entry));
7809   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7810   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7811   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7812        !gsi_end_p (tsi); gsi_next (&tsi))
7813     {
7814       gimple *stmt = gsi_stmt (tsi);
7815       if (is_gimple_omp (stmt))
7816 	break;
7817       if (sender
7818 	  && is_gimple_assign (stmt)
7819 	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7820 	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7821 	continue;
7822       gimple *copy = gimple_copy (stmt);
7823       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7824       gimple_set_block (copy, fniniblock);
7825     }
7826 
7827   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7828 			  gpukernel->exit, inside_block);
7829 
7830   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7831   kcn->mark_force_output ();
7832   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7833 
7834   hsa_register_kernel (kcn, orig_child);
7835 
7836   cgraph_node::add_new_function (kern_fndecl, true);
7837   push_cfun (kern_cfun);
7838   cgraph_edge::rebuild_edges ();
7839 
7840   /* Re-map any mention of the PARM_DECL of the original function to the
7841      PARM_DECL of the new one.
7842 
7843      TODO: It would be great if lowering produced references into the GPU
7844      kernel decl straight away and we did not have to do this.  */
7845   struct grid_arg_decl_map adm;
7846   adm.old_arg = old_parm_decl;
7847   adm.new_arg = new_parm_decl;
7848   basic_block bb;
7849   FOR_EACH_BB_FN (bb, kern_cfun)
7850     {
7851       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7852 	{
7853 	  gimple *stmt = gsi_stmt (gsi);
7854 	  struct walk_stmt_info wi;
7855 	  memset (&wi, 0, sizeof (wi));
7856 	  wi.info = &adm;
7857 	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7858 	}
7859     }
7860   pop_cfun ();
7863 }
7864 
7865 /* Expand the parallel region tree rooted at REGION.  Expansion
7866    proceeds in depth-first order.  Innermost regions are expanded
7867    first.  This way, parallel regions that require a new function to
7868    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7869    internal dependencies in their body.  */
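
/* For example, given the hypothetical nesting

     #pragma omp parallel      <-- expanded second
       #pragma omp for         <-- expanded first
         ...

   the GIMPLE_OMP_FOR region is expanded before its enclosing
   GIMPLE_OMP_PARALLEL region, so outlining the parallel body into a
   separate function encounters no unexpanded OMP directives inside it.  */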
7870 
7871 static void
7872 expand_omp (struct omp_region *region)
7873 {
7874   omp_any_child_fn_dumped = false;
7875   while (region)
7876     {
7877       location_t saved_location;
7878       gimple *inner_stmt = NULL;
7879 
7880       /* First, determine whether this is a combined parallel+workshare
7881 	 region.  */
7882       if (region->type == GIMPLE_OMP_PARALLEL)
7883 	determine_parallel_type (region);
7884       else if (region->type == GIMPLE_OMP_TARGET)
7885 	grid_expand_target_grid_body (region);
7886 
7887       if (region->type == GIMPLE_OMP_FOR
7888 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
7889 	inner_stmt = last_stmt (region->inner->entry);
7890 
7891       if (region->inner)
7892 	expand_omp (region->inner);
7893 
7894       saved_location = input_location;
7895       if (gimple_has_location (last_stmt (region->entry)))
7896 	input_location = gimple_location (last_stmt (region->entry));
7897 
7898       switch (region->type)
7899 	{
7900 	case GIMPLE_OMP_PARALLEL:
7901 	case GIMPLE_OMP_TASK:
7902 	  expand_omp_taskreg (region);
7903 	  break;
7904 
7905 	case GIMPLE_OMP_FOR:
7906 	  expand_omp_for (region, inner_stmt);
7907 	  break;
7908 
7909 	case GIMPLE_OMP_SECTIONS:
7910 	  expand_omp_sections (region);
7911 	  break;
7912 
7913 	case GIMPLE_OMP_SECTION:
7914 	  /* Individual omp sections are handled together with their
7915 	     parent GIMPLE_OMP_SECTIONS region.  */
7916 	  break;
7917 
7918 	case GIMPLE_OMP_SINGLE:
7919 	  expand_omp_single (region);
7920 	  break;
7921 
7922 	case GIMPLE_OMP_ORDERED:
7923 	  {
7924 	    gomp_ordered *ord_stmt
7925 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
7926 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7927 				 OMP_CLAUSE_DEPEND))
7928 	      {
7929 		/* We'll expand these when expanding corresponding
7930 		   worksharing region with ordered(n) clause.  */
7931 		gcc_assert (region->outer
7932 			    && region->outer->type == GIMPLE_OMP_FOR);
7933 		region->ord_stmt = ord_stmt;
7934 		break;
7935 	      }
7936 	  }
7937 	  /* FALLTHRU */
7938 	case GIMPLE_OMP_MASTER:
7939 	case GIMPLE_OMP_TASKGROUP:
7940 	case GIMPLE_OMP_CRITICAL:
7941 	case GIMPLE_OMP_TEAMS:
7942 	  expand_omp_synch (region);
7943 	  break;
7944 
7945 	case GIMPLE_OMP_ATOMIC_LOAD:
7946 	  expand_omp_atomic (region);
7947 	  break;
7948 
7949 	case GIMPLE_OMP_TARGET:
7950 	  expand_omp_target (region);
7951 	  break;
7952 
7953 	default:
7954 	  gcc_unreachable ();
7955 	}
7956 
7957       input_location = saved_location;
7958       region = region->next;
7959     }
7960   if (omp_any_child_fn_dumped)
7961     {
7962       if (dump_file)
7963 	dump_function_header (dump_file, current_function_decl, dump_flags);
7964       omp_any_child_fn_dumped = false;
7965     }
7966 }
7967 
7968 /* Helper for build_omp_regions.  Scan the dominator tree starting at
7969    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7970    true, the function ends once a single tree is built (otherwise, a whole
7971    forest of OMP constructs may be built).  */
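
/* As an illustrative sketch, for the hypothetical nesting

     #pragma omp parallel
       {
         #pragma omp for ...
         #pragma omp single ...
       }

   the parallel region becomes the parent: the for region hangs off its
   inner pointer, and the single region is chained after the for region as
   its next peer.  */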
7972 
7973 static void
7974 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7975 		     bool single_tree)
7976 {
7977   gimple_stmt_iterator gsi;
7978   gimple *stmt;
7979   basic_block son;
7980 
7981   gsi = gsi_last_bb (bb);
7982   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7983     {
7984       struct omp_region *region;
7985       enum gimple_code code;
7986 
7987       stmt = gsi_stmt (gsi);
7988       code = gimple_code (stmt);
7989       if (code == GIMPLE_OMP_RETURN)
7990 	{
7991 	  /* STMT is the return point out of region PARENT.  Mark it
7992 	     as the exit point and make PARENT the immediately
7993 	     enclosing region.  */
7994 	  gcc_assert (parent);
7995 	  region = parent;
7996 	  region->exit = bb;
7997 	  parent = parent->outer;
7998 	}
7999       else if (code == GIMPLE_OMP_ATOMIC_STORE)
8000 	{
8001 	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8002 	     GIMPLE_OMP_RETURN, but matches with
8003 	     GIMPLE_OMP_ATOMIC_LOAD.  */
8004 	  gcc_assert (parent);
8005 	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8006 	  region = parent;
8007 	  region->exit = bb;
8008 	  parent = parent->outer;
8009 	}
8010       else if (code == GIMPLE_OMP_CONTINUE)
8011 	{
8012 	  gcc_assert (parent);
8013 	  parent->cont = bb;
8014 	}
8015       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8016 	{
8017 	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8018 	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
8019 	}
8020       else
8021 	{
8022 	  region = new_omp_region (bb, code, parent);
8023 	  /* Otherwise...  */
8024 	  if (code == GIMPLE_OMP_TARGET)
8025 	    {
8026 	      switch (gimple_omp_target_kind (stmt))
8027 		{
8028 		case GF_OMP_TARGET_KIND_REGION:
8029 		case GF_OMP_TARGET_KIND_DATA:
8030 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8031 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
8032 		case GF_OMP_TARGET_KIND_OACC_DATA:
8033 		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8034 		  break;
8035 		case GF_OMP_TARGET_KIND_UPDATE:
8036 		case GF_OMP_TARGET_KIND_ENTER_DATA:
8037 		case GF_OMP_TARGET_KIND_EXIT_DATA:
8038 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
8039 		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8040 		case GF_OMP_TARGET_KIND_OACC_DECLARE:
8041 		  /* ..., other than for those stand-alone directives...  */
8042 		  region = NULL;
8043 		  break;
8044 		default:
8045 		  gcc_unreachable ();
8046 		}
8047 	    }
8048 	  else if (code == GIMPLE_OMP_ORDERED
8049 		   && omp_find_clause (gimple_omp_ordered_clauses
8050 					 (as_a <gomp_ordered *> (stmt)),
8051 				       OMP_CLAUSE_DEPEND))
8052 	    /* #pragma omp ordered depend is also just a stand-alone
8053 	       directive.  */
8054 	    region = NULL;
8055 	  /* ..., this directive becomes the parent for a new region.  */
8056 	  if (region)
8057 	    parent = region;
8058 	}
8059     }
8060 
8061   if (single_tree && !parent)
8062     return;
8063 
8064   for (son = first_dom_son (CDI_DOMINATORS, bb);
8065        son;
8066        son = next_dom_son (CDI_DOMINATORS, son))
8067     build_omp_regions_1 (son, parent, single_tree);
8068 }
8069 
8070 /* Builds the tree of OMP regions rooted at ROOT, storing it in
8071    root_omp_region.  */
8072 
8073 static void
8074 build_omp_regions_root (basic_block root)
8075 {
8076   gcc_assert (root_omp_region == NULL);
8077   build_omp_regions_1 (root, NULL, true);
8078   gcc_assert (root_omp_region != NULL);
8079 }
8080 
8081 /* Expands the omp construct (and its subconstructs) starting in HEAD.  */
8082 
8083 void
8084 omp_expand_local (basic_block head)
8085 {
8086   build_omp_regions_root (head);
8087   if (dump_file && (dump_flags & TDF_DETAILS))
8088     {
8089       fprintf (dump_file, "\nOMP region tree\n\n");
8090       dump_omp_region (dump_file, root_omp_region, 0);
8091       fprintf (dump_file, "\n");
8092     }
8093 
8094   remove_exit_barriers (root_omp_region);
8095   expand_omp (root_omp_region);
8096 
8097   omp_free_regions ();
8098 }
8099 
8100 /* Scan the CFG and build a tree of OMP regions, storing the root of
8101    the tree in root_omp_region.  */
8102 
8103 static void
8104 build_omp_regions (void)
8105 {
8106   gcc_assert (root_omp_region == NULL);
8107   calculate_dominance_info (CDI_DOMINATORS);
8108   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8109 }
8110 
8111 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
8112 
8113 static unsigned int
8114 execute_expand_omp (void)
8115 {
8116   build_omp_regions ();
8117 
8118   if (!root_omp_region)
8119     return 0;
8120 
8121   if (dump_file)
8122     {
8123       fprintf (dump_file, "\nOMP region tree\n\n");
8124       dump_omp_region (dump_file, root_omp_region, 0);
8125       fprintf (dump_file, "\n");
8126     }
8127 
8128   remove_exit_barriers (root_omp_region);
8129 
8130   expand_omp (root_omp_region);
8131 
8132   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8133     verify_loop_structure ();
8134   cleanup_tree_cfg ();
8135 
8136   omp_free_regions ();
8137 
8138   return 0;
8139 }
8140 
8141 /* OMP expansion -- the default pass, run before creation of SSA form.  */
8142 
8143 namespace {
8144 
8145 const pass_data pass_data_expand_omp =
8146 {
8147   GIMPLE_PASS, /* type */
8148   "ompexp", /* name */
8149   OPTGROUP_OMP, /* optinfo_flags */
8150   TV_NONE, /* tv_id */
8151   PROP_gimple_any, /* properties_required */
8152   PROP_gimple_eomp, /* properties_provided */
8153   0, /* properties_destroyed */
8154   0, /* todo_flags_start */
8155   0, /* todo_flags_finish */
8156 };
8157 
8158 class pass_expand_omp : public gimple_opt_pass
8159 {
8160 public:
8161   pass_expand_omp (gcc::context *ctxt)
8162     : gimple_opt_pass (pass_data_expand_omp, ctxt)
8163   {}
8164 
8165   /* opt_pass methods: */
8166   virtual unsigned int execute (function *)
8167     {
8168       bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8169 		    || flag_openmp_simd != 0)
8170 		   && !seen_error ());
8171 
8172       /* This pass always runs, to provide PROP_gimple_eomp.
8173 	 But often, there is nothing to do.  */
8174       if (!gate)
8175 	return 0;
8176 
8177       return execute_expand_omp ();
8178     }
8179 
8180 }; // class pass_expand_omp
8181 
8182 } // anon namespace
8183 
8184 gimple_opt_pass *
8185 make_pass_expand_omp (gcc::context *ctxt)
8186 {
8187   return new pass_expand_omp (ctxt);
8188 }
8189 
8190 namespace {
8191 
8192 const pass_data pass_data_expand_omp_ssa =
8193 {
8194   GIMPLE_PASS, /* type */
8195   "ompexpssa", /* name */
8196   OPTGROUP_OMP, /* optinfo_flags */
8197   TV_NONE, /* tv_id */
8198   PROP_cfg | PROP_ssa, /* properties_required */
8199   PROP_gimple_eomp, /* properties_provided */
8200   0, /* properties_destroyed */
8201   0, /* todo_flags_start */
8202   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8203 };
8204 
8205 class pass_expand_omp_ssa : public gimple_opt_pass
8206 {
8207 public:
8208   pass_expand_omp_ssa (gcc::context *ctxt)
8209     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8210   {}
8211 
8212   /* opt_pass methods: */
8213   virtual bool gate (function *fun)
8214     {
8215       return !(fun->curr_properties & PROP_gimple_eomp);
8216     }
8217   virtual unsigned int execute (function *) { return execute_expand_omp (); }
8218   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8219 
8220 }; // class pass_expand_omp_ssa
8221 
8222 } // anon namespace
8223 
8224 gimple_opt_pass *
8225 make_pass_expand_omp_ssa (gcc::context *ctxt)
8226 {
8227   return new pass_expand_omp_ssa (ctxt);
8228 }
8229 
8230 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8231    GIMPLE_* codes.  */
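
/* For instance, when LAST is the GIMPLE_OMP_CONTINUE of a GIMPLE_OMP_FOR
   region, the code below creates (a sketch of the resulting edges) an
   abnormal loopback edge from BB to the block just after the
   GIMPLE_OMP_FOR statement, plus an abnormal edge from the GIMPLE_OMP_FOR
   block to the block following BB, covering the case where the loop body
   is not executed at all.  */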
8232 
8233 bool
8234 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8235 		       int *region_idx)
8236 {
8237   gimple *last = last_stmt (bb);
8238   enum gimple_code code = gimple_code (last);
8239   struct omp_region *cur_region = *region;
8240   bool fallthru = false;
8241 
8242   switch (code)
8243     {
8244     case GIMPLE_OMP_PARALLEL:
8245     case GIMPLE_OMP_TASK:
8246     case GIMPLE_OMP_FOR:
8247     case GIMPLE_OMP_SINGLE:
8248     case GIMPLE_OMP_TEAMS:
8249     case GIMPLE_OMP_MASTER:
8250     case GIMPLE_OMP_TASKGROUP:
8251     case GIMPLE_OMP_CRITICAL:
8252     case GIMPLE_OMP_SECTION:
8253     case GIMPLE_OMP_GRID_BODY:
8254       cur_region = new_omp_region (bb, code, cur_region);
8255       fallthru = true;
8256       break;
8257 
8258     case GIMPLE_OMP_ORDERED:
8259       cur_region = new_omp_region (bb, code, cur_region);
8260       fallthru = true;
8261       if (omp_find_clause (gimple_omp_ordered_clauses
8262 			     (as_a <gomp_ordered *> (last)),
8263 			   OMP_CLAUSE_DEPEND))
8264 	cur_region = cur_region->outer;
8265       break;
8266 
8267     case GIMPLE_OMP_TARGET:
8268       cur_region = new_omp_region (bb, code, cur_region);
8269       fallthru = true;
8270       switch (gimple_omp_target_kind (last))
8271 	{
8272 	case GF_OMP_TARGET_KIND_REGION:
8273 	case GF_OMP_TARGET_KIND_DATA:
8274 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8275 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8276 	case GF_OMP_TARGET_KIND_OACC_DATA:
8277 	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8278 	  break;
8279 	case GF_OMP_TARGET_KIND_UPDATE:
8280 	case GF_OMP_TARGET_KIND_ENTER_DATA:
8281 	case GF_OMP_TARGET_KIND_EXIT_DATA:
8282 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8283 	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8284 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
8285 	  cur_region = cur_region->outer;
8286 	  break;
8287 	default:
8288 	  gcc_unreachable ();
8289 	}
8290       break;
8291 
8292     case GIMPLE_OMP_SECTIONS:
8293       cur_region = new_omp_region (bb, code, cur_region);
8294       fallthru = true;
8295       break;
8296 
8297     case GIMPLE_OMP_SECTIONS_SWITCH:
8298       fallthru = false;
8299       break;
8300 
8301     case GIMPLE_OMP_ATOMIC_LOAD:
8302     case GIMPLE_OMP_ATOMIC_STORE:
8303       fallthru = true;
8304       break;
8305 
8306     case GIMPLE_OMP_RETURN:
8307       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8308 	 somewhere other than the next block.  This will be
8309 	 created later.  */
8310       cur_region->exit = bb;
8311       if (cur_region->type == GIMPLE_OMP_TASK)
8312 	/* Add an edge corresponding to not scheduling the task
8313 	   immediately.  */
8314 	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8315       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8316       cur_region = cur_region->outer;
8317       break;
8318 
8319     case GIMPLE_OMP_CONTINUE:
8320       cur_region->cont = bb;
8321       switch (cur_region->type)
8322 	{
8323 	case GIMPLE_OMP_FOR:
8324 	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8325 	     successor edges as abnormal to prevent splitting
8326 	     them.  */
8327 	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8328 	  /* Make the loopback edge.  */
8329 	  make_edge (bb, single_succ (cur_region->entry),
8330 		     EDGE_ABNORMAL);
8331 
8332 	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8333 	     corresponds to the case that the body of the loop
8334 	     is not executed at all.  */
8335 	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8336 	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8337 	  fallthru = false;
8338 	  break;
8339 
8340 	case GIMPLE_OMP_SECTIONS:
8341 	  /* Wire up the edges into and out of the nested sections.  */
8342 	  {
8343 	    basic_block switch_bb = single_succ (cur_region->entry);
8344 
8345 	    struct omp_region *i;
8346 	    for (i = cur_region->inner; i ; i = i->next)
8347 	      {
8348 		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8349 		make_edge (switch_bb, i->entry, 0);
8350 		make_edge (i->exit, bb, EDGE_FALLTHRU);
8351 	      }
8352 
8353 	    /* Make the loopback edge to the block with
8354 	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8355 	    make_edge (bb, switch_bb, 0);
8356 
8357 	    /* Make the edge from the switch to exit.  */
8358 	    make_edge (switch_bb, bb->next_bb, 0);
8359 	    fallthru = false;
8360 	  }
8361 	  break;
8362 
8363 	case GIMPLE_OMP_TASK:
8364 	  fallthru = true;
8365 	  break;
8366 
8367 	default:
8368 	  gcc_unreachable ();
8369 	}
8370       break;
8371 
8372     default:
8373       gcc_unreachable ();
8374     }
8375 
8376   if (*region != cur_region)
8377     {
8378       *region = cur_region;
8379       if (cur_region)
8380 	*region_idx = cur_region->entry->index;
8381       else
8382 	*region_idx = 0;
8383     }
8384 
8385   return fallthru;
8386 }
8387 
8388 #include "gt-omp-expand.h"
8389