/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "cilk.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "debug.h"


/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
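
/* For example, a loop nested in a parallel

	#pragma omp parallel
	#pragma omp for

   produces a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region; directives at the same nesting level are
   chained through NEXT.  */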

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from a SCHEDULE clause, depending on the presence
   of the simd modifier (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  int vf = omp_max_vf ();
  if (vf == 1)
    return chunk_size;

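  /* Round CHUNK_SIZE up to the next multiple of VF:
       chunk_size = (chunk_size + vf - 1) & -vf;
     e.g. a chunk of 10 with vf == 4 becomes 12.  Using -vf as the
     mask relies on the vectorization factor being a power of two.  */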
  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded; PAR_STMT is the enclosing parallel directive, consulted
   for its _looptemp_ clauses when the loop has been combined into it.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region of kind TYPE, entered at basic block
   BB, inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */
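
/* For instance, given the tree A < B this produces the statement
   'if (a < b) <...>' with both branch labels left NULL; the actual
   jump targets are supplied later by the block's CFG edges.  */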

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where the code is to be inserted.
   WS_ARGS will be set if this is a call to a combined
   parallel+workshare construct; it contains the list of additional
   arguments needed by the workshare construct.  */
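
/* The emitted call has the form

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   with the WS_ARGS values spliced in between NUM_THREADS and FLAGS
   for the combined GOMP_parallel_loop_* and GOMP_parallel_sections
   variants.  */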

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if (region->inner->sched_modifiers
		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
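	  /* This offset arithmetic relies on the GOMP_parallel_loop_*
	     builtins being declared back to back in the order STATIC,
	     DYNAMIC, GUIDED, RUNTIME, NONMONOTONIC_DYNAMIC,
	     NONMONOTONIC_GUIDED, so START_IX2 computed above indexes
	     into that run.  */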
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond == 0), which yields 1u (serialize) when COND is false and
     0u (runtime default) otherwise, or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			   EQ_EXPR, unsigned_type_node, cond,
			   build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Insert a function call whose name is FUNC_NAME with the information from
   ENTRY_STMT into the basic_block BB.  */

static void
expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
		      vec <tree, va_gc> *ws_args)
{
  tree t, t1, t2;
  gimple_stmt_iterator gsi;
  vec <tree, va_gc> *args;

  gcc_assert (vec_safe_length (ws_args) == 2);
  tree func_name = (*ws_args)[0];
  tree grain = (*ws_args)[1];

  tree clauses = gimple_omp_parallel_clauses (entry_stmt);
  tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
  gcc_assert (count != NULL_TREE);
  count = OMP_CLAUSE_OPERAND (count, 0);

  gsi = gsi_last_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));

  vec_alloc (args, 4);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (count);
  args->quick_push (grain);
  t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
			    GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where the code is
   to be inserted.  */
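
/* The call built below has the shape

     GOMP_task (child_fn, data, copy_fn, arg_size, arg_align,
		if_clause, flags, depend, priority);

   for plain tasks, while taskloops use GOMP_taskloop{,_ull} with
   flags, num_tasks, priority and the start/end/step triplet in
   place of the last four arguments.  */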

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in vector V by their DECL_CHAIN fields,
   preserving their order; e.g. {a, b, c} yields the chain
   a -> b -> c with A returned as the head.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel
   region is an implicit barrier, the barrier left at the end of the
   GIMPLE_OMP_PARALLEL region by any workshare inside it can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have a
     REGION->EXIT block at all.  */
  if (! exit_bb)
    return;

  /* The last stmt in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within the current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

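/* Walk the region tree rooted at REGION, removing exit barriers from
   every GIMPLE_OMP_PARALLEL region found.  */
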
static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for a task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */
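
/* For example, within a parallel body

	i = omp_get_thread_num ();

   becomes

	i = __builtin_omp_get_thread_num ();

   whose const attribute lets later passes CSE repeated calls.  */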

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Insert the assignment TO = FROM either before *GSI_P (when AFTER is
   false) or after it (when AFTER is true).  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  bool is_cilk_for
    = (flag_cilkplus
       && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
       && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
			   OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);

  if (is_cilk_for)
    /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
       and the inner statement contains the name of the built-in function
       and grain.  */
    ws_args = region->inner->ws_args;
  else if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0)
			== gimple_omp_taskreg_data_arg (entry_stmt))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the parallel/task block
	 rather than in the containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      /* Make sure to generate early debug for the function before
         outlining anything.  */
      if (! gimple_in_ssa_p (cfun))
	(*debug_hooks->early_global_decl) (cfun->decl);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  /* Emit a library call to launch the children threads.  */
  if (is_cilk_for)
    expand_cilk_for_call (new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in the COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound, of type BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine the number of iterations, (range - dir + s) / s,
	 i.e. a ceiling division of RANGE by the step that is valid
	 for either loop direction.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
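  /* Each collapsed loop variable is recovered from IVAR as from a
     mixed-radix number: loop IX receives (ivar % iters[IX]) scaled by
     its step, and ivar /= iters[IX] carries the remaining count to
     the next-outer loop.  */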
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
1582 
1583 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1584    of the combined collapse > 1 loop constructs, generate code like:
1585 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1586 	if (cond3 is <)
1587 	  adj = STEP3 - 1;
1588 	else
1589 	  adj = STEP3 + 1;
1590 	count3 = (adj + N32 - N31) / STEP3;
1591 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1592 	if (cond2 is <)
1593 	  adj = STEP2 - 1;
1594 	else
1595 	  adj = STEP2 + 1;
1596 	count2 = (adj + N22 - N21) / STEP2;
1597 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1598 	if (cond1 is <)
1599 	  adj = STEP1 - 1;
1600 	else
1601 	  adj = STEP1 + 1;
1602 	count1 = (adj + N12 - N11) / STEP1;
1603 	count = count1 * count2 * count3;
1604    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1605 	count = 0;
1606    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1607    of the combined loop constructs, just initialize COUNTS array
1608    from the _looptemp_ clauses.  */
1609 
1610 /* NOTE: It *could* be better to moosh all of the BBs together,
1611    creating one larger BB with all the computation and the unexpected
1612    jump at the end.  I.e.
1613 
1614    bool zero3, zero2, zero1, zero;
1615 
1616    zero3 = N32 c3 N31;
1617    count3 = (N32 - N31) /[cl] STEP3;
1618    zero2 = N22 c2 N21;
1619    count2 = (N22 - N21) /[cl] STEP2;
1620    zero1 = N12 c1 N11;
1621    count1 = (N12 - N11) /[cl] STEP1;
1622    zero = zero3 || zero2 || zero1;
1623    count = count1 * count2 * count3;
1624    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1625 
1626    After all, we expect zero to be false, and thus we expect to have to
1627    evaluate all of the comparison expressions, so short-circuiting
1628    oughtn't be a win.  Since the condition isn't protecting a
1629    denominator, we're not concerned about divide-by-zero, so we can
1630    fully evaluate count even if a numerator turned out to be wrong.
1631 
1632    It seems like putting this all together would create much better
1633    scheduling opportunities, and less pressure on the chip's branch
1634    predictor.  */
1635 
1636 static void
1637 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1638 			    basic_block &entry_bb, tree *counts,
1639 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1640 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1641 			    basic_block &l2_dom_bb)
1642 {
1643   tree t, type = TREE_TYPE (fd->loop.v);
1644   edge e, ne;
1645   int i;
1646 
1647   /* Collapsed loops need work for expansion into SSA form.  */
1648   gcc_assert (!gimple_in_ssa_p (cfun));
1649 
1650   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1651       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1652     {
1653       gcc_assert (fd->ordered == 0);
1654       /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1655 	 isn't supposed to be handled, as the inner loop doesn't
1656 	 use it.  */
1657       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1658 				     OMP_CLAUSE__LOOPTEMP_);
1659       gcc_assert (innerc);
1660       for (i = 0; i < fd->collapse; i++)
1661 	{
1662 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1663 				    OMP_CLAUSE__LOOPTEMP_);
1664 	  gcc_assert (innerc);
1665 	  if (i)
1666 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1667 	  else
1668 	    counts[0] = NULL_TREE;
1669 	}
1670       return;
1671     }
1672 
1673   for (i = fd->collapse; i < fd->ordered; i++)
1674     {
1675       tree itype = TREE_TYPE (fd->loops[i].v);
1676       counts[i] = NULL_TREE;
1677       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1678 		       fold_convert (itype, fd->loops[i].n1),
1679 		       fold_convert (itype, fd->loops[i].n2));
1680       if (t && integer_zerop (t))
1681 	{
1682 	  for (i = fd->collapse; i < fd->ordered; i++)
1683 	    counts[i] = build_int_cst (type, 0);
1684 	  break;
1685 	}
1686     }
1687   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1688     {
1689       tree itype = TREE_TYPE (fd->loops[i].v);
1690 
1691       if (i >= fd->collapse && counts[i])
1692 	continue;
1693       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1694 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1695 				fold_convert (itype, fd->loops[i].n1),
1696 				fold_convert (itype, fd->loops[i].n2)))
1697 	      == NULL_TREE || !integer_onep (t)))
1698 	{
1699 	  gcond *cond_stmt;
1700 	  tree n1, n2;
1701 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1702 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1703 					 true, GSI_SAME_STMT);
1704 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1705 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1706 					 true, GSI_SAME_STMT);
1707 	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1708 					 NULL_TREE, NULL_TREE);
1709 	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1710 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1711 			 expand_omp_regimplify_p, NULL, NULL)
1712 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1713 			    expand_omp_regimplify_p, NULL, NULL))
1714 	    {
1715 	      *gsi = gsi_for_stmt (cond_stmt);
1716 	      gimple_regimplify_operands (cond_stmt, gsi);
1717 	    }
1718 	  e = split_block (entry_bb, cond_stmt);
1719 	  basic_block &zero_iter_bb
1720 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1721 	  int &first_zero_iter
1722 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1723 	  if (zero_iter_bb == NULL)
1724 	    {
1725 	      gassign *assign_stmt;
1726 	      first_zero_iter = i;
1727 	      zero_iter_bb = create_empty_bb (entry_bb);
1728 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1729 	      *gsi = gsi_after_labels (zero_iter_bb);
1730 	      if (i < fd->collapse)
1731 		assign_stmt = gimple_build_assign (fd->loop.n2,
1732 						   build_zero_cst (type));
1733 	      else
1734 		{
1735 		  counts[i] = create_tmp_reg (type, ".count");
1736 		  assign_stmt
1737 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1738 		}
1739 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1740 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1741 				       entry_bb);
1742 	    }
1743 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1744 	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1745 	  e->flags = EDGE_TRUE_VALUE;
1746 	  e->probability = REG_BR_PROB_BASE - ne->probability;
1747 	  if (l2_dom_bb == NULL)
1748 	    l2_dom_bb = entry_bb;
1749 	  entry_bb = e->dest;
1750 	  *gsi = gsi_last_bb (entry_bb);
1751 	}
1752 
1753       if (POINTER_TYPE_P (itype))
1754 	itype = signed_type_for (itype);
1755       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1756 				 ? -1 : 1));
1757       t = fold_build2 (PLUS_EXPR, itype,
1758 		       fold_convert (itype, fd->loops[i].step), t);
1759       t = fold_build2 (PLUS_EXPR, itype, t,
1760 		       fold_convert (itype, fd->loops[i].n2));
1761       t = fold_build2 (MINUS_EXPR, itype, t,
1762 		       fold_convert (itype, fd->loops[i].n1));
1763       /* ?? We could probably use CEIL_DIV_EXPR instead of
1764 	 TRUNC_DIV_EXPR and adjusting by hand, unless we can't
1765 	 generate the same code in the end because generically we
1766 	 don't know that the values involved must be negative for
1767 	 GT??  */
1768       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1769 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1770 			 fold_build1 (NEGATE_EXPR, itype, t),
1771 			 fold_build1 (NEGATE_EXPR, itype,
1772 				      fold_convert (itype,
1773 						    fd->loops[i].step)));
1774       else
1775 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1776 			 fold_convert (itype, fd->loops[i].step));
1777       t = fold_convert (type, t);
1778       if (TREE_CODE (t) == INTEGER_CST)
1779 	counts[i] = t;
1780       else
1781 	{
1782 	  if (i < fd->collapse || i != first_zero_iter2)
1783 	    counts[i] = create_tmp_reg (type, ".count");
1784 	  expand_omp_build_assign (gsi, counts[i], t);
1785 	}
1786       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1787 	{
1788 	  if (i == 0)
1789 	    t = counts[0];
1790 	  else
1791 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1792 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1793 	}
1794     }
1795 }
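/* As a concrete (hypothetical) instance of the above, for
	#pragma omp for collapse(2)
	for (i = 0; i < N; i++)
	  for (j = 0; j < M; j += 4)
   with signed iterators, the emitted checks and counts are roughly
	if (__builtin_expect (N <= 0, 0)) goto zero_iter_bb;
	count1 = N;
	if (__builtin_expect (M <= 0, 0)) goto zero_iter_bb;
	count2 = (M + 3) / 4;
	count = count1 * count2;
   a sketch only; the actual division uses TRUNC_DIV_EXPR with the
   adjustment computed above.  */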
1796 
1797 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1798 	T = V;
1799 	V3 = N31 + (T % count3) * STEP3;
1800 	T = T / count3;
1801 	V2 = N21 + (T % count2) * STEP2;
1802 	T = T / count2;
1803 	V1 = N11 + T * STEP1;
1804    if this loop doesn't have an inner loop construct combined with it.
1805    If it does have an inner loop construct combined with it and the
1806    iteration count isn't known constant, store values from counts array
1807    into its _looptemp_ temporaries instead.  */
1808 
1809 static void
1810 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1811 			  tree *counts, gimple *inner_stmt, tree startvar)
1812 {
1813   int i;
1814   if (gimple_omp_for_combined_p (fd->for_stmt))
1815     {
1816       /* If fd->loop.n2 is constant, then no propagation of the counts
1817 	 is needed; they are constant.  */
1818       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1819 	return;
1820 
1821       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1822 		     ? gimple_omp_taskreg_clauses (inner_stmt)
1823 		     : gimple_omp_for_clauses (inner_stmt);
1824       /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1825 	 isn't supposed to be handled, as the inner loop doesn't
1826 	 use it.  */
1827       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1828       gcc_assert (innerc);
1829       for (i = 0; i < fd->collapse; i++)
1830 	{
1831 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1832 				    OMP_CLAUSE__LOOPTEMP_);
1833 	  gcc_assert (innerc);
1834 	  if (i)
1835 	    {
1836 	      tree tem = OMP_CLAUSE_DECL (innerc);
1837 	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1838 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1839 					    false, GSI_CONTINUE_LINKING);
1840 	      gassign *stmt = gimple_build_assign (tem, t);
1841 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1842 	    }
1843 	}
1844       return;
1845     }
1846 
1847   tree type = TREE_TYPE (fd->loop.v);
1848   tree tem = create_tmp_reg (type, ".tem");
1849   gassign *stmt = gimple_build_assign (tem, startvar);
1850   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1851 
1852   for (i = fd->collapse - 1; i >= 0; i--)
1853     {
1854       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1855       itype = vtype;
1856       if (POINTER_TYPE_P (vtype))
1857 	itype = signed_type_for (vtype);
1858       if (i != 0)
1859 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1860       else
1861 	t = tem;
1862       t = fold_convert (itype, t);
1863       t = fold_build2 (MULT_EXPR, itype, t,
1864 		       fold_convert (itype, fd->loops[i].step));
1865       if (POINTER_TYPE_P (vtype))
1866 	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1867       else
1868 	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1869       t = force_gimple_operand_gsi (gsi, t,
1870 				    DECL_P (fd->loops[i].v)
1871 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1872 				    NULL_TREE, false,
1873 				    GSI_CONTINUE_LINKING);
1874       stmt = gimple_build_assign (fd->loops[i].v, t);
1875       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1876       if (i != 0)
1877 	{
1878 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1879 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1880 					false, GSI_CONTINUE_LINKING);
1881 	  stmt = gimple_build_assign (tem, t);
1882 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1883 	}
1884     }
1885 }
1886 
1887 /* Helper function for expand_omp_for_*.  Generate code like:
1888     L10:
1889 	V3 += STEP3;
1890 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
1891     L11:
1892 	V3 = N31;
1893 	V2 += STEP2;
1894 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
1895     L12:
1896 	V2 = N21;
1897 	V1 += STEP1;
1898 	goto BODY_BB;  */
1899 
1900 static basic_block
1901 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1902 			     basic_block body_bb)
1903 {
1904   basic_block last_bb, bb, collapse_bb = NULL;
1905   int i;
1906   gimple_stmt_iterator gsi;
1907   edge e;
1908   tree t;
1909   gimple *stmt;
1910 
1911   last_bb = cont_bb;
1912   for (i = fd->collapse - 1; i >= 0; i--)
1913     {
1914       tree vtype = TREE_TYPE (fd->loops[i].v);
1915 
1916       bb = create_empty_bb (last_bb);
1917       add_bb_to_loop (bb, last_bb->loop_father);
1918       gsi = gsi_start_bb (bb);
1919 
1920       if (i < fd->collapse - 1)
1921 	{
1922 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1923 	  e->probability = REG_BR_PROB_BASE / 8;
1924 
1925 	  t = fd->loops[i + 1].n1;
1926 	  t = force_gimple_operand_gsi (&gsi, t,
1927 					DECL_P (fd->loops[i + 1].v)
1928 					&& TREE_ADDRESSABLE (fd->loops[i
1929 								       + 1].v),
1930 					NULL_TREE, false,
1931 					GSI_CONTINUE_LINKING);
1932 	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1933 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1934 	}
1935       else
1936 	collapse_bb = bb;
1937 
1938       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1939 
1940       if (POINTER_TYPE_P (vtype))
1941 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1942       else
1943 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1944       t = force_gimple_operand_gsi (&gsi, t,
1945 				    DECL_P (fd->loops[i].v)
1946 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1947 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
1948       stmt = gimple_build_assign (fd->loops[i].v, t);
1949       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950 
1951       if (i > 0)
1952 	{
1953 	  t = fd->loops[i].n2;
1954 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1955 					false, GSI_CONTINUE_LINKING);
1956 	  tree v = fd->loops[i].v;
1957 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
1958 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1959 					  false, GSI_CONTINUE_LINKING);
1960 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1961 	  stmt = gimple_build_cond_empty (t);
1962 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1963 	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
1964 			 expand_omp_regimplify_p, NULL, NULL)
1965 	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
1966 			    expand_omp_regimplify_p, NULL, NULL))
1967 	    gimple_regimplify_operands (stmt, &gsi);
1968 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1969 	  e->probability = REG_BR_PROB_BASE * 7 / 8;
1970 	}
1971       else
1972 	make_edge (bb, body_bb, EDGE_FALLTHRU);
1973       last_bb = bb;
1974     }
1975 
1976   return collapse_bb;
1977 }
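/* The returned COLLAPSE_BB is the innermost update block (L10 above);
   expand_omp_for_generic later redirects CONT_BB's true edge to it so
   that each sequential iteration steps the collapsed iterators.  */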
1978 
1979 /* Expand #pragma omp ordered depend(source).  */
1980 
1981 static void
1982 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1983 			   tree *counts, location_t loc)
1984 {
1985   enum built_in_function source_ix
1986     = fd->iter_type == long_integer_type_node
1987       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1988   gimple *g
1989     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1990 			 build_fold_addr_expr (counts[fd->ordered]));
1991   gimple_set_location (g, loc);
1992   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1993 }
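/* E.g. inside #pragma omp for ordered(2), a depend(source) construct
   expands, in effect, to
	GOMP_doacross_post (&.orditera[0]);
   (or the _ull_ variant for unsigned long long iteration types), where
   .orditera is the counts[fd->ordered] array, created below, that
   tracks the current iteration's coordinates.  */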
1994 
1995 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
1996 
1997 static void
1998 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1999 			 tree *counts, tree c, location_t loc)
2000 {
2001   auto_vec<tree, 10> args;
2002   enum built_in_function sink_ix
2003     = fd->iter_type == long_integer_type_node
2004       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2005   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2006   int i;
2007   gimple_stmt_iterator gsi2 = *gsi;
2008   bool warned_step = false;
2009 
2010   for (i = 0; i < fd->ordered; i++)
2011     {
2012       tree step = NULL_TREE;
2013       off = TREE_PURPOSE (deps);
2014       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2015 	{
2016 	  step = TREE_OPERAND (off, 1);
2017 	  off = TREE_OPERAND (off, 0);
2018 	}
2019       if (!integer_zerop (off))
2020 	{
2021 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2022 		      || fd->loops[i].cond_code == GT_EXPR);
2023 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2024 	  if (step)
2025 	    {
2026 	      /* Non-simple Fortran DO loops.  If the step is variable,
2027 		 we don't know even the direction at compile time, so we
2028 		 can't warn.  */
2029 	      if (TREE_CODE (step) != INTEGER_CST)
2030 		break;
2031 	      forward = tree_int_cst_sgn (step) != -1;
2032 	    }
2033 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2034 	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2035 				"lexically later iteration");
2036 	  break;
2037 	}
2038       deps = TREE_CHAIN (deps);
2039     }
2040   /* If all offsets corresponding to the collapsed loops are zero,
2041      this depend clause can be ignored.  FIXME: a flush is still
2042      needed, though; we would need to emit one __sync_synchronize ()
2043      for it (perhaps conditionally).  Solve this together with the
2044      conservative dependence folding optimization.
2045   if (i >= fd->collapse)
2046     return;  */
2047 
2048   deps = OMP_CLAUSE_DECL (c);
2049   gsi_prev (&gsi2);
2050   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2051   edge e2 = split_block_after_labels (e1->dest);
2052 
2053   gsi2 = gsi_after_labels (e1->dest);
2054   *gsi = gsi_last_bb (e1->src);
2055   for (i = 0; i < fd->ordered; i++)
2056     {
2057       tree itype = TREE_TYPE (fd->loops[i].v);
2058       tree step = NULL_TREE;
2059       tree orig_off = NULL_TREE;
2060       if (POINTER_TYPE_P (itype))
2061 	itype = sizetype;
2062       if (i)
2063 	deps = TREE_CHAIN (deps);
2064       off = TREE_PURPOSE (deps);
2065       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2066 	{
2067 	  step = TREE_OPERAND (off, 1);
2068 	  off = TREE_OPERAND (off, 0);
2069 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2070 		      && integer_onep (fd->loops[i].step)
2071 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2072 	}
2073       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2074       if (step)
2075 	{
2076 	  off = fold_convert_loc (loc, itype, off);
2077 	  orig_off = off;
2078 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2079 	}
2080 
2081       if (integer_zerop (off))
2082 	t = boolean_true_node;
2083       else
2084 	{
2085 	  tree a;
2086 	  tree co = fold_convert_loc (loc, itype, off);
2087 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2088 	    {
2089 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2090 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2091 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2092 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2093 				   co);
2094 	    }
2095 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2096 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2097 				 fd->loops[i].v, co);
2098 	  else
2099 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2100 				 fd->loops[i].v, co);
2101 	  if (step)
2102 	    {
2103 	      tree t1, t2;
2104 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2105 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2106 				      fd->loops[i].n1);
2107 	      else
2108 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2109 				      fd->loops[i].n2);
2110 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2111 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2112 				      fd->loops[i].n2);
2113 	      else
2114 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2115 				      fd->loops[i].n1);
2116 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2117 				   step, build_int_cst (TREE_TYPE (step), 0));
2118 	      if (TREE_CODE (step) != INTEGER_CST)
2119 		{
2120 		  t1 = unshare_expr (t1);
2121 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2122 						 false, GSI_CONTINUE_LINKING);
2123 		  t2 = unshare_expr (t2);
2124 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2125 						 false, GSI_CONTINUE_LINKING);
2126 		}
2127 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2128 				   t, t2, t1);
2129 	    }
2130 	  else if (fd->loops[i].cond_code == LT_EXPR)
2131 	    {
2132 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2133 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2134 				     fd->loops[i].n1);
2135 	      else
2136 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2137 				     fd->loops[i].n2);
2138 	    }
2139 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2140 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2141 				 fd->loops[i].n2);
2142 	  else
2143 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2144 				 fd->loops[i].n1);
2145 	}
2146       if (cond)
2147 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2148       else
2149 	cond = t;
2150 
2151       off = fold_convert_loc (loc, itype, off);
2152 
2153       if (step
2154 	  || (fd->loops[i].cond_code == LT_EXPR
2155 	      ? !integer_onep (fd->loops[i].step)
2156 	      : !integer_minus_onep (fd->loops[i].step)))
2157 	{
2158 	  if (step == NULL_TREE
2159 	      && TYPE_UNSIGNED (itype)
2160 	      && fd->loops[i].cond_code == GT_EXPR)
2161 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2162 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2163 						  s));
2164 	  else
2165 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2166 				 orig_off ? orig_off : off, s);
2167 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2168 			       build_int_cst (itype, 0));
2169 	  if (integer_zerop (t) && !warned_step)
2170 	    {
2171 	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2172 				  "in the iteration space");
2173 	      warned_step = true;
2174 	    }
2175 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2176 				  cond, t);
2177 	}
2178 
2179       if (i <= fd->collapse - 1 && fd->collapse > 1)
2180 	t = fd->loop.v;
2181       else if (counts[i])
2182 	t = counts[i];
2183       else
2184 	{
2185 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2186 			       fd->loops[i].v, fd->loops[i].n1);
2187 	  t = fold_convert_loc (loc, fd->iter_type, t);
2188 	}
2189       if (step)
2190 	/* We have already divided OFF by STEP earlier.  */;
2191       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2192 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2193 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2194 						s));
2195       else
2196 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2197       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2198 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2199       off = fold_convert_loc (loc, fd->iter_type, off);
2200       if (i <= fd->collapse - 1 && fd->collapse > 1)
2201 	{
2202 	  if (i)
2203 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2204 				   off);
2205 	  if (i < fd->collapse - 1)
2206 	    {
2207 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2208 				      counts[i]);
2209 	      continue;
2210 	    }
2211 	}
2212       off = unshare_expr (off);
2213       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2214       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2215 				    true, GSI_SAME_STMT);
2216       args.safe_push (t);
2217     }
2218   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2219   gimple_set_location (g, loc);
2220   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2221 
2222   cond = unshare_expr (cond);
2223   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2224 				   GSI_CONTINUE_LINKING);
2225   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2226   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2227   e3->probability = REG_BR_PROB_BASE / 8;
2228   e1->probability = REG_BR_PROB_BASE - e3->probability;
2229   e1->flags = EDGE_TRUE_VALUE;
2230   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2231 
2232   *gsi = gsi_after_labels (e2->dest);
2233 }
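/* E.g. for #pragma omp for ordered(2) with unit steps, a construct like
	#pragma omp ordered depend(sink: i - 1, j + 1)
   expands to roughly (a sketch; COND also folds in divisibility checks
   for non-unit steps)
	if (i - 1 >= N11 && j + 1 < N22)
	  GOMP_doacross_wait (i - 1 - N11, j + 1 - N21);
   so the wait is skipped whenever the sink iteration falls outside the
   iteration space.  */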
2234 
2235 /* Expand all #pragma omp ordered depend(source) and
2236    #pragma omp ordered depend(sink:...) constructs in the current
2237    #pragma omp for ordered(n) region.  */
2238 
2239 static void
2240 expand_omp_ordered_source_sink (struct omp_region *region,
2241 				struct omp_for_data *fd, tree *counts,
2242 				basic_block cont_bb)
2243 {
2244   struct omp_region *inner;
2245   int i;
2246   for (i = fd->collapse - 1; i < fd->ordered; i++)
2247     if (i == fd->collapse - 1 && fd->collapse > 1)
2248       counts[i] = NULL_TREE;
2249     else if (i >= fd->collapse && !cont_bb)
2250       counts[i] = build_zero_cst (fd->iter_type);
2251     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2252 	     && integer_onep (fd->loops[i].step))
2253       counts[i] = NULL_TREE;
2254     else
2255       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2256   tree atype
2257     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2258   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2259   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2260 
2261   for (inner = region->inner; inner; inner = inner->next)
2262     if (inner->type == GIMPLE_OMP_ORDERED)
2263       {
2264 	gomp_ordered *ord_stmt = inner->ord_stmt;
2265 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2266 	location_t loc = gimple_location (ord_stmt);
2267 	tree c;
2268 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2269 	     c; c = OMP_CLAUSE_CHAIN (c))
2270 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2271 	    break;
2272 	if (c)
2273 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2274 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2275 	     c; c = OMP_CLAUSE_CHAIN (c))
2276 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2277 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2278 	gsi_remove (&gsi, true);
2279       }
2280 }
2281 
2282 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2283    collapsed.  */
2284 
2285 static basic_block
2286 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2287 			      basic_block cont_bb, basic_block body_bb,
2288 			      bool ordered_lastprivate)
2289 {
2290   if (fd->ordered == fd->collapse)
2291     return cont_bb;
2292 
2293   if (!cont_bb)
2294     {
2295       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2296       for (int i = fd->collapse; i < fd->ordered; i++)
2297 	{
2298 	  tree type = TREE_TYPE (fd->loops[i].v);
2299 	  tree n1 = fold_convert (type, fd->loops[i].n1);
2300 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2301 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2302 			      size_int (i - fd->collapse + 1),
2303 			      NULL_TREE, NULL_TREE);
2304 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2305 	}
2306       return NULL;
2307     }
2308 
2309   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2310     {
2311       tree t, type = TREE_TYPE (fd->loops[i].v);
2312       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2313       expand_omp_build_assign (&gsi, fd->loops[i].v,
2314 			       fold_convert (type, fd->loops[i].n1));
2315       if (counts[i])
2316 	expand_omp_build_assign (&gsi, counts[i],
2317 				 build_zero_cst (fd->iter_type));
2318       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2319 			  size_int (i - fd->collapse + 1),
2320 			  NULL_TREE, NULL_TREE);
2321       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2322       if (!gsi_end_p (gsi))
2323 	gsi_prev (&gsi);
2324       else
2325 	gsi = gsi_last_bb (body_bb);
2326       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2327       basic_block new_body = e1->dest;
2328       if (body_bb == cont_bb)
2329 	cont_bb = new_body;
2330       edge e2 = NULL;
2331       basic_block new_header;
2332       if (EDGE_COUNT (cont_bb->preds) > 0)
2333 	{
2334 	  gsi = gsi_last_bb (cont_bb);
2335 	  if (POINTER_TYPE_P (type))
2336 	    t = fold_build_pointer_plus (fd->loops[i].v,
2337 					 fold_convert (sizetype,
2338 						       fd->loops[i].step));
2339 	  else
2340 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2341 			     fold_convert (type, fd->loops[i].step));
2342 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2343 	  if (counts[i])
2344 	    {
2345 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2346 			       build_int_cst (fd->iter_type, 1));
2347 	      expand_omp_build_assign (&gsi, counts[i], t);
2348 	      t = counts[i];
2349 	    }
2350 	  else
2351 	    {
2352 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2353 			       fd->loops[i].v, fd->loops[i].n1);
2354 	      t = fold_convert (fd->iter_type, t);
2355 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2356 					    true, GSI_SAME_STMT);
2357 	    }
2358 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2359 			 size_int (i - fd->collapse + 1),
2360 			 NULL_TREE, NULL_TREE);
2361 	  expand_omp_build_assign (&gsi, aref, t);
2362 	  gsi_prev (&gsi);
2363 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2364 	  new_header = e2->dest;
2365 	}
2366       else
2367 	new_header = cont_bb;
2368       gsi = gsi_after_labels (new_header);
2369       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2370 					 true, GSI_SAME_STMT);
2371       tree n2
2372 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2373 				    true, NULL_TREE, true, GSI_SAME_STMT);
2374       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2375       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2376       edge e3 = split_block (new_header, gsi_stmt (gsi));
2377       cont_bb = e3->dest;
2378       remove_edge (e1);
2379       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2380       e3->flags = EDGE_FALSE_VALUE;
2381       e3->probability = REG_BR_PROB_BASE / 8;
2382       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2383       e1->probability = REG_BR_PROB_BASE - e3->probability;
2384 
2385       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2386       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2387 
2388       if (e2)
2389 	{
2390 	  struct loop *loop = alloc_loop ();
2391 	  loop->header = new_header;
2392 	  loop->latch = e2->src;
2393 	  add_loop (loop, body_bb->loop_father);
2394 	}
2395     }
2396 
2397   /* If there are any lastprivate clauses and it is possible some loops
2398      might have zero iterations, ensure all the decls are initialized,
2399      otherwise we could crash evaluating C++ class iterators with lastprivate
2400      clauses.  */
2401   bool need_inits = false;
2402   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2403     if (need_inits)
2404       {
2405 	tree type = TREE_TYPE (fd->loops[i].v);
2406 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2407 	expand_omp_build_assign (&gsi, fd->loops[i].v,
2408 				 fold_convert (type, fd->loops[i].n1));
2409       }
2410     else
2411       {
2412 	tree type = TREE_TYPE (fd->loops[i].v);
2413 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2414 				      boolean_type_node,
2415 				      fold_convert (type, fd->loops[i].n1),
2416 				      fold_convert (type, fd->loops[i].n2));
2417 	if (!integer_onep (this_cond))
2418 	  need_inits = true;
2419       }
2420 
2421   return cont_bb;
2422 }
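/* Schematically, for ordered(2) with collapse(1) the code above wraps
	BODY;
   into
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  {
	    .orditera[1] = <iteration number of this loop>;
	    BODY;
	  }
   where the iteration number is (V2 - N21) for simple unit-stride
   integer loops and an explicit .orditer counter otherwise.  */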
2423 
2424 /* A subroutine of expand_omp_for.  Generate code for a parallel
2425    loop with any schedule.  Given parameters:
2426 
2427 	for (V = N1; V cond N2; V += STEP) BODY;
2428 
2429    where COND is "<" or ">", we generate pseudocode
2430 
2431 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2432 	if (more) goto L0; else goto L3;
2433     L0:
2434 	V = istart0;
2435 	iend = iend0;
2436     L1:
2437 	BODY;
2438 	V += STEP;
2439 	if (V cond iend) goto L1; else goto L2;
2440     L2:
2441 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2442     L3:
2443 
2444     If this is a combined omp parallel loop, instead of the call to
2445     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2446     If this is a gimple_omp_for_combined_p loop, then instead of assigning
2447     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2448     inner GIMPLE_OMP_FOR and V += STEP; and
2449     if (V cond iend) goto L1; else goto L2; are removed.
2450 
2451     For collapsed loops, given parameters:
2452       collapse(3)
2453       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2454 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2455 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2456 	    BODY;
2457 
2458     we generate pseudocode
2459 
2460 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2461 	if (cond3 is <)
2462 	  adj = STEP3 - 1;
2463 	else
2464 	  adj = STEP3 + 1;
2465 	count3 = (adj + N32 - N31) / STEP3;
2466 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2467 	if (cond2 is <)
2468 	  adj = STEP2 - 1;
2469 	else
2470 	  adj = STEP2 + 1;
2471 	count2 = (adj + N22 - N21) / STEP2;
2472 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2473 	if (cond1 is <)
2474 	  adj = STEP1 - 1;
2475 	else
2476 	  adj = STEP1 + 1;
2477 	count1 = (adj + N12 - N11) / STEP1;
2478 	count = count1 * count2 * count3;
2479 	goto Z1;
2480     Z0:
2481 	count = 0;
2482     Z1:
2483 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2484 	if (more) goto L0; else goto L3;
2485     L0:
2486 	V = istart0;
2487 	T = V;
2488 	V3 = N31 + (T % count3) * STEP3;
2489 	T = T / count3;
2490 	V2 = N21 + (T % count2) * STEP2;
2491 	T = T / count2;
2492 	V1 = N11 + T * STEP1;
2493 	iend = iend0;
2494     L1:
2495 	BODY;
2496 	V += 1;
2497 	if (V < iend) goto L10; else goto L2;
2498     L10:
2499 	V3 += STEP3;
2500 	if (V3 cond3 N32) goto L1; else goto L11;
2501     L11:
2502 	V3 = N31;
2503 	V2 += STEP2;
2504 	if (V2 cond2 N22) goto L1; else goto L12;
2505     L12:
2506 	V2 = N21;
2507 	V1 += STEP1;
2508 	goto L1;
2509     L2:
2510 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2511     L3:
2512 
2513       */
2514 
2515 static void
2516 expand_omp_for_generic (struct omp_region *region,
2517 			struct omp_for_data *fd,
2518 			enum built_in_function start_fn,
2519 			enum built_in_function next_fn,
2520 			gimple *inner_stmt)
2521 {
2522   tree type, istart0, iend0, iend;
2523   tree t, vmain, vback, bias = NULL_TREE;
2524   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2525   basic_block l2_bb = NULL, l3_bb = NULL;
2526   gimple_stmt_iterator gsi;
2527   gassign *assign_stmt;
2528   bool in_combined_parallel = is_combined_parallel (region);
2529   bool broken_loop = region->cont == NULL;
2530   edge e, ne;
2531   tree *counts = NULL;
2532   int i;
2533   bool ordered_lastprivate = false;
2534 
2535   gcc_assert (!broken_loop || !in_combined_parallel);
2536   gcc_assert (fd->iter_type == long_integer_type_node
2537 	      || !in_combined_parallel);
2538 
2539   entry_bb = region->entry;
2540   cont_bb = region->cont;
2541   collapse_bb = NULL;
2542   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2543   gcc_assert (broken_loop
2544 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2545   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2546   l1_bb = single_succ (l0_bb);
2547   if (!broken_loop)
2548     {
2549       l2_bb = create_empty_bb (cont_bb);
2550       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2551 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2552 		      == l1_bb));
2553       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2554     }
2555   else
2556     l2_bb = NULL;
2557   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2558   exit_bb = region->exit;
2559 
2560   gsi = gsi_last_bb (entry_bb);
2561 
2562   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2563   if (fd->ordered
2564       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2565 			  OMP_CLAUSE_LASTPRIVATE))
2566     ordered_lastprivate = true;
2567   if (fd->collapse > 1 || fd->ordered)
2568     {
2569       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2570       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2571 
2572       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2573       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2574 				  zero_iter1_bb, first_zero_iter1,
2575 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2576 
2577       if (zero_iter1_bb)
2578 	{
2579 	  /* Some counts[i] vars might be uninitialized if
2580 	     some loop has zero iterations.  But the body shouldn't
2581 	     be executed in that case, so just avoid uninit warnings.  */
2582 	  for (i = first_zero_iter1;
2583 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2584 	    if (SSA_VAR_P (counts[i]))
2585 	      TREE_NO_WARNING (counts[i]) = 1;
2586 	  gsi_prev (&gsi);
2587 	  e = split_block (entry_bb, gsi_stmt (gsi));
2588 	  entry_bb = e->dest;
2589 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2590 	  gsi = gsi_last_bb (entry_bb);
2591 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2592 				   get_immediate_dominator (CDI_DOMINATORS,
2593 							    zero_iter1_bb));
2594 	}
2595       if (zero_iter2_bb)
2596 	{
2597 	  /* Some counts[i] vars might be uninitialized if
2598 	     some loop has zero iterations.  But the body shouldn't
2599 	     be executed in that case, so just avoid uninit warnings.  */
2600 	  for (i = first_zero_iter2; i < fd->ordered; i++)
2601 	    if (SSA_VAR_P (counts[i]))
2602 	      TREE_NO_WARNING (counts[i]) = 1;
2603 	  if (zero_iter1_bb)
2604 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2605 	  else
2606 	    {
2607 	      gsi_prev (&gsi);
2608 	      e = split_block (entry_bb, gsi_stmt (gsi));
2609 	      entry_bb = e->dest;
2610 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2611 	      gsi = gsi_last_bb (entry_bb);
2612 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2613 				       get_immediate_dominator
2614 					 (CDI_DOMINATORS, zero_iter2_bb));
2615 	    }
2616 	}
2617       if (fd->collapse == 1)
2618 	{
2619 	  counts[0] = fd->loop.n2;
2620 	  fd->loop = fd->loops[0];
2621 	}
2622     }
2623 
2624   type = TREE_TYPE (fd->loop.v);
2625   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2626   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2627   TREE_ADDRESSABLE (istart0) = 1;
2628   TREE_ADDRESSABLE (iend0) = 1;
2629 
2630   /* See if we need to bias by LLONG_MIN.  */
2631   if (fd->iter_type == long_long_unsigned_type_node
2632       && TREE_CODE (type) == INTEGER_TYPE
2633       && !TYPE_UNSIGNED (type)
2634       && fd->ordered == 0)
2635     {
2636       tree n1, n2;
2637 
2638       if (fd->loop.cond_code == LT_EXPR)
2639 	{
2640 	  n1 = fd->loop.n1;
2641 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2642 	}
2643       else
2644 	{
2645 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2646 	  n2 = fd->loop.n1;
2647 	}
2648       if (TREE_CODE (n1) != INTEGER_CST
2649 	  || TREE_CODE (n2) != INTEGER_CST
2650 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2651 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2652     }
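  /* For example, iterating a signed long long V over [-10, 10) while
     fd->iter_type is unsigned long long: N1 = -10 and N2 + STEP = 11
     differ in sign, so BIAS becomes 0x8000000000000000.  Adding it
     maps the signed range monotonically into the unsigned one; the
     bias is subtracted again below when istart0/iend0 are converted
     back to the iterator's type.  */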
2653 
2654   gimple_stmt_iterator gsif = gsi;
2655   gsi_prev (&gsif);
2656 
2657   tree arr = NULL_TREE;
2658   if (in_combined_parallel)
2659     {
2660       gcc_assert (fd->ordered == 0);
2661       /* In a combined parallel loop, emit a call to
2662 	 GOMP_loop_foo_next.  */
2663       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2664 			   build_fold_addr_expr (istart0),
2665 			   build_fold_addr_expr (iend0));
2666     }
2667   else
2668     {
2669       tree t0, t1, t2, t3, t4;
2670       /* If this is not a combined parallel loop, emit a call to
2671 	 GOMP_loop_foo_start in ENTRY_BB.  */
2672       t4 = build_fold_addr_expr (iend0);
2673       t3 = build_fold_addr_expr (istart0);
2674       if (fd->ordered)
2675 	{
2676 	  t0 = build_int_cst (unsigned_type_node,
2677 			      fd->ordered - fd->collapse + 1);
2678 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2679 							fd->ordered
2680 							- fd->collapse + 1),
2681 				".omp_counts");
2682 	  DECL_NAMELESS (arr) = 1;
2683 	  TREE_ADDRESSABLE (arr) = 1;
2684 	  TREE_STATIC (arr) = 1;
2685 	  vec<constructor_elt, va_gc> *v;
2686 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2687 	  int idx;
2688 
2689 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2690 	    {
2691 	      tree c;
2692 	      if (idx == 0 && fd->collapse > 1)
2693 		c = fd->loop.n2;
2694 	      else
2695 		c = counts[idx + fd->collapse - 1];
2696 	      tree purpose = size_int (idx);
2697 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2698 	      if (TREE_CODE (c) != INTEGER_CST)
2699 		TREE_STATIC (arr) = 0;
2700 	    }
2701 
2702 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2703 	  if (!TREE_STATIC (arr))
2704 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2705 						    void_type_node, arr),
2706 				      true, NULL_TREE, true, GSI_SAME_STMT);
2707 	  t1 = build_fold_addr_expr (arr);
2708 	  t2 = NULL_TREE;
2709 	}
2710       else
2711 	{
2712 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2713 	  t1 = fd->loop.n2;
2714 	  t0 = fd->loop.n1;
2715 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2716 	    {
2717 	      tree innerc
2718 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2719 				   OMP_CLAUSE__LOOPTEMP_);
2720 	      gcc_assert (innerc);
2721 	      t0 = OMP_CLAUSE_DECL (innerc);
2722 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2723 					OMP_CLAUSE__LOOPTEMP_);
2724 	      gcc_assert (innerc);
2725 	      t1 = OMP_CLAUSE_DECL (innerc);
2726 	    }
2727 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2728 	      && TYPE_PRECISION (TREE_TYPE (t0))
2729 		 != TYPE_PRECISION (fd->iter_type))
2730 	    {
2731 	      /* Avoid casting pointers to an integer of a different size.  */
2732 	      tree itype = signed_type_for (type);
2733 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2734 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2735 	    }
2736 	  else
2737 	    {
2738 	      t1 = fold_convert (fd->iter_type, t1);
2739 	      t0 = fold_convert (fd->iter_type, t0);
2740 	    }
2741 	  if (bias)
2742 	    {
2743 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2744 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2745 	    }
2746 	}
2747       if (fd->iter_type == long_integer_type_node || fd->ordered)
2748 	{
2749 	  if (fd->chunk_size)
2750 	    {
2751 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2752 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2753 	      if (fd->ordered)
2754 		t = build_call_expr (builtin_decl_explicit (start_fn),
2755 				     5, t0, t1, t, t3, t4);
2756 	      else
2757 		t = build_call_expr (builtin_decl_explicit (start_fn),
2758 				     6, t0, t1, t2, t, t3, t4);
2759 	    }
2760 	  else if (fd->ordered)
2761 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2762 				 4, t0, t1, t3, t4);
2763 	  else
2764 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2765 				 5, t0, t1, t2, t3, t4);
2766 	}
2767       else
2768 	{
2769 	  tree t5;
2770 	  tree c_bool_type;
2771 	  tree bfn_decl;
2772 
2773 	  /* The GOMP_loop_ull_*start functions have an additional boolean
2774 	     argument, true for < loops and false for > loops.
2775 	     In Fortran, the C bool type can be different from
2776 	     boolean_type_node.  */
2777 	  bfn_decl = builtin_decl_explicit (start_fn);
2778 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2779 	  t5 = build_int_cst (c_bool_type,
2780 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2781 	  if (fd->chunk_size)
2782 	    {
2783 	      tree bfn_decl = builtin_decl_explicit (start_fn);
2784 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2785 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2786 	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2787 	    }
2788 	  else
2789 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2790 				 6, t5, t0, t1, t2, t3, t4);
2791 	}
2792     }
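  /* At this point T is (modulo the schedule kind) a call such as
	GOMP_loop_dynamic_start (n1, n2, step, chunk, &istart0, &iend0)
     or, for the unsigned long long runtime API,
	GOMP_loop_ull_dynamic_start (up, n1, n2, step, chunk,
				     &istart0, &iend0)
     each returning a bool saying whether a chunk of iterations was
     assigned; doacross loops instead use the GOMP_loop_doacross_*_start
     entry points, passing the counts array built above.  */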
2793   if (TREE_TYPE (t) != boolean_type_node)
2794     t = fold_build2 (NE_EXPR, boolean_type_node,
2795 		     t, build_int_cst (TREE_TYPE (t), 0));
2796   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2797 				true, GSI_SAME_STMT);
2798   if (arr && !TREE_STATIC (arr))
2799     {
2800       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2801       TREE_THIS_VOLATILE (clobber) = 1;
2802       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2803 			 GSI_SAME_STMT);
2804     }
2805   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2806 
2807   /* Remove the GIMPLE_OMP_FOR statement.  */
2808   gsi_remove (&gsi, true);
2809 
2810   if (gsi_end_p (gsif))
2811     gsif = gsi_after_labels (gsi_bb (gsif));
2812   gsi_next (&gsif);
2813 
2814   /* Iteration setup for sequential loop goes in L0_BB.  */
2815   tree startvar = fd->loop.v;
2816   tree endvar = NULL_TREE;
2817 
2818   if (gimple_omp_for_combined_p (fd->for_stmt))
2819     {
2820       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2821 		  && gimple_omp_for_kind (inner_stmt)
2822 		     == GF_OMP_FOR_KIND_SIMD);
2823       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2824 				     OMP_CLAUSE__LOOPTEMP_);
2825       gcc_assert (innerc);
2826       startvar = OMP_CLAUSE_DECL (innerc);
2827       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2828 				OMP_CLAUSE__LOOPTEMP_);
2829       gcc_assert (innerc);
2830       endvar = OMP_CLAUSE_DECL (innerc);
2831     }
2832 
2833   gsi = gsi_start_bb (l0_bb);
2834   t = istart0;
2835   if (fd->ordered && fd->collapse == 1)
2836     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2837 		     fold_convert (fd->iter_type, fd->loop.step));
2838   else if (bias)
2839     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2840   if (fd->ordered && fd->collapse == 1)
2841     {
2842       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2843 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2844 			 fd->loop.n1, fold_convert (sizetype, t));
2845       else
2846 	{
2847 	  t = fold_convert (TREE_TYPE (startvar), t);
2848 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2849 			   fd->loop.n1, t);
2850 	}
2851     }
2852   else
2853     {
2854       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2855 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2856       t = fold_convert (TREE_TYPE (startvar), t);
2857     }
2858   t = force_gimple_operand_gsi (&gsi, t,
2859 				DECL_P (startvar)
2860 				&& TREE_ADDRESSABLE (startvar),
2861 				NULL_TREE, false, GSI_CONTINUE_LINKING);
2862   assign_stmt = gimple_build_assign (startvar, t);
2863   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2864 
2865   t = iend0;
2866   if (fd->ordered && fd->collapse == 1)
2867     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2868 		     fold_convert (fd->iter_type, fd->loop.step));
2869   else if (bias)
2870     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2871   if (fd->ordered && fd->collapse == 1)
2872     {
2873       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2874 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2875 			 fd->loop.n1, fold_convert (sizetype, t));
2876       else
2877 	{
2878 	  t = fold_convert (TREE_TYPE (startvar), t);
2879 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2880 			   fd->loop.n1, t);
2881 	}
2882     }
2883   else
2884     {
2885       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2886 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2887       t = fold_convert (TREE_TYPE (startvar), t);
2888     }
2889   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2890 				   false, GSI_CONTINUE_LINKING);
2891   if (endvar)
2892     {
2893       assign_stmt = gimple_build_assign (endvar, iend);
2894       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2895       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2896 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
2897       else
2898 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2899       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2900     }
2901   /* Handle linear clause adjustments.  */
2902   tree itercnt = NULL_TREE;
2903   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2904     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2905 	 c; c = OMP_CLAUSE_CHAIN (c))
2906       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2907 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2908 	{
2909 	  tree d = OMP_CLAUSE_DECL (c);
2910 	  bool is_ref = omp_is_reference (d);
2911 	  tree t = d, a, dest;
2912 	  if (is_ref)
2913 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2914 	  tree type = TREE_TYPE (t);
2915 	  if (POINTER_TYPE_P (type))
2916 	    type = sizetype;
2917 	  dest = unshare_expr (t);
2918 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
2919 	  expand_omp_build_assign (&gsif, v, t);
2920 	  if (itercnt == NULL_TREE)
2921 	    {
2922 	      itercnt = startvar;
2923 	      tree n1 = fd->loop.n1;
2924 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2925 		{
2926 		  itercnt
2927 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2928 				    itercnt);
2929 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
2930 		}
2931 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2932 				     itercnt, n1);
2933 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2934 				     itercnt, fd->loop.step);
2935 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2936 						  NULL_TREE, false,
2937 						  GSI_CONTINUE_LINKING);
2938 	    }
2939 	  a = fold_build2 (MULT_EXPR, type,
2940 			   fold_convert (type, itercnt),
2941 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2942 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2943 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2944 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2945 					false, GSI_CONTINUE_LINKING);
2946 	  assign_stmt = gimple_build_assign (dest, t);
2947 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2948 	}
2949   if (fd->collapse > 1)
2950     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2951 
2952   if (fd->ordered)
2953     {
2954       /* Until now, counts array contained number of iterations or
2955 	 variable containing it for ith loop.  From now on, we need
2956 	 those counts only for collapsed loops, and only for the 2nd
2957 	 till the last collapsed one.  Move those one element earlier,
2958 	 we'll use counts[fd->collapse - 1] for the first source/sink
2959 	 iteration counter and so on and counts[fd->ordered]
2960 	 as the array holding the current counter values for
2961 	 depend(source).  */
2962       if (fd->collapse > 1)
2963 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2964       if (broken_loop)
2965 	{
2966 	  int i;
2967 	  for (i = fd->collapse; i < fd->ordered; i++)
2968 	    {
2969 	      tree type = TREE_TYPE (fd->loops[i].v);
2970 	      tree this_cond
2971 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2972 			       fold_convert (type, fd->loops[i].n1),
2973 			       fold_convert (type, fd->loops[i].n2));
2974 	      if (!integer_onep (this_cond))
2975 		break;
2976 	    }
2977 	  if (i < fd->ordered)
2978 	    {
2979 	      cont_bb
2980 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2981 	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
2982 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2983 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2984 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2985 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2986 	      make_edge (cont_bb, l1_bb, 0);
2987 	      l2_bb = create_empty_bb (cont_bb);
2988 	      broken_loop = false;
2989 	    }
2990 	}
2991       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2992       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2993 					      ordered_lastprivate);
2994       if (counts[fd->collapse - 1])
2995 	{
2996 	  gcc_assert (fd->collapse == 1);
2997 	  gsi = gsi_last_bb (l0_bb);
2998 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2999 				   istart0, true);
3000 	  gsi = gsi_last_bb (cont_bb);
3001 	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3002 			   build_int_cst (fd->iter_type, 1));
3003 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3004 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3005 			      size_zero_node, NULL_TREE, NULL_TREE);
3006 	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3007 	  t = counts[fd->collapse - 1];
3008 	}
3009       else if (fd->collapse > 1)
3010 	t = fd->loop.v;
3011       else
3012 	{
3013 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3014 			   fd->loops[0].v, fd->loops[0].n1);
3015 	  t = fold_convert (fd->iter_type, t);
3016 	}
3017       gsi = gsi_last_bb (l0_bb);
3018       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3019 			  size_zero_node, NULL_TREE, NULL_TREE);
3020       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3021 				    false, GSI_CONTINUE_LINKING);
3022       expand_omp_build_assign (&gsi, aref, t, true);
3023     }
3024 
3025   if (!broken_loop)
3026     {
3027       /* Code to control the increment and predicate for the sequential
3028 	 loop goes in the CONT_BB.  */
3029       gsi = gsi_last_bb (cont_bb);
3030       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3031       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3032       vmain = gimple_omp_continue_control_use (cont_stmt);
3033       vback = gimple_omp_continue_control_def (cont_stmt);
3034 
3035       if (!gimple_omp_for_combined_p (fd->for_stmt))
3036 	{
3037 	  if (POINTER_TYPE_P (type))
3038 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3039 	  else
3040 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3041 	  t = force_gimple_operand_gsi (&gsi, t,
3042 					DECL_P (vback)
3043 					&& TREE_ADDRESSABLE (vback),
3044 					NULL_TREE, true, GSI_SAME_STMT);
3045 	  assign_stmt = gimple_build_assign (vback, t);
3046 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3047 
3048 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3049 	    {
3050 	      tree tem;
3051 	      if (fd->collapse > 1)
3052 		tem = fd->loop.v;
3053 	      else
3054 		{
3055 		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3056 				     fd->loops[0].v, fd->loops[0].n1);
3057 		  tem = fold_convert (fd->iter_type, tem);
3058 		}
3059 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3060 				  counts[fd->ordered], size_zero_node,
3061 				  NULL_TREE, NULL_TREE);
3062 	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3063 					      true, GSI_SAME_STMT);
3064 	      expand_omp_build_assign (&gsi, aref, tem);
3065 	    }
3066 
3067 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3068 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3069 		      iend);
3070 	  gcond *cond_stmt = gimple_build_cond_empty (t);
3071 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3072 	}
3073 
3074       /* Remove GIMPLE_OMP_CONTINUE.  */
3075       gsi_remove (&gsi, true);
3076 
3077       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3078 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3079 
3080       /* Emit code to get the next parallel iteration in L2_BB.  */
3081       gsi = gsi_start_bb (l2_bb);
3082 
3083       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3084 			   build_fold_addr_expr (istart0),
3085 			   build_fold_addr_expr (iend0));
3086       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3087 				    false, GSI_CONTINUE_LINKING);
3088       if (TREE_TYPE (t) != boolean_type_node)
3089 	t = fold_build2 (NE_EXPR, boolean_type_node,
3090 			 t, build_int_cst (TREE_TYPE (t), 0));
3091       gcond *cond_stmt = gimple_build_cond_empty (t);
3092       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3093     }
3094 
3095   /* Add the loop cleanup function.  */
3096   gsi = gsi_last_bb (exit_bb);
3097   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3098     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3099   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3100     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3101   else
3102     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3103   gcall *call_stmt = gimple_build_call (t, 0);
3104   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3105     gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3106   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3107   if (fd->ordered)
3108     {
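      /* COUNTS[fd->ordered] is dead once the loop is over; store a
	 clobber (a volatile empty constructor) into it so later passes
	 may reuse its stack slot.  */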
3109       tree arr = counts[fd->ordered];
3110       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3111       TREE_THIS_VOLATILE (clobber) = 1;
3112       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3113 			GSI_SAME_STMT);
3114     }
3115   gsi_remove (&gsi, true);
3116 
3117   /* Connect the new blocks.  */
3118   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3119   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3120 
3121   if (!broken_loop)
3122     {
3123       gimple_seq phis;
3124 
3125       e = find_edge (cont_bb, l3_bb);
3126       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3127 
3128       phis = phi_nodes (l3_bb);
3129       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3130 	{
3131 	  gimple *phi = gsi_stmt (gsi);
3132 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3133 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3134 	}
3135       remove_edge (e);
3136 
3137       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3138       e = find_edge (cont_bb, l1_bb);
3139       if (e == NULL)
3140 	{
3141 	  e = BRANCH_EDGE (cont_bb);
3142 	  gcc_assert (single_succ (e->dest) == l1_bb);
3143 	}
3144       if (gimple_omp_for_combined_p (fd->for_stmt))
3145 	{
3146 	  remove_edge (e);
3147 	  e = NULL;
3148 	}
3149       else if (fd->collapse > 1)
3150 	{
3151 	  remove_edge (e);
3152 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3153 	}
3154       else
3155 	e->flags = EDGE_TRUE_VALUE;
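      /* Assume the branch that keeps iterating over the current chunk
	 is hot (7/8) and the branch back to the runtime for the next
	 chunk is cold (1/8).  */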
3156       if (e)
3157 	{
3158 	  e->probability = REG_BR_PROB_BASE * 7 / 8;
3159 	  find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3160 	}
3161       else
3162 	{
3163 	  e = find_edge (cont_bb, l2_bb);
3164 	  e->flags = EDGE_FALLTHRU;
3165 	}
3166       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3167 
3168       if (gimple_in_ssa_p (cfun))
3169 	{
3170 	  /* Add phis to the outer loop that connect to the phis in the inner,
3171 	     original loop, and move the loop entry value of the inner phi to
3172 	     the loop entry value of the outer phi.  */
3173 	  gphi_iterator psi;
3174 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3175 	    {
3176 	      source_location locus;
3177 	      gphi *nphi;
3178 	      gphi *exit_phi = psi.phi ();
3179 
3180 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3181 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3182 
3183 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3184 	      edge latch_to_l1 = find_edge (latch, l1_bb);
3185 	      gphi *inner_phi
3186 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3187 
3188 	      tree t = gimple_phi_result (exit_phi);
3189 	      tree new_res = copy_ssa_name (t, NULL);
3190 	      nphi = create_phi_node (new_res, l0_bb);
3191 
3192 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3193 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3194 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3195 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3196 	      add_phi_arg (nphi, t, entry_to_l0, locus);
3197 
3198 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3199 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3200 
3201 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3202 	    }
3203 	}
3204 
3205       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3206 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3207       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3208 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3209       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3210 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3211       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3212 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3213 
3214       /* We enter expand_omp_for_generic with a loop.  This original loop may
3215 	 have its own loop struct, or it may be part of an outer loop struct
3216 	 (which may be the fake loop).  */
3217       struct loop *outer_loop = entry_bb->loop_father;
3218       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3219 
3220       add_bb_to_loop (l2_bb, outer_loop);
3221 
3222       /* We've added a new loop around the original loop.  Allocate the
3223 	 corresponding loop struct.  */
3224       struct loop *new_loop = alloc_loop ();
3225       new_loop->header = l0_bb;
3226       new_loop->latch = l2_bb;
3227       add_loop (new_loop, outer_loop);
3228 
3229       /* Allocate a loop structure for the original loop unless we already
3230 	 had one.  */
3231       if (!orig_loop_has_loop_struct
3232 	  && !gimple_omp_for_combined_p (fd->for_stmt))
3233 	{
3234 	  struct loop *orig_loop = alloc_loop ();
3235 	  orig_loop->header = l1_bb;
3236 	  /* The loop may have multiple latches.  */
3237 	  add_loop (orig_loop, new_loop);
3238 	}
3239     }
3240 }
3241 
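/* A source-level sketch (illustrative only, not emitted verbatim) of the
   control flow expand_omp_for_generic builds above for a schedule(dynamic)
   loop: L2_BB asks the runtime for the next chunk, L1_BB runs the
   sequential loop over it, and EXIT_BB emits one of the GOMP_loop_end*
   calls.  GOMP_loop_dynamic_next and GOMP_loop_end are libgomp entry
   points; the driver function name is hypothetical, and the initial
   GOMP_loop_*_start call made in the region entry is elided.  */

#include <stdbool.h>	/* for the sketch only */

extern bool GOMP_loop_dynamic_next (long *, long *);
extern void GOMP_loop_end (void);

static void
example_generic_loop_shape (void)
{
  long istart, iend;
  /* L2_BB: fetch [ISTART, IEND) from the runtime; stop when exhausted.  */
  while (GOMP_loop_dynamic_next (&istart, &iend))
    /* L1_BB: the sequential loop over the chunk just obtained.  */
    for (long v = istart; v < iend; v++)
      /* BODY */;
  /* EXIT_BB: release the loop; GOMP_loop_end also waits at the implicit
     barrier (GOMP_loop_end_nowait would not).  */
  GOMP_loop_end ();
}
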
3242 /* A subroutine of expand_omp_for.  Generate code for a parallel
3243    loop with static schedule and no specified chunk size.  Given
3244    parameters:
3245 
3246 	for (V = N1; V cond N2; V += STEP) BODY;
3247 
3248    where COND is "<" or ">", we generate pseudocode
3249 
3250 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3251 	if (cond is <)
3252 	  adj = STEP - 1;
3253 	else
3254 	  adj = STEP + 1;
3255 	if ((__typeof (V)) -1 > 0 && cond is >)
3256 	  n = -(adj + N2 - N1) / -STEP;
3257 	else
3258 	  n = (adj + N2 - N1) / STEP;
3259 	q = n / nthreads;
3260 	tt = n % nthreads;
3261 	if (threadid < tt) goto L3; else goto L4;
3262     L3:
3263 	tt = 0;
3264 	q = q + 1;
3265     L4:
3266 	s0 = q * threadid + tt;
3267 	e0 = s0 + q;
3268 	V = s0 * STEP + N1;
3269 	if (s0 >= e0) goto L2; else goto L0;
3270     L0:
3271 	e = e0 * STEP + N1;
3272     L1:
3273 	BODY;
3274 	V += STEP;
3275 	if (V cond e) goto L1;
3276     L2:
3277 */
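
/* A self-contained sketch (illustrative only; the function name and the
   use of unsigned long long are assumptions) of the S0/E0 computation in
   the pseudocode above: each thread receives either Q or Q+1 consecutive
   iterations, with the first TT threads taking the larger share.  */

static inline void
example_static_nochunk_range (unsigned long long n,
			      unsigned long long nthreads,
			      unsigned long long threadid,
			      unsigned long long *s0,
			      unsigned long long *e0)
{
  unsigned long long q = n / nthreads;
  unsigned long long tt = n % nthreads;
  if (threadid < tt)
    {
      /* L3: the first TT threads take one extra iteration each.  */
      tt = 0;
      q++;
    }
  /* L4: this thread owns iterations [*S0, *E0); empty when *S0 >= *E0.  */
  *s0 = q * threadid + tt;
  *e0 = *s0 + q;
}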
3278 
3279 static void
3280 expand_omp_for_static_nochunk (struct omp_region *region,
3281 			       struct omp_for_data *fd,
3282 			       gimple *inner_stmt)
3283 {
3284   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3285   tree type, itype, vmain, vback;
3286   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3287   basic_block body_bb, cont_bb, collapse_bb = NULL;
3288   basic_block fin_bb;
3289   gimple_stmt_iterator gsi;
3290   edge ep;
3291   bool broken_loop = region->cont == NULL;
3292   tree *counts = NULL;
3293   tree n1, n2, step;
3294 
3295   itype = type = TREE_TYPE (fd->loop.v);
3296   if (POINTER_TYPE_P (type))
3297     itype = signed_type_for (type);
3298 
3299   entry_bb = region->entry;
3300   cont_bb = region->cont;
3301   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3302   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3303   gcc_assert (broken_loop
3304 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3305   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3306   body_bb = single_succ (seq_start_bb);
3307   if (!broken_loop)
3308     {
3309       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3310 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3311       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3312     }
3313   exit_bb = region->exit;
3314 
3315   /* Iteration space partitioning goes in ENTRY_BB.  */
3316   gsi = gsi_last_bb (entry_bb);
3317   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3318 
3319   if (fd->collapse > 1)
3320     {
3321       int first_zero_iter = -1, dummy = -1;
3322       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3323 
3324       counts = XALLOCAVEC (tree, fd->collapse);
3325       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3326 				  fin_bb, first_zero_iter,
3327 				  dummy_bb, dummy, l2_dom_bb);
3328       t = NULL_TREE;
3329     }
3330   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3331     t = integer_one_node;
3332   else
3333     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3334 		     fold_convert (type, fd->loop.n1),
3335 		     fold_convert (type, fd->loop.n2));
3336   if (fd->collapse == 1
3337       && TYPE_UNSIGNED (type)
3338       && (t == NULL_TREE || !integer_onep (t)))
3339     {
3340       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3341       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3342 				     true, GSI_SAME_STMT);
3343       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3344       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3345 				     true, GSI_SAME_STMT);
3346       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3347 						 NULL_TREE, NULL_TREE);
3348       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3349       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3350 		     expand_omp_regimplify_p, NULL, NULL)
3351 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3352 			expand_omp_regimplify_p, NULL, NULL))
3353 	{
3354 	  gsi = gsi_for_stmt (cond_stmt);
3355 	  gimple_regimplify_operands (cond_stmt, &gsi);
3356 	}
3357       ep = split_block (entry_bb, cond_stmt);
3358       ep->flags = EDGE_TRUE_VALUE;
3359       entry_bb = ep->dest;
3360       ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3361       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3362       ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3363       if (gimple_in_ssa_p (cfun))
3364 	{
3365 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3366 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3367 	       !gsi_end_p (gpi); gsi_next (&gpi))
3368 	    {
3369 	      gphi *phi = gpi.phi ();
3370 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3371 			   ep, UNKNOWN_LOCATION);
3372 	    }
3373 	}
3374       gsi = gsi_last_bb (entry_bb);
3375     }
3376 
3377   switch (gimple_omp_for_kind (fd->for_stmt))
3378     {
3379     case GF_OMP_FOR_KIND_FOR:
3380       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3381       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3382       break;
3383     case GF_OMP_FOR_KIND_DISTRIBUTE:
3384       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3385       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3386       break;
3387     default:
3388       gcc_unreachable ();
3389     }
3390   nthreads = build_call_expr (nthreads, 0);
3391   nthreads = fold_convert (itype, nthreads);
3392   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3393 				       true, GSI_SAME_STMT);
3394   threadid = build_call_expr (threadid, 0);
3395   threadid = fold_convert (itype, threadid);
3396   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3397 				       true, GSI_SAME_STMT);
3398 
3399   n1 = fd->loop.n1;
3400   n2 = fd->loop.n2;
3401   step = fd->loop.step;
3402   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3403     {
3404       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3405 				     OMP_CLAUSE__LOOPTEMP_);
3406       gcc_assert (innerc);
3407       n1 = OMP_CLAUSE_DECL (innerc);
3408       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3409 				OMP_CLAUSE__LOOPTEMP_);
3410       gcc_assert (innerc);
3411       n2 = OMP_CLAUSE_DECL (innerc);
3412     }
3413   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3414 				 true, NULL_TREE, true, GSI_SAME_STMT);
3415   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3416 				 true, NULL_TREE, true, GSI_SAME_STMT);
3417   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3418 				   true, NULL_TREE, true, GSI_SAME_STMT);
3419 
3420   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3421   t = fold_build2 (PLUS_EXPR, itype, step, t);
3422   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3423   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3424   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3425     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3426 		     fold_build1 (NEGATE_EXPR, itype, t),
3427 		     fold_build1 (NEGATE_EXPR, itype, step));
3428   else
3429     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3430   t = fold_convert (itype, t);
3431   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3432 
3433   q = create_tmp_reg (itype, "q");
3434   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3435   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3437 
3438   tt = create_tmp_reg (itype, "tt");
3439   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3440   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3441   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3442 
3443   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3444   gcond *cond_stmt = gimple_build_cond_empty (t);
3445   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3446 
3447   second_bb = split_block (entry_bb, cond_stmt)->dest;
3448   gsi = gsi_last_bb (second_bb);
3449   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3450 
3451   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3452 		     GSI_SAME_STMT);
3453   gassign *assign_stmt
3454     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3455   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3456 
3457   third_bb = split_block (second_bb, assign_stmt)->dest;
3458   gsi = gsi_last_bb (third_bb);
3459   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3460 
3461   t = build2 (MULT_EXPR, itype, q, threadid);
3462   t = build2 (PLUS_EXPR, itype, t, tt);
3463   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3464 
3465   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3466   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3467 
3468   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3469   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3470 
3471   /* Remove the GIMPLE_OMP_FOR statement.  */
3472   gsi_remove (&gsi, true);
3473 
3474   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3475   gsi = gsi_start_bb (seq_start_bb);
3476 
3477   tree startvar = fd->loop.v;
3478   tree endvar = NULL_TREE;
3479 
3480   if (gimple_omp_for_combined_p (fd->for_stmt))
3481     {
3482       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3483 		     ? gimple_omp_parallel_clauses (inner_stmt)
3484 		     : gimple_omp_for_clauses (inner_stmt);
3485       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3486       gcc_assert (innerc);
3487       startvar = OMP_CLAUSE_DECL (innerc);
3488       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3489 				OMP_CLAUSE__LOOPTEMP_);
3490       gcc_assert (innerc);
3491       endvar = OMP_CLAUSE_DECL (innerc);
3492       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3493 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3494 	{
3495 	  int i;
3496 	  for (i = 1; i < fd->collapse; i++)
3497 	    {
3498 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3499 					OMP_CLAUSE__LOOPTEMP_);
3500 	      gcc_assert (innerc);
3501 	    }
3502 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3503 				    OMP_CLAUSE__LOOPTEMP_);
3504 	  if (innerc)
3505 	    {
3506 	      /* If needed (distribute parallel for with lastprivate),
3507 		 propagate down the total number of iterations.  */
3508 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3509 				     fd->loop.n2);
3510 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3511 					    GSI_CONTINUE_LINKING);
3512 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3513 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3514 	    }
3515 	}
3516     }
3517   t = fold_convert (itype, s0);
3518   t = fold_build2 (MULT_EXPR, itype, t, step);
3519   if (POINTER_TYPE_P (type))
3520     t = fold_build_pointer_plus (n1, t);
3521   else
3522     t = fold_build2 (PLUS_EXPR, type, t, n1);
3523   t = fold_convert (TREE_TYPE (startvar), t);
3524   t = force_gimple_operand_gsi (&gsi, t,
3525 				DECL_P (startvar)
3526 				&& TREE_ADDRESSABLE (startvar),
3527 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3528   assign_stmt = gimple_build_assign (startvar, t);
3529   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3530 
3531   t = fold_convert (itype, e0);
3532   t = fold_build2 (MULT_EXPR, itype, t, step);
3533   if (POINTER_TYPE_P (type))
3534     t = fold_build_pointer_plus (n1, t);
3535   else
3536     t = fold_build2 (PLUS_EXPR, type, t, n1);
3537   t = fold_convert (TREE_TYPE (startvar), t);
3538   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3539 				false, GSI_CONTINUE_LINKING);
3540   if (endvar)
3541     {
3542       assign_stmt = gimple_build_assign (endvar, e);
3543       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3545 	assign_stmt = gimple_build_assign (fd->loop.v, e);
3546       else
3547 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3548       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3549     }
3550   /* Handle linear clause adjustments.  */
3551   tree itercnt = NULL_TREE;
3552   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3553     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3554 	 c; c = OMP_CLAUSE_CHAIN (c))
3555       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3556 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3557 	{
3558 	  tree d = OMP_CLAUSE_DECL (c);
3559 	  bool is_ref = omp_is_reference (d);
3560 	  tree t = d, a, dest;
3561 	  if (is_ref)
3562 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3563 	  if (itercnt == NULL_TREE)
3564 	    {
3565 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3566 		{
3567 		  itercnt = fold_build2 (MINUS_EXPR, itype,
3568 					 fold_convert (itype, n1),
3569 					 fold_convert (itype, fd->loop.n1));
3570 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3571 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3572 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3573 						      NULL_TREE, false,
3574 						      GSI_CONTINUE_LINKING);
3575 		}
3576 	      else
3577 		itercnt = s0;
3578 	    }
3579 	  tree type = TREE_TYPE (t);
3580 	  if (POINTER_TYPE_P (type))
3581 	    type = sizetype;
3582 	  a = fold_build2 (MULT_EXPR, type,
3583 			   fold_convert (type, itercnt),
3584 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3585 	  dest = unshare_expr (t);
3586 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3587 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3588 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3589 					false, GSI_CONTINUE_LINKING);
3590 	  assign_stmt = gimple_build_assign (dest, t);
3591 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3592 	}
3593   if (fd->collapse > 1)
3594     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3595 
3596   if (!broken_loop)
3597     {
3598       /* The code controlling the sequential loop replaces the
3599 	 GIMPLE_OMP_CONTINUE.  */
3600       gsi = gsi_last_bb (cont_bb);
3601       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3602       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3603       vmain = gimple_omp_continue_control_use (cont_stmt);
3604       vback = gimple_omp_continue_control_def (cont_stmt);
3605 
3606       if (!gimple_omp_for_combined_p (fd->for_stmt))
3607 	{
3608 	  if (POINTER_TYPE_P (type))
3609 	    t = fold_build_pointer_plus (vmain, step);
3610 	  else
3611 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3612 	  t = force_gimple_operand_gsi (&gsi, t,
3613 					DECL_P (vback)
3614 					&& TREE_ADDRESSABLE (vback),
3615 					NULL_TREE, true, GSI_SAME_STMT);
3616 	  assign_stmt = gimple_build_assign (vback, t);
3617 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3618 
3619 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3620 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3621 		      ? t : vback, e);
3622 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3623 	}
3624 
3625       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3626       gsi_remove (&gsi, true);
3627 
3628       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3629 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3630     }
3631 
3632   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3633   gsi = gsi_last_bb (exit_bb);
3634   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3635     {
3636       t = gimple_omp_return_lhs (gsi_stmt (gsi));
3637       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3638     }
3639   gsi_remove (&gsi, true);
3640 
3641   /* Connect all the blocks.  */
3642   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3643   ep->probability = REG_BR_PROB_BASE / 4 * 3;
3644   ep = find_edge (entry_bb, second_bb);
3645   ep->flags = EDGE_TRUE_VALUE;
3646   ep->probability = REG_BR_PROB_BASE / 4;
3647   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3648   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3649 
3650   if (!broken_loop)
3651     {
3652       ep = find_edge (cont_bb, body_bb);
3653       if (ep == NULL)
3654 	{
3655 	  ep = BRANCH_EDGE (cont_bb);
3656 	  gcc_assert (single_succ (ep->dest) == body_bb);
3657 	}
3658       if (gimple_omp_for_combined_p (fd->for_stmt))
3659 	{
3660 	  remove_edge (ep);
3661 	  ep = NULL;
3662 	}
3663       else if (fd->collapse > 1)
3664 	{
3665 	  remove_edge (ep);
3666 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3667 	}
3668       else
3669 	ep->flags = EDGE_TRUE_VALUE;
3670       find_edge (cont_bb, fin_bb)->flags
3671 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3672     }
3673 
3674   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3675   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3676   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3677 
3678   set_immediate_dominator (CDI_DOMINATORS, body_bb,
3679 			   recompute_dominator (CDI_DOMINATORS, body_bb));
3680   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3681 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3682 
3683   struct loop *loop = body_bb->loop_father;
3684   if (loop != entry_bb->loop_father)
3685     {
3686       gcc_assert (broken_loop || loop->header == body_bb);
3687       gcc_assert (broken_loop
3688 		  || loop->latch == region->cont
3689 		  || single_pred (loop->latch) == region->cont);
3690       return;
3691     }
3692 
3693   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3694     {
3695       loop = alloc_loop ();
3696       loop->header = body_bb;
3697       if (collapse_bb == NULL)
3698 	loop->latch = cont_bb;
3699       add_loop (loop, body_bb->loop_father);
3700     }
3701 }
3702 
3703 /* Return phi in E->DEST with ARG on edge E.  */
3704 
3705 static gphi *
3706 find_phi_with_arg_on_edge (tree arg, edge e)
3707 {
3708   basic_block bb = e->dest;
3709 
3710   for (gphi_iterator gpi = gsi_start_phis (bb);
3711        !gsi_end_p (gpi);
3712        gsi_next (&gpi))
3713     {
3714       gphi *phi = gpi.phi ();
3715       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3716 	return phi;
3717     }
3718 
3719   return NULL;
3720 }
3721 
3722 /* A subroutine of expand_omp_for.  Generate code for a parallel
3723    loop with static schedule and a specified chunk size.  Given
3724    parameters:
3725 
3726 	for (V = N1; V cond N2; V += STEP) BODY;
3727 
3728    where COND is "<" or ">", we generate pseudocode
3729 
3730 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3731 	if (cond is <)
3732 	  adj = STEP - 1;
3733 	else
3734 	  adj = STEP + 1;
3735 	if ((__typeof (V)) -1 > 0 && cond is >)
3736 	  n = -(adj + N2 - N1) / -STEP;
3737 	else
3738 	  n = (adj + N2 - N1) / STEP;
3739 	trip = 0;
3740 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3741 					      here so that V is defined
3742 					      if the loop is not entered
3743     L0:
3744 	s0 = (trip * nthreads + threadid) * CHUNK;
3745 	e0 = min (s0 + CHUNK, n);
3746 	if (s0 < n) goto L1; else goto L4;
3747     L1:
3748 	V = s0 * STEP + N1;
3749 	e = e0 * STEP + N1;
3750     L2:
3751 	BODY;
3752 	V += STEP;
3753 	if (V cond e) goto L2; else goto L3;
3754     L3:
3755 	trip += 1;
3756 	goto L0;
3757     L4:
3758 */
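
/* A self-contained sketch (illustrative only; the function name and
   integer types are assumptions) of the trip loop in the pseudocode
   above: chunks of CHUNK logical iterations are dealt out round-robin
   until the trip count N is exhausted.  */

static inline void
example_static_chunk_walk (unsigned long long n,
			   unsigned long long nthreads,
			   unsigned long long threadid,
			   unsigned long long chunk)
{
  for (unsigned long long trip = 0; ; trip++)
    {
      /* L0: the iteration-space slice for this trip.  */
      unsigned long long s0 = (trip * nthreads + threadid) * chunk;
      if (s0 >= n)
	break;		/* L4: nothing left for this thread.  */
      unsigned long long e0 = s0 + chunk < n ? s0 + chunk : n;
      for (unsigned long long i = s0; i < e0; i++)
	/* L2: BODY, with V == I * STEP + N1.  */;
    }
}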
3759 
3760 static void
3761 expand_omp_for_static_chunk (struct omp_region *region,
3762 			     struct omp_for_data *fd, gimple *inner_stmt)
3763 {
3764   tree n, s0, e0, e, t;
3765   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3766   tree type, itype, vmain, vback, vextra;
3767   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3768   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3769   gimple_stmt_iterator gsi;
3770   edge se;
3771   bool broken_loop = region->cont == NULL;
3772   tree *counts = NULL;
3773   tree n1, n2, step;
3774 
3775   itype = type = TREE_TYPE (fd->loop.v);
3776   if (POINTER_TYPE_P (type))
3777     itype = signed_type_for (type);
3778 
3779   entry_bb = region->entry;
3780   se = split_block (entry_bb, last_stmt (entry_bb));
3781   entry_bb = se->src;
3782   iter_part_bb = se->dest;
3783   cont_bb = region->cont;
3784   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3785   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3786   gcc_assert (broken_loop
3787 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3788   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3789   body_bb = single_succ (seq_start_bb);
3790   if (!broken_loop)
3791     {
3792       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3793 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3794       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3795       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3796     }
3797   exit_bb = region->exit;
3798 
3799   /* Trip and adjustment setup goes in ENTRY_BB.  */
3800   gsi = gsi_last_bb (entry_bb);
3801   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3802 
3803   if (fd->collapse > 1)
3804     {
3805       int first_zero_iter = -1, dummy = -1;
3806       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3807 
3808       counts = XALLOCAVEC (tree, fd->collapse);
3809       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3810 				  fin_bb, first_zero_iter,
3811 				  dummy_bb, dummy, l2_dom_bb);
3812       t = NULL_TREE;
3813     }
3814   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3815     t = integer_one_node;
3816   else
3817     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3818 		     fold_convert (type, fd->loop.n1),
3819 		     fold_convert (type, fd->loop.n2));
3820   if (fd->collapse == 1
3821       && TYPE_UNSIGNED (type)
3822       && (t == NULL_TREE || !integer_onep (t)))
3823     {
3824       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3825       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3826 				     true, GSI_SAME_STMT);
3827       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3828       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3829 				     true, GSI_SAME_STMT);
3830       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3831 						 NULL_TREE, NULL_TREE);
3832       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3833       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3834 		     expand_omp_regimplify_p, NULL, NULL)
3835 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3836 			expand_omp_regimplify_p, NULL, NULL))
3837 	{
3838 	  gsi = gsi_for_stmt (cond_stmt);
3839 	  gimple_regimplify_operands (cond_stmt, &gsi);
3840 	}
3841       se = split_block (entry_bb, cond_stmt);
3842       se->flags = EDGE_TRUE_VALUE;
3843       entry_bb = se->dest;
3844       se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3845       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3846       se->probability = REG_BR_PROB_BASE / 2000 - 1;
3847       if (gimple_in_ssa_p (cfun))
3848 	{
3849 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3850 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3851 	       !gsi_end_p (gpi); gsi_next (&gpi))
3852 	    {
3853 	      gphi *phi = gpi.phi ();
3854 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3855 			   se, UNKNOWN_LOCATION);
3856 	    }
3857 	}
3858       gsi = gsi_last_bb (entry_bb);
3859     }
3860 
3861   switch (gimple_omp_for_kind (fd->for_stmt))
3862     {
3863     case GF_OMP_FOR_KIND_FOR:
3864       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3865       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3866       break;
3867     case GF_OMP_FOR_KIND_DISTRIBUTE:
3868       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3869       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3870       break;
3871     default:
3872       gcc_unreachable ();
3873     }
3874   nthreads = build_call_expr (nthreads, 0);
3875   nthreads = fold_convert (itype, nthreads);
3876   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3877 				       true, GSI_SAME_STMT);
3878   threadid = build_call_expr (threadid, 0);
3879   threadid = fold_convert (itype, threadid);
3880   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3881 				       true, GSI_SAME_STMT);
3882 
3883   n1 = fd->loop.n1;
3884   n2 = fd->loop.n2;
3885   step = fd->loop.step;
3886   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3887     {
3888       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3889 				     OMP_CLAUSE__LOOPTEMP_);
3890       gcc_assert (innerc);
3891       n1 = OMP_CLAUSE_DECL (innerc);
3892       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3893 				OMP_CLAUSE__LOOPTEMP_);
3894       gcc_assert (innerc);
3895       n2 = OMP_CLAUSE_DECL (innerc);
3896     }
3897   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3898 				 true, NULL_TREE, true, GSI_SAME_STMT);
3899   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3900 				 true, NULL_TREE, true, GSI_SAME_STMT);
3901   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3902 				   true, NULL_TREE, true, GSI_SAME_STMT);
3903   tree chunk_size = fold_convert (itype, fd->chunk_size);
3904   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3905   chunk_size
3906     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3907 				GSI_SAME_STMT);
3908 
3909   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3910   t = fold_build2 (PLUS_EXPR, itype, step, t);
3911   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3912   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3913   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3914     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3915 		     fold_build1 (NEGATE_EXPR, itype, t),
3916 		     fold_build1 (NEGATE_EXPR, itype, step));
3917   else
3918     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3919   t = fold_convert (itype, t);
3920   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 				true, GSI_SAME_STMT);
3922 
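  /* TRIP_VAR counts the chunks this thread has already handled.  In SSA
     form it needs three names: its zero initializer (TRIP_INIT), its
     value at the head of ITER_PART_BB (TRIP_MAIN) and its incremented
     value on the back edge (TRIP_BACK).  */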
3923   trip_var = create_tmp_reg (itype, ".trip");
3924   if (gimple_in_ssa_p (cfun))
3925     {
3926       trip_init = make_ssa_name (trip_var);
3927       trip_main = make_ssa_name (trip_var);
3928       trip_back = make_ssa_name (trip_var);
3929     }
3930   else
3931     {
3932       trip_init = trip_var;
3933       trip_main = trip_var;
3934       trip_back = trip_var;
3935     }
3936 
3937   gassign *assign_stmt
3938     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3939   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3940 
3941   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3942   t = fold_build2 (MULT_EXPR, itype, t, step);
3943   if (POINTER_TYPE_P (type))
3944     t = fold_build_pointer_plus (n1, t);
3945   else
3946     t = fold_build2 (PLUS_EXPR, type, t, n1);
3947   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948 				     true, GSI_SAME_STMT);
3949 
3950   /* Remove the GIMPLE_OMP_FOR.  */
3951   gsi_remove (&gsi, true);
3952 
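  /* GSIF remembers the end of ENTRY_BB: loop-invariant setup for the
     linear clause handling below is inserted there, so it runs once
     rather than once per chunk.  */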
3953   gimple_stmt_iterator gsif = gsi;
3954 
3955   /* Iteration space partitioning goes in ITER_PART_BB.  */
3956   gsi = gsi_last_bb (iter_part_bb);
3957 
3958   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3959   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3960   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3961   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962 				 false, GSI_CONTINUE_LINKING);
3963 
3964   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3965   t = fold_build2 (MIN_EXPR, itype, t, n);
3966   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3967 				 false, GSI_CONTINUE_LINKING);
3968 
3969   t = build2 (LT_EXPR, boolean_type_node, s0, n);
3970   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3971 
3972   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3973   gsi = gsi_start_bb (seq_start_bb);
3974 
3975   tree startvar = fd->loop.v;
3976   tree endvar = NULL_TREE;
3977 
3978   if (gimple_omp_for_combined_p (fd->for_stmt))
3979     {
3980       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3981 		     ? gimple_omp_parallel_clauses (inner_stmt)
3982 		     : gimple_omp_for_clauses (inner_stmt);
3983       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3984       gcc_assert (innerc);
3985       startvar = OMP_CLAUSE_DECL (innerc);
3986       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3987 				OMP_CLAUSE__LOOPTEMP_);
3988       gcc_assert (innerc);
3989       endvar = OMP_CLAUSE_DECL (innerc);
3990       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3991 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3992 	{
3993 	  int i;
3994 	  for (i = 1; i < fd->collapse; i++)
3995 	    {
3996 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3997 					OMP_CLAUSE__LOOPTEMP_);
3998 	      gcc_assert (innerc);
3999 	    }
4000 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4001 				    OMP_CLAUSE__LOOPTEMP_);
4002 	  if (innerc)
4003 	    {
4004 	      /* If needed (distribute parallel for with lastprivate),
4005 		 propagate down the total number of iterations.  */
4006 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4007 				     fd->loop.n2);
4008 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4009 					    GSI_CONTINUE_LINKING);
4010 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4011 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4012 	    }
4013 	}
4014     }
4015 
4016   t = fold_convert (itype, s0);
4017   t = fold_build2 (MULT_EXPR, itype, t, step);
4018   if (POINTER_TYPE_P (type))
4019     t = fold_build_pointer_plus (n1, t);
4020   else
4021     t = fold_build2 (PLUS_EXPR, type, t, n1);
4022   t = fold_convert (TREE_TYPE (startvar), t);
4023   t = force_gimple_operand_gsi (&gsi, t,
4024 				DECL_P (startvar)
4025 				&& TREE_ADDRESSABLE (startvar),
4026 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4027   assign_stmt = gimple_build_assign (startvar, t);
4028   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4029 
4030   t = fold_convert (itype, e0);
4031   t = fold_build2 (MULT_EXPR, itype, t, step);
4032   if (POINTER_TYPE_P (type))
4033     t = fold_build_pointer_plus (n1, t);
4034   else
4035     t = fold_build2 (PLUS_EXPR, type, t, n1);
4036   t = fold_convert (TREE_TYPE (startvar), t);
4037   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4038 				false, GSI_CONTINUE_LINKING);
4039   if (endvar)
4040     {
4041       assign_stmt = gimple_build_assign (endvar, e);
4042       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4044 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4045       else
4046 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4047       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4048     }
4049   /* Handle linear clause adjustments.  */
4050   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4051   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4052     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4053 	 c; c = OMP_CLAUSE_CHAIN (c))
4054       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4055 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4056 	{
4057 	  tree d = OMP_CLAUSE_DECL (c);
4058 	  bool is_ref = omp_is_reference (d);
4059 	  tree t = d, a, dest;
4060 	  if (is_ref)
4061 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4062 	  tree type = TREE_TYPE (t);
4063 	  if (POINTER_TYPE_P (type))
4064 	    type = sizetype;
4065 	  dest = unshare_expr (t);
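	  /* Unlike the static nochunk schedule, SEQ_START_BB runs once
	     per chunk here, so capture the pre-loop value of the
	     variable in V at the end of ENTRY_BB (via GSIF) and base
	     each chunk's adjustment on that.  */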
4066 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4067 	  expand_omp_build_assign (&gsif, v, t);
4068 	  if (itercnt == NULL_TREE)
4069 	    {
4070 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071 		{
4072 		  itercntbias
4073 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4074 				   fold_convert (itype, fd->loop.n1));
4075 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4076 					     itercntbias, step);
4077 		  itercntbias
4078 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4079 						NULL_TREE, true,
4080 						GSI_SAME_STMT);
4081 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4082 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4083 						      NULL_TREE, false,
4084 						      GSI_CONTINUE_LINKING);
4085 		}
4086 	      else
4087 		itercnt = s0;
4088 	    }
4089 	  a = fold_build2 (MULT_EXPR, type,
4090 			   fold_convert (type, itercnt),
4091 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4092 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4093 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4094 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4095 					false, GSI_CONTINUE_LINKING);
4096 	  assign_stmt = gimple_build_assign (dest, t);
4097 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4098 	}
4099   if (fd->collapse > 1)
4100     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4101 
4102   if (!broken_loop)
4103     {
4104       /* The code controlling the sequential loop goes in CONT_BB,
4105 	 replacing the GIMPLE_OMP_CONTINUE.  */
4106       gsi = gsi_last_bb (cont_bb);
4107       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4108       vmain = gimple_omp_continue_control_use (cont_stmt);
4109       vback = gimple_omp_continue_control_def (cont_stmt);
4110 
4111       if (!gimple_omp_for_combined_p (fd->for_stmt))
4112 	{
4113 	  if (POINTER_TYPE_P (type))
4114 	    t = fold_build_pointer_plus (vmain, step);
4115 	  else
4116 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4117 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4118 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4119 					  true, GSI_SAME_STMT);
4120 	  assign_stmt = gimple_build_assign (vback, t);
4121 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4122 
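	  /* A chunk size of one means the sequential loop body runs
	     exactly once per chunk, so emit a never-true (0 == 1)
	     back-edge condition.  */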
4123 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4124 	    t = build2 (EQ_EXPR, boolean_type_node,
4125 			build_int_cst (itype, 0),
4126 			build_int_cst (itype, 1));
4127 	  else
4128 	    t = build2 (fd->loop.cond_code, boolean_type_node,
4129 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4130 			? t : vback, e);
4131 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4132 	}
4133 
4134       /* Remove GIMPLE_OMP_CONTINUE.  */
4135       gsi_remove (&gsi, true);
4136 
4137       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4138 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4139 
4140       /* Trip update code goes into TRIP_UPDATE_BB.  */
4141       gsi = gsi_start_bb (trip_update_bb);
4142 
4143       t = build_int_cst (itype, 1);
4144       t = build2 (PLUS_EXPR, itype, trip_main, t);
4145       assign_stmt = gimple_build_assign (trip_back, t);
4146       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147     }
4148 
4149   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4150   gsi = gsi_last_bb (exit_bb);
4151   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4152     {
4153       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4154       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4155     }
4156   gsi_remove (&gsi, true);
4157 
4158   /* Connect the new blocks.  */
4159   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4160   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4161 
4162   if (!broken_loop)
4163     {
4164       se = find_edge (cont_bb, body_bb);
4165       if (se == NULL)
4166 	{
4167 	  se = BRANCH_EDGE (cont_bb);
4168 	  gcc_assert (single_succ (se->dest) == body_bb);
4169 	}
4170       if (gimple_omp_for_combined_p (fd->for_stmt))
4171 	{
4172 	  remove_edge (se);
4173 	  se = NULL;
4174 	}
4175       else if (fd->collapse > 1)
4176 	{
4177 	  remove_edge (se);
4178 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4179 	}
4180       else
4181 	se->flags = EDGE_TRUE_VALUE;
4182       find_edge (cont_bb, trip_update_bb)->flags
4183 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4184 
4185       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4186 				iter_part_bb);
4187     }
4188 
4189   if (gimple_in_ssa_p (cfun))
4190     {
4191       gphi_iterator psi;
4192       gphi *phi;
4193       edge re, ene;
4194       edge_var_map *vm;
4195       size_t i;
4196 
4197       gcc_assert (fd->collapse == 1 && !broken_loop);
4198 
4199       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200 	 remove arguments of the phi nodes in fin_bb.  We need to create
4201 	 appropriate phi nodes in iter_part_bb instead.  */
4202       se = find_edge (iter_part_bb, fin_bb);
4203       re = single_succ_edge (trip_update_bb);
4204       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4205       ene = single_succ_edge (entry_bb);
4206 
4207       psi = gsi_start_phis (fin_bb);
4208       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4209 	   gsi_next (&psi), ++i)
4210 	{
4211 	  gphi *nphi;
4212 	  source_location locus;
4213 
4214 	  phi = psi.phi ();
4215 	  t = gimple_phi_result (phi);
4216 	  gcc_assert (t == redirect_edge_var_map_result (vm));
4217 
4218 	  if (!single_pred_p (fin_bb))
4219 	    t = copy_ssa_name (t, phi);
4220 
4221 	  nphi = create_phi_node (t, iter_part_bb);
4222 
4223 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4224 	  locus = gimple_phi_arg_location_from_edge (phi, se);
4225 
4226 	  /* A special case -- fd->loop.v is not yet computed in
4227 	     iter_part_bb, so we need to use vextra instead.  */
4228 	  if (t == fd->loop.v)
4229 	    t = vextra;
4230 	  add_phi_arg (nphi, t, ene, locus);
4231 	  locus = redirect_edge_var_map_location (vm);
4232 	  tree back_arg = redirect_edge_var_map_def (vm);
4233 	  add_phi_arg (nphi, back_arg, re, locus);
4234 	  edge ce = find_edge (cont_bb, body_bb);
4235 	  if (ce == NULL)
4236 	    {
4237 	      ce = BRANCH_EDGE (cont_bb);
4238 	      gcc_assert (single_succ (ce->dest) == body_bb);
4239 	      ce = single_succ_edge (ce->dest);
4240 	    }
4241 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4242 	  gcc_assert (inner_loop_phi != NULL);
4243 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4244 		       find_edge (seq_start_bb, body_bb), locus);
4245 
4246 	  if (!single_pred_p (fin_bb))
4247 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4248 	}
4249       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4250       redirect_edge_var_map_clear (re);
4251       if (single_pred_p (fin_bb))
4252 	while (1)
4253 	  {
4254 	    psi = gsi_start_phis (fin_bb);
4255 	    if (gsi_end_p (psi))
4256 	      break;
4257 	    remove_phi_node (&psi, false);
4258 	  }
4259 
4260       /* Make phi node for trip.  */
4261       phi = create_phi_node (trip_main, iter_part_bb);
4262       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4263 		   UNKNOWN_LOCATION);
4264       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4265 		   UNKNOWN_LOCATION);
4266     }
4267 
4268   if (!broken_loop)
4269     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4270   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4271 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4272   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4273 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4274   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4275 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4276   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4277 			   recompute_dominator (CDI_DOMINATORS, body_bb));
4278 
4279   if (!broken_loop)
4280     {
4281       struct loop *loop = body_bb->loop_father;
4282       struct loop *trip_loop = alloc_loop ();
4283       trip_loop->header = iter_part_bb;
4284       trip_loop->latch = trip_update_bb;
4285       add_loop (trip_loop, iter_part_bb->loop_father);
4286 
4287       if (loop != entry_bb->loop_father)
4288 	{
4289 	  gcc_assert (loop->header == body_bb);
4290 	  gcc_assert (loop->latch == region->cont
4291 		      || single_pred (loop->latch) == region->cont);
4292 	  trip_loop->inner = loop;
4293 	  return;
4294 	}
4295 
4296       if (!gimple_omp_for_combined_p (fd->for_stmt))
4297 	{
4298 	  loop = alloc_loop ();
4299 	  loop->header = body_bb;
4300 	  if (collapse_bb == NULL)
4301 	    loop->latch = cont_bb;
4302 	  add_loop (loop, trip_loop);
4303 	}
4304     }
4305 }
4306 
4307 /* A subroutine of expand_omp_for.  Generate code for a _Cilk_for loop.
4308    Given parameters:
4309    for (V = N1; V cond N2; V += STEP) BODY;
4310 
4311    where COND is "<" or ">" or "!=", we generate pseudocode
4312 
4313    for (ind_var = low; ind_var < high; ind_var++)
4314      {
4315        V = n1 + (ind_var * STEP)
4316 
4317        <BODY>
4318      }
4319 
4320    In the above pseudocode, low and high are function parameters of the
4321    child function.  In the function below, we insert a temporary
4322    variable and calls to two OMP functions that will not otherwise be
4323    found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4324    with _Cilk_for).  These calls are replaced with low and high by
4325    the function that handles taskreg.  */
4326 
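/* A minimal sketch (illustrative only; the helper name and integer types
   are assumptions) of what the child function computes for each
   runtime-supplied [LOW, HIGH) range, per the pseudocode above.  */

static inline void
example_cilk_for_child (long long low, long long high,
			long long n1, long long step)
{
  for (long long ind_var = low; ind_var < high; ind_var++)
    {
      long long v = n1 + ind_var * step;	/* the user's V */
      /* BODY uses V here.  */
      (void) v;
    }
}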
4327 
4328 static void
4329 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4330 {
4331   bool broken_loop = region->cont == NULL;
4332   basic_block entry_bb = region->entry;
4333   basic_block cont_bb = region->cont;
4334 
4335   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4336   gcc_assert (broken_loop
4337 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4338   basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4339   basic_block l1_bb, l2_bb;
4340 
4341   if (!broken_loop)
4342     {
4343       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4344       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4345       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4346       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4347     }
4348   else
4349     {
4350       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4351       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4352       l2_bb = single_succ (l1_bb);
4353     }
4354   basic_block exit_bb = region->exit;
4355   basic_block l2_dom_bb = NULL;
4356 
4357   gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4358 
4359   /* The statements below, up to the "tree high_val = ..." line, are
4360      pseudo statements used to pass information to expand_omp_taskreg.
4361      low_val and high_val will be replaced by the __low and __high
4362      parameters of the child function.
4363 
4364      The call_exprs part is a place-holder; it is mainly used to
4365      identify distinctly to the top-level part that this is where
4366      low and high should be put (reasoning given in the header
4367      comment).  */
4368 
4369   gomp_parallel *par_stmt
4370     = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4371   tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4372   tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4373   for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4374     {
4375       if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4376 	high_val = t;
4377       else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4378 	low_val = t;
4379     }
4380   gcc_assert (low_val && high_val);
4381 
4382   tree type = TREE_TYPE (low_val);
4383   tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4384   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4385 
4386   /* Not needed in SSA form right now.  */
4387   gcc_assert (!gimple_in_ssa_p (cfun));
4388   if (l2_dom_bb == NULL)
4389     l2_dom_bb = l1_bb;
4390 
4391   tree n1 = low_val;
4392   tree n2 = high_val;
4393 
4394   gimple *stmt = gimple_build_assign (ind_var, n1);
4395 
4396   /* Replace the GIMPLE_OMP_FOR statement.  */
4397   gsi_replace (&gsi, stmt, true);
4398 
4399   if (!broken_loop)
4400     {
4401       /* Code to control the increment goes in the CONT_BB.  */
4402       gsi = gsi_last_bb (cont_bb);
4403       stmt = gsi_stmt (gsi);
4404       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4405       stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4406 				  build_one_cst (type));
4407 
4408       /* Replace GIMPLE_OMP_CONTINUE.  */
4409       gsi_replace (&gsi, stmt, true);
4410     }
4411 
4412   /* Emit the condition in L1_BB.  */
4413   gsi = gsi_after_labels (l1_bb);
4414   t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4415 		   fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4416 		   fd->loop.step);
4417   if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4418     t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4419 		     fd->loop.n1, fold_convert (sizetype, t));
4420   else
4421     t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4422 		     fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4423   t = fold_convert (TREE_TYPE (fd->loop.v), t);
4424   expand_omp_build_assign (&gsi, fd->loop.v, t);
4425 
4426   /* The condition is always '<' since the runtime will fill in the low
4427      and high values.  */
4428   stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4429   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4430 
4431   /* Remove GIMPLE_OMP_RETURN.  */
4432   gsi = gsi_last_bb (exit_bb);
4433   gsi_remove (&gsi, true);
4434 
4435   /* Connect the new blocks.  */
4436   remove_edge (FALLTHRU_EDGE (entry_bb));
4437 
4438   edge e, ne;
4439   if (!broken_loop)
4440     {
4441       remove_edge (BRANCH_EDGE (entry_bb));
4442       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4443 
4444       e = BRANCH_EDGE (l1_bb);
4445       ne = FALLTHRU_EDGE (l1_bb);
4446       e->flags = EDGE_TRUE_VALUE;
4447     }
4448   else
4449     {
4450       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4451 
4452       ne = single_succ_edge (l1_bb);
4453       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4455     }
4456   ne->flags = EDGE_FALSE_VALUE;
4457   e->probability = REG_BR_PROB_BASE * 7 / 8;
4458   ne->probability = REG_BR_PROB_BASE / 8;
4459 
4460   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4461   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4462   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4463 
4464   if (!broken_loop)
4465     {
4466       struct loop *loop = alloc_loop ();
4467       loop->header = l1_bb;
4468       loop->latch = cont_bb;
4469       add_loop (loop, l1_bb->loop_father);
4470       loop->safelen = INT_MAX;
4471     }
4472 
4473   /* Pick the correct library function based on the precision of the
4474      induction variable type.  */
4475   tree lib_fun = NULL_TREE;
4476   if (TYPE_PRECISION (type) == 32)
4477     lib_fun = cilk_for_32_fndecl;
4478   else if (TYPE_PRECISION (type) == 64)
4479     lib_fun = cilk_for_64_fndecl;
4480   else
4481     gcc_unreachable ();
4482 
4483   gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4484 
4485   /* WS_ARGS contains the library function flavor to call
4486      (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4487      user-defined grain value.  If the user does not define one, then zero
4488      is passed in by the parser.  */
4489   vec_alloc (region->ws_args, 2);
4490   region->ws_args->quick_push (lib_fun);
4491   region->ws_args->quick_push (fd->chunk_size);
4492 }
4493 
4494 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4495    loop.  Given parameters:
4496 
4497 	for (V = N1; V cond N2; V += STEP) BODY;
4498 
4499    where COND is "<" or ">", we generate pseudocode
4500 
4501 	V = N1;
4502 	goto L1;
4503     L0:
4504 	BODY;
4505 	V += STEP;
4506     L1:
4507 	if (V cond N2) goto L0; else goto L2;
4508     L2:
4509 
4510     For collapsed loops, given parameters:
4511       collapse(3)
4512       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4513 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4514 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4515 	    BODY;
4516 
4517     we generate pseudocode
4518 
4519 	if (cond3 is <)
4520 	  adj = STEP3 - 1;
4521 	else
4522 	  adj = STEP3 + 1;
4523 	count3 = (adj + N32 - N31) / STEP3;
4524 	if (cond2 is <)
4525 	  adj = STEP2 - 1;
4526 	else
4527 	  adj = STEP2 + 1;
4528 	count2 = (adj + N22 - N21) / STEP2;
4529 	if (cond1 is <)
4530 	  adj = STEP1 - 1;
4531 	else
4532 	  adj = STEP1 + 1;
4533 	count1 = (adj + N12 - N11) / STEP1;
4534 	count = count1 * count2 * count3;
4535 	V = 0;
4536 	V1 = N11;
4537 	V2 = N21;
4538 	V3 = N31;
4539 	goto L1;
4540     L0:
4541 	BODY;
4542 	V += 1;
4543 	V3 += STEP3;
4544 	V2 += (V3 cond3 N32) ? 0 : STEP2;
4545 	V3 = (V3 cond3 N32) ? V3 : N31;
4546 	V1 += (V2 cond2 N22) ? 0 : STEP1;
4547 	V2 = (V2 cond2 N22) ? V2 : N21;
4548     L1:
4549 	if (V < count) goto L0; else goto L2;
4550     L2:
4551 
4552       */
4553 
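
/* A self-contained sketch (illustrative only; collapse(2) rather than the
   collapse(3) shown, and assuming "<" conditions with positive steps) of
   the linearized counter update in the pseudocode above: a single counter
   V drives the loop, and the per-dimension counters are stepped with a
   carry when the inner one wraps.  The name and types are assumptions.  */

static inline void
example_simd_collapse2 (long long n11, long long n12, long long step1,
			long long n21, long long n22, long long step2)
{
  long long count1 = (n12 - n11 + step1 - 1) / step1;
  long long count2 = (n22 - n21 + step2 - 1) / step2;
  long long count = count1 * count2;
  long long v1 = n11, v2 = n21;
  for (long long v = 0; v < count; v++)
    {
      /* BODY uses V1 and V2 here.  */
      v2 += step2;
      if (!(v2 < n22))
	{
	  /* The inner counter wrapped: reset it and carry into V1.  */
	  v2 = n21;
	  v1 += step1;
	}
    }
}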
4554 static void
4555 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4556 {
4557   tree type, t;
4558   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4559   gimple_stmt_iterator gsi;
4560   gimple *stmt;
4561   gcond *cond_stmt;
4562   bool broken_loop = region->cont == NULL;
4563   edge e, ne;
4564   tree *counts = NULL;
4565   int i;
4566   int safelen_int = INT_MAX;
4567   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4568 				  OMP_CLAUSE_SAFELEN);
4569   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4570 				  OMP_CLAUSE__SIMDUID_);
4571   tree n1, n2;
4572 
4573   if (safelen)
4574     {
4575       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4576       if (TREE_CODE (safelen) != INTEGER_CST)
4577 	safelen_int = 0;
4578       else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4579 	safelen_int = tree_to_uhwi (safelen);
4580       if (safelen_int == 1)
4581 	safelen_int = 0;
4582     }
4583   type = TREE_TYPE (fd->loop.v);
4584   entry_bb = region->entry;
4585   cont_bb = region->cont;
4586   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4587   gcc_assert (broken_loop
4588 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4589   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4590   if (!broken_loop)
4591     {
4592       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4593       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4594       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4595       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4596     }
4597   else
4598     {
4599       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4600       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4601       l2_bb = single_succ (l1_bb);
4602     }
4603   exit_bb = region->exit;
4604   l2_dom_bb = NULL;
4605 
4606   gsi = gsi_last_bb (entry_bb);
4607 
4608   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4609   /* Not needed in SSA form right now.  */
4610   gcc_assert (!gimple_in_ssa_p (cfun));
4611   if (fd->collapse > 1)
4612     {
4613       int first_zero_iter = -1, dummy = -1;
4614       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4615 
4616       counts = XALLOCAVEC (tree, fd->collapse);
4617       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4618 				  zero_iter_bb, first_zero_iter,
4619 				  dummy_bb, dummy, l2_dom_bb);
4620     }
4621   if (l2_dom_bb == NULL)
4622     l2_dom_bb = l1_bb;
4623 
4624   n1 = fd->loop.n1;
4625   n2 = fd->loop.n2;
4626   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4627     {
4628       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4629 				     OMP_CLAUSE__LOOPTEMP_);
4630       gcc_assert (innerc);
4631       n1 = OMP_CLAUSE_DECL (innerc);
4632       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4633 				OMP_CLAUSE__LOOPTEMP_);
4634       gcc_assert (innerc);
4635       n2 = OMP_CLAUSE_DECL (innerc);
4636     }
4637   tree step = fd->loop.step;
4638 
4639   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4640 				  OMP_CLAUSE__SIMT_);
4641   if (is_simt)
4642     {
4643       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4644       is_simt = safelen_int > 1;
4645     }
4646   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
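  /* Under SIMT (e.g. SIMD lowered onto NVPTX threads), each lane below
     biases its start value N1 by LANE * STEP and multiplies STEP by
     the vectorization factor VF, so the VF lanes jointly sweep the
     original iteration space; SIMT_MAXLANE, when set, masks off lanes
     beyond SAFELEN (or beyond lane 0 for collapsed loops, which SIMT
     does not handle yet).  */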
4647   if (is_simt)
4648     {
4649       simt_lane = create_tmp_var (unsigned_type_node);
4650       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4651       gimple_call_set_lhs (g, simt_lane);
4652       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4653       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4654 				 fold_convert (TREE_TYPE (step), simt_lane));
4655       n1 = fold_convert (type, n1);
4656       if (POINTER_TYPE_P (type))
4657 	n1 = fold_build_pointer_plus (n1, offset);
4658       else
4659 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4660 
4661       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4662       if (fd->collapse > 1)
4663 	simt_maxlane = build_one_cst (unsigned_type_node);
4664       else if (safelen_int < omp_max_simt_vf ())
4665 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4666       tree vf
4667 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4668 					unsigned_type_node, 0);
4669       if (simt_maxlane)
4670 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4671       vf = fold_convert (TREE_TYPE (step), vf);
4672       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4673     }
4674 
4675   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4676   if (fd->collapse > 1)
4677     {
4678       if (gimple_omp_for_combined_into_p (fd->for_stmt))
4679 	{
4680 	  gsi_prev (&gsi);
4681 	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4682 	  gsi_next (&gsi);
4683 	}
4684       else
4685 	for (i = 0; i < fd->collapse; i++)
4686 	  {
4687 	    tree itype = TREE_TYPE (fd->loops[i].v);
4688 	    if (POINTER_TYPE_P (itype))
4689 	      itype = signed_type_for (itype);
4690 	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4691 	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4692 	  }
4693     }
4694 
4695   /* Remove the GIMPLE_OMP_FOR statement.  */
4696   gsi_remove (&gsi, true);
4697 
4698   if (!broken_loop)
4699     {
4700       /* Code to control the increment goes in the CONT_BB.  */
4701       gsi = gsi_last_bb (cont_bb);
4702       stmt = gsi_stmt (gsi);
4703       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4704 
4705       if (POINTER_TYPE_P (type))
4706 	t = fold_build_pointer_plus (fd->loop.v, step);
4707       else
4708 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4709       expand_omp_build_assign (&gsi, fd->loop.v, t);
4710 
4711       if (fd->collapse > 1)
4712 	{
4713 	  i = fd->collapse - 1;
4714 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4715 	    {
4716 	      t = fold_convert (sizetype, fd->loops[i].step);
4717 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4718 	    }
4719 	  else
4720 	    {
4721 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4722 				fd->loops[i].step);
4723 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4724 			       fd->loops[i].v, t);
4725 	    }
4726 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4727 
4728 	  for (i = fd->collapse - 1; i > 0; i--)
4729 	    {
4730 	      tree itype = TREE_TYPE (fd->loops[i].v);
4731 	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4732 	      if (POINTER_TYPE_P (itype2))
4733 		itype2 = signed_type_for (itype2);
4734 	      t = fold_convert (itype2, fd->loops[i - 1].step);
4735 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4736 					    GSI_SAME_STMT);
4737 	      t = build3 (COND_EXPR, itype2,
4738 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4739 				  fd->loops[i].v,
4740 				  fold_convert (itype, fd->loops[i].n2)),
4741 			  build_int_cst (itype2, 0), t);
4742 	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4743 		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4744 	      else
4745 		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4746 	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4747 
4748 	      t = fold_convert (itype, fd->loops[i].n1);
4749 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4750 					    GSI_SAME_STMT);
4751 	      t = build3 (COND_EXPR, itype,
4752 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4753 				  fd->loops[i].v,
4754 				  fold_convert (itype, fd->loops[i].n2)),
4755 			  fd->loops[i].v, t);
4756 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4757 	    }
4758 	}
4759 
4760       /* Remove GIMPLE_OMP_CONTINUE.  */
4761       gsi_remove (&gsi, true);
4762     }
4763 
4764   /* Emit the condition in L1_BB.  */
4765   gsi = gsi_start_bb (l1_bb);
4766 
4767   t = fold_convert (type, n2);
4768   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4769 				false, GSI_CONTINUE_LINKING);
4770   tree v = fd->loop.v;
4771   if (DECL_P (v) && TREE_ADDRESSABLE (v))
4772     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4773 				  false, GSI_CONTINUE_LINKING);
4774   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4775   cond_stmt = gimple_build_cond_empty (t);
4776   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4777   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4778 		 NULL, NULL)
4779       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4780 		    NULL, NULL))
4781     {
4782       gsi = gsi_for_stmt (cond_stmt);
4783       gimple_regimplify_operands (cond_stmt, &gsi);
4784     }
4785 
4786   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4787   if (is_simt)
4788     {
4789       gsi = gsi_start_bb (l2_bb);
4790       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4791       if (POINTER_TYPE_P (type))
4792 	t = fold_build_pointer_plus (fd->loop.v, step);
4793       else
4794 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4795       expand_omp_build_assign (&gsi, fd->loop.v, t);
4796     }
4797 
4798   /* Remove GIMPLE_OMP_RETURN.  */
4799   gsi = gsi_last_bb (exit_bb);
4800   gsi_remove (&gsi, true);
4801 
4802   /* Connect the new blocks.  */
4803   remove_edge (FALLTHRU_EDGE (entry_bb));
4804 
4805   if (!broken_loop)
4806     {
4807       remove_edge (BRANCH_EDGE (entry_bb));
4808       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4809 
4810       e = BRANCH_EDGE (l1_bb);
4811       ne = FALLTHRU_EDGE (l1_bb);
4812       e->flags = EDGE_TRUE_VALUE;
4813     }
4814   else
4815     {
4816       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4817 
4818       ne = single_succ_edge (l1_bb);
4819       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4820 
4821     }
4822   ne->flags = EDGE_FALSE_VALUE;
4823   e->probability = REG_BR_PROB_BASE * 7 / 8;
4824   ne->probability = REG_BR_PROB_BASE / 8;
4825 
4826   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4827   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4828 
4829   if (simt_maxlane)
4830     {
4831       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4832 				     NULL_TREE, NULL_TREE);
4833       gsi = gsi_last_bb (entry_bb);
4834       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4835       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4836       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4837       FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4838       BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4839       l2_dom_bb = entry_bb;
4840     }
4841   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4842 
4843   if (!broken_loop)
4844     {
4845       struct loop *loop = alloc_loop ();
4846       loop->header = l1_bb;
4847       loop->latch = cont_bb;
4848       add_loop (loop, l1_bb->loop_father);
4849       loop->safelen = safelen_int;
4850       if (simduid)
4851 	{
4852 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4853 	  cfun->has_simduid_loops = true;
4854 	}
4855       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4856 	 the loop.  */
4857       if ((flag_tree_loop_vectorize
4858 	   || (!global_options_set.x_flag_tree_loop_vectorize
4859 	       && !global_options_set.x_flag_tree_vectorize))
4860 	  && flag_tree_loop_optimize
4861 	  && loop->safelen > 1)
4862 	{
4863 	  loop->force_vectorize = true;
4864 	  cfun->has_force_vectorize_loops = true;
4865 	}
4866     }
4867   else if (simduid)
4868     cfun->has_simduid_loops = true;
4869 }
4870 
4871 /* Taskloop construct is represented after gimplification with
4872    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4873    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4874    which should just compute all the needed loop temporaries
4875    for GIMPLE_OMP_TASK.  */
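/* An illustration (not part of this file): a user-level

	#pragma omp taskloop
	for (i = 0; i < n; i++)
	  body (i);

   is represented after gimplification roughly as

	GIMPLE_OMP_FOR (outer)		<-- expanded by this routine
	  GIMPLE_OMP_TASK
	    GIMPLE_OMP_FOR (inner)	<-- expanded by ..._for_inner
	      body

   where the outer loop merely computes the _looptemp_ start/end values
   that the GOMP_taskloop{,_ull} runtime call later splits into
   per-task iteration ranges.  */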
4876 
4877 static void
4878 expand_omp_taskloop_for_outer (struct omp_region *region,
4879 			       struct omp_for_data *fd,
4880 			       gimple *inner_stmt)
4881 {
4882   tree type, bias = NULL_TREE;
4883   basic_block entry_bb, cont_bb, exit_bb;
4884   gimple_stmt_iterator gsi;
4885   gassign *assign_stmt;
4886   tree *counts = NULL;
4887   int i;
4888 
4889   gcc_assert (inner_stmt);
4890   gcc_assert (region->cont);
4891   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4892 	      && gimple_omp_task_taskloop_p (inner_stmt));
4893   type = TREE_TYPE (fd->loop.v);
4894 
4895   /* See if we need to bias by LLONG_MIN.  */
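  /* (Adding TYPE_MIN_VALUE, i.e. LLONG_MIN, maps the signed range
     [LLONG_MIN, LLONG_MAX] one-to-one onto the unsigned range
     [0, ULLONG_MAX] while preserving order, so the unsigned
     comparisons done by the runtime behave like the original signed
     ones.  The bias can be omitted when both bounds are constants of
     the same sign.)  */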
4896   if (fd->iter_type == long_long_unsigned_type_node
4897       && TREE_CODE (type) == INTEGER_TYPE
4898       && !TYPE_UNSIGNED (type))
4899     {
4900       tree n1, n2;
4901 
4902       if (fd->loop.cond_code == LT_EXPR)
4903 	{
4904 	  n1 = fd->loop.n1;
4905 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4906 	}
4907       else
4908 	{
4909 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4910 	  n2 = fd->loop.n1;
4911 	}
4912       if (TREE_CODE (n1) != INTEGER_CST
4913 	  || TREE_CODE (n2) != INTEGER_CST
4914 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4915 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4916     }
4917 
4918   entry_bb = region->entry;
4919   cont_bb = region->cont;
4920   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4921   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4922   exit_bb = region->exit;
4923 
4924   gsi = gsi_last_bb (entry_bb);
4925   gimple *for_stmt = gsi_stmt (gsi);
4926   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4927   if (fd->collapse > 1)
4928     {
4929       int first_zero_iter = -1, dummy = -1;
4930       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4931 
4932       counts = XALLOCAVEC (tree, fd->collapse);
4933       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4934 				  zero_iter_bb, first_zero_iter,
4935 				  dummy_bb, dummy, l2_dom_bb);
4936 
4937       if (zero_iter_bb)
4938 	{
4939 	  /* Some counts[i] vars might be uninitialized if
4940 	     some loop has zero iterations.  But the body shouldn't
4941 	     be executed in that case, so just avoid uninit warnings.  */
4942 	  for (i = first_zero_iter; i < fd->collapse; i++)
4943 	    if (SSA_VAR_P (counts[i]))
4944 	      TREE_NO_WARNING (counts[i]) = 1;
4945 	  gsi_prev (&gsi);
4946 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
4947 	  entry_bb = e->dest;
4948 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4949 	  gsi = gsi_last_bb (entry_bb);
4950 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4951 				   get_immediate_dominator (CDI_DOMINATORS,
4952 							    zero_iter_bb));
4953 	}
4954     }
4955 
4956   tree t0, t1;
4957   t1 = fd->loop.n2;
4958   t0 = fd->loop.n1;
4959   if (POINTER_TYPE_P (TREE_TYPE (t0))
4960       && TYPE_PRECISION (TREE_TYPE (t0))
4961 	 != TYPE_PRECISION (fd->iter_type))
4962     {
4963       /* Avoid casting pointers to integer of a different size.  */
4964       tree itype = signed_type_for (type);
4965       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4966       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4967     }
4968   else
4969     {
4970       t1 = fold_convert (fd->iter_type, t1);
4971       t0 = fold_convert (fd->iter_type, t0);
4972     }
4973   if (bias)
4974     {
4975       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4976       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4977     }
4978 
4979   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4980 				 OMP_CLAUSE__LOOPTEMP_);
4981   gcc_assert (innerc);
4982   tree startvar = OMP_CLAUSE_DECL (innerc);
4983   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4984   gcc_assert (innerc);
4985   tree endvar = OMP_CLAUSE_DECL (innerc);
4986   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4987     {
4988       gcc_assert (innerc);
4989       for (i = 1; i < fd->collapse; i++)
4990 	{
4991 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992 				    OMP_CLAUSE__LOOPTEMP_);
4993 	  gcc_assert (innerc);
4994 	}
4995       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4996 				OMP_CLAUSE__LOOPTEMP_);
4997       if (innerc)
4998 	{
4999 	  /* If needed (inner taskloop has lastprivate clause), propagate
5000 	     down the total number of iterations.  */
5001 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5002 					     NULL_TREE, false,
5003 					     GSI_CONTINUE_LINKING);
5004 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5005 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5006 	}
5007     }
5008 
5009   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5010 				 GSI_CONTINUE_LINKING);
5011   assign_stmt = gimple_build_assign (startvar, t0);
5012   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5013 
5014   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5015 				 GSI_CONTINUE_LINKING);
5016   assign_stmt = gimple_build_assign (endvar, t1);
5017   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5018   if (fd->collapse > 1)
5019     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5020 
5021   /* Remove the GIMPLE_OMP_FOR statement.  */
5022   gsi = gsi_for_stmt (for_stmt);
5023   gsi_remove (&gsi, true);
5024 
5025   gsi = gsi_last_bb (cont_bb);
5026   gsi_remove (&gsi, true);
5027 
5028   gsi = gsi_last_bb (exit_bb);
5029   gsi_remove (&gsi, true);
5030 
5031   FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5032   remove_edge (BRANCH_EDGE (entry_bb));
5033   FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
5034   remove_edge (BRANCH_EDGE (cont_bb));
5035   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5036   set_immediate_dominator (CDI_DOMINATORS, region->entry,
5037 			   recompute_dominator (CDI_DOMINATORS, region->entry));
5038 }
5039 
5040 /* Taskloop construct is represented after gimplification with
5041    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5042    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
5043    GOMP_taskloop{,_ull} function arranges for each task to be given just
5044    a single range of iterations.  */
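/* An illustration (not part of this file): the loop generated here is
   effectively

	for (V = start; V cond end; V += STEP) BODY;

   where START and END are the _looptemp_ values that the runtime
   stored into this task's argument block when it carved out the
   task's share of the iteration space.  */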
5045 
5046 static void
5047 expand_omp_taskloop_for_inner (struct omp_region *region,
5048 			       struct omp_for_data *fd,
5049 			       gimple *inner_stmt)
5050 {
5051   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5052   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5053   basic_block fin_bb;
5054   gimple_stmt_iterator gsi;
5055   edge ep;
5056   bool broken_loop = region->cont == NULL;
5057   tree *counts = NULL;
5058   tree n1, n2, step;
5059 
5060   itype = type = TREE_TYPE (fd->loop.v);
5061   if (POINTER_TYPE_P (type))
5062     itype = signed_type_for (type);
5063 
5064   /* See if we need to bias by LLONG_MIN.  */
5065   if (fd->iter_type == long_long_unsigned_type_node
5066       && TREE_CODE (type) == INTEGER_TYPE
5067       && !TYPE_UNSIGNED (type))
5068     {
5069       tree n1, n2;
5070 
5071       if (fd->loop.cond_code == LT_EXPR)
5072 	{
5073 	  n1 = fd->loop.n1;
5074 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5075 	}
5076       else
5077 	{
5078 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5079 	  n2 = fd->loop.n1;
5080 	}
5081       if (TREE_CODE (n1) != INTEGER_CST
5082 	  || TREE_CODE (n2) != INTEGER_CST
5083 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5084 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5085     }
5086 
5087   entry_bb = region->entry;
5088   cont_bb = region->cont;
5089   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5090   fin_bb = BRANCH_EDGE (entry_bb)->dest;
5091   gcc_assert (broken_loop
5092 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5093   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5094   if (!broken_loop)
5095     {
5096       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5097       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5098     }
5099   exit_bb = region->exit;
5100 
5101   /* Iteration space partitioning goes in ENTRY_BB.  */
5102   gsi = gsi_last_bb (entry_bb);
5103   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5104 
5105   if (fd->collapse > 1)
5106     {
5107       int first_zero_iter = -1, dummy = -1;
5108       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5109 
5110       counts = XALLOCAVEC (tree, fd->collapse);
5111       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5112 				  fin_bb, first_zero_iter,
5113 				  dummy_bb, dummy, l2_dom_bb);
5114       t = NULL_TREE;
5115     }
5116   else
5117     t = integer_one_node;
5118 
5119   step = fd->loop.step;
5120   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5121 				 OMP_CLAUSE__LOOPTEMP_);
5122   gcc_assert (innerc);
5123   n1 = OMP_CLAUSE_DECL (innerc);
5124   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5125   gcc_assert (innerc);
5126   n2 = OMP_CLAUSE_DECL (innerc);
5127   if (bias)
5128     {
5129       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5130       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5131     }
5132   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5133 				 true, NULL_TREE, true, GSI_SAME_STMT);
5134   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5135 				 true, NULL_TREE, true, GSI_SAME_STMT);
5136   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5137 				   true, NULL_TREE, true, GSI_SAME_STMT);
5138 
5139   tree startvar = fd->loop.v;
5140   tree endvar = NULL_TREE;
5141 
5142   if (gimple_omp_for_combined_p (fd->for_stmt))
5143     {
5144       tree clauses = gimple_omp_for_clauses (inner_stmt);
5145       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5146       gcc_assert (innerc);
5147       startvar = OMP_CLAUSE_DECL (innerc);
5148       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5149 				OMP_CLAUSE__LOOPTEMP_);
5150       gcc_assert (innerc);
5151       endvar = OMP_CLAUSE_DECL (innerc);
5152     }
5153   t = fold_convert (TREE_TYPE (startvar), n1);
5154   t = force_gimple_operand_gsi (&gsi, t,
5155 				DECL_P (startvar)
5156 				&& TREE_ADDRESSABLE (startvar),
5157 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5158   gimple *assign_stmt = gimple_build_assign (startvar, t);
5159   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5160 
5161   t = fold_convert (TREE_TYPE (startvar), n2);
5162   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5163 				false, GSI_CONTINUE_LINKING);
5164   if (endvar)
5165     {
5166       assign_stmt = gimple_build_assign (endvar, e);
5167       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5168       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5169 	assign_stmt = gimple_build_assign (fd->loop.v, e);
5170       else
5171 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5172       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5173     }
5174   if (fd->collapse > 1)
5175     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5176 
5177   if (!broken_loop)
5178     {
5179       /* The code controlling the sequential loop replaces the
5180 	 GIMPLE_OMP_CONTINUE.  */
5181       gsi = gsi_last_bb (cont_bb);
5182       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5183       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5184       vmain = gimple_omp_continue_control_use (cont_stmt);
5185       vback = gimple_omp_continue_control_def (cont_stmt);
5186 
5187       if (!gimple_omp_for_combined_p (fd->for_stmt))
5188 	{
5189 	  if (POINTER_TYPE_P (type))
5190 	    t = fold_build_pointer_plus (vmain, step);
5191 	  else
5192 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5193 	  t = force_gimple_operand_gsi (&gsi, t,
5194 					DECL_P (vback)
5195 					&& TREE_ADDRESSABLE (vback),
5196 					NULL_TREE, true, GSI_SAME_STMT);
5197 	  assign_stmt = gimple_build_assign (vback, t);
5198 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5199 
5200 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5201 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5202 		      ? t : vback, e);
5203 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5204 	}
5205 
5206       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5207       gsi_remove (&gsi, true);
5208 
5209       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5210 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5211     }
5212 
5213   /* Remove the GIMPLE_OMP_FOR statement.  */
5214   gsi = gsi_for_stmt (fd->for_stmt);
5215   gsi_remove (&gsi, true);
5216 
5217   /* Remove the GIMPLE_OMP_RETURN statement.  */
5218   gsi = gsi_last_bb (exit_bb);
5219   gsi_remove (&gsi, true);
5220 
5221   FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5222   if (!broken_loop)
5223     remove_edge (BRANCH_EDGE (entry_bb));
5224   else
5225     {
5226       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5227       region->outer->cont = NULL;
5228     }
5229 
5230   /* Connect all the blocks.  */
5231   if (!broken_loop)
5232     {
5233       ep = find_edge (cont_bb, body_bb);
5234       if (gimple_omp_for_combined_p (fd->for_stmt))
5235 	{
5236 	  remove_edge (ep);
5237 	  ep = NULL;
5238 	}
5239       else if (fd->collapse > 1)
5240 	{
5241 	  remove_edge (ep);
5242 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5243 	}
5244       else
5245 	ep->flags = EDGE_TRUE_VALUE;
5246       find_edge (cont_bb, fin_bb)->flags
5247 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5248     }
5249 
5250   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5251 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5252   if (!broken_loop)
5253     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5254 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5255 
5256   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5257     {
5258       struct loop *loop = alloc_loop ();
5259       loop->header = body_bb;
5260       if (collapse_bb == NULL)
5261 	loop->latch = cont_bb;
5262       add_loop (loop, body_bb->loop_father);
5263     }
5264 }
5265 
5266 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5267    partitioned loop.  The lowering here is abstracted, in that the
5268    loop parameters are passed through internal functions, which are
5269    further lowered by oacc_device_lower, once we get to the target
5270    compiler.  The loop is of the form:
5271 
5272    for (V = B; V LTGT E; V += S) {BODY}
5273 
5274    where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
5275    (constant 0 for no chunking), and we will have a GWV partitioning
5276    mask, specifying dimensions over which the loop is to be
5277    partitioned (see note below).  We generate code that looks like
5278    (this ignores tiling):
5279 
5280    <entry_bb> [incoming FALL->body, BRANCH->exit]
5281      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5282      T range = E - B;
5283      T chunk_no = 0;
5284      T dir = LTGT == '<' ? +1 : -1;
5285      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5286      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5287 
5288    <head_bb> [created by splitting end of entry_bb]
5289      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5290      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5291      if (!(offset LTGT bound)) goto bottom_bb;
5292 
5293    <body_bb> [incoming]
5294      V = B + offset;
5295      {BODY}
5296 
5297    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5298      offset += step;
5299      if (offset LTGT bound) goto body_bb; [*]
5300 
5301    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5302      chunk_no++;
5303      if (chunk_no < chunk_max) goto head_bb;
5304 
5305    <exit_bb> [incoming]
5306      V = B + ((range -/+ 1) / S +/- 1) * S [*]
5307 
5308    [*] Needed if V live at end of loop.  */
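/* An illustration (not part of this file): for

	#pragma acc loop gang
	for (V = 0; V < N; V++)
	  body (V);

   the IFN_GOACC_LOOP_{CHUNKS,STEP,OFFSET,BOUND} internal calls emitted
   below stay abstract placeholders; oacc_device_lower later
   instantiates them for the gang/worker/vector geometry selected by
   the GWV mask on the concrete offload target, or reduces them to the
   obvious scalar computations for host fallback.  */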
5309 
5310 static void
5311 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5312 {
5313   tree v = fd->loop.v;
5314   enum tree_code cond_code = fd->loop.cond_code;
5315   enum tree_code plus_code = PLUS_EXPR;
5316 
5317   tree chunk_size = integer_minus_one_node;
5318   tree gwv = integer_zero_node;
5319   tree iter_type = TREE_TYPE (v);
5320   tree diff_type = iter_type;
5321   tree plus_type = iter_type;
5322   struct oacc_collapse *counts = NULL;
5323 
5324   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5325 		       == GF_OMP_FOR_KIND_OACC_LOOP);
5326   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5327   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5328 
5329   if (POINTER_TYPE_P (iter_type))
5330     {
5331       plus_code = POINTER_PLUS_EXPR;
5332       plus_type = sizetype;
5333     }
5334   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5335     diff_type = signed_type_for (diff_type);
5336 
5337   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5338   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5339   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5340   basic_block bottom_bb = NULL;
5341 
5342   /* entry_bb has two successors; the branch edge goes to the exit
5343      block, the fallthrough edge to the body.  */
5344   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5345 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5346 
5347   /* If cont_bb is non-NULL, it has 2 successors.  The branch successor is
5348      body_bb, or a block whose only successor is body_bb.  Its
5349      fallthrough successor is the final block (same as the branch
5350      successor of the entry_bb).  */
5351   if (cont_bb)
5352     {
5353       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5354       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5355 
5356       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5357       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5358     }
5359   else
5360     gcc_assert (!gimple_in_ssa_p (cfun));
5361 
5362   /* The exit block only has entry_bb and cont_bb as predecessors.  */
5363   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5364 
5365   tree chunk_no;
5366   tree chunk_max = NULL_TREE;
5367   tree bound, offset;
5368   tree step = create_tmp_var (diff_type, ".step");
5369   bool up = cond_code == LT_EXPR;
5370   tree dir = build_int_cst (diff_type, up ? +1 : -1);
5371   bool chunking = !gimple_in_ssa_p (cfun);
5372   bool negating;
5373 
5374   /* Tiling vars.  */
5375   tree tile_size = NULL_TREE;
5376   tree element_s = NULL_TREE;
5377   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5378   basic_block elem_body_bb = NULL;
5379   basic_block elem_cont_bb = NULL;
5380 
5381   /* SSA instances.  */
5382   tree offset_incr = NULL_TREE;
5383   tree offset_init = NULL_TREE;
5384 
5385   gimple_stmt_iterator gsi;
5386   gassign *ass;
5387   gcall *call;
5388   gimple *stmt;
5389   tree expr;
5390   location_t loc;
5391   edge split, be, fte;
5392 
5393   /* Split the end of entry_bb to create head_bb.  */
5394   split = split_block (entry_bb, last_stmt (entry_bb));
5395   basic_block head_bb = split->dest;
5396   entry_bb = split->src;
5397 
5398   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5399   gsi = gsi_last_bb (entry_bb);
5400   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5401   loc = gimple_location (for_stmt);
5402 
5403   if (gimple_in_ssa_p (cfun))
5404     {
5405       offset_init = gimple_omp_for_index (for_stmt, 0);
5406       gcc_assert (integer_zerop (fd->loop.n1));
5407       /* The SSA parallelizer does gang parallelism.  */
5408       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5409     }
5410 
5411   if (fd->collapse > 1 || fd->tiling)
5412     {
5413       gcc_assert (!gimple_in_ssa_p (cfun) && up);
5414       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5415       tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5416 					      TREE_TYPE (fd->loop.n2), loc);
5417 
5418       if (SSA_VAR_P (fd->loop.n2))
5419 	{
5420 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5421 					    true, GSI_SAME_STMT);
5422 	  ass = gimple_build_assign (fd->loop.n2, total);
5423 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5424 	}
5425     }
5426 
5427   tree b = fd->loop.n1;
5428   tree e = fd->loop.n2;
5429   tree s = fd->loop.step;
5430 
5431   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5432   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5433 
5434   /* Convert the step, avoiding possible unsigned->signed overflow.  */
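  /* (For a downward loop the step is negative but may be stored in an
     unsigned type; negating it first yields its positive magnitude,
     which converts safely, and a second negation in the signed
     DIFF_TYPE restores the sign.)  */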
5435   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5436   if (negating)
5437     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5438   s = fold_convert (diff_type, s);
5439   if (negating)
5440     s = fold_build1 (NEGATE_EXPR, diff_type, s);
5441   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5442 
5443   if (!chunking)
5444     chunk_size = integer_zero_node;
5445   expr = fold_convert (diff_type, chunk_size);
5446   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5447 					 NULL_TREE, true, GSI_SAME_STMT);
5448 
5449   if (fd->tiling)
5450     {
5451       /* Determine the tile size and element step,
5452 	 modify the outer loop step size.  */
5453       tile_size = create_tmp_var (diff_type, ".tile_size");
5454       expr = build_int_cst (diff_type, 1);
5455       for (int ix = 0; ix < fd->collapse; ix++)
5456 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5457       expr = force_gimple_operand_gsi (&gsi, expr, true,
5458 				       NULL_TREE, true, GSI_SAME_STMT);
5459       ass = gimple_build_assign (tile_size, expr);
5460       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5461 
5462       element_s = create_tmp_var (diff_type, ".element_s");
5463       ass = gimple_build_assign (element_s, s);
5464       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5465 
5466       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5467       s = force_gimple_operand_gsi (&gsi, expr, true,
5468 				    NULL_TREE, true, GSI_SAME_STMT);
5469     }
5470 
5471   /* Determine the range, avoiding possible unsigned->signed overflow.  */
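  /* (For a downward loop over an unsigned type E - B would wrap, so
     the nonnegative B - E is computed instead and the converted result
     is negated in the signed DIFF_TYPE.)  */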
5472   negating = !up && TYPE_UNSIGNED (iter_type);
5473   expr = fold_build2 (MINUS_EXPR, plus_type,
5474 		      fold_convert (plus_type, negating ? b : e),
5475 		      fold_convert (plus_type, negating ? e : b));
5476   expr = fold_convert (diff_type, expr);
5477   if (negating)
5478     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5479   tree range = force_gimple_operand_gsi (&gsi, expr, true,
5480 					 NULL_TREE, true, GSI_SAME_STMT);
5481 
5482   chunk_no = build_int_cst (diff_type, 0);
5483   if (chunking)
5484     {
5485       gcc_assert (!gimple_in_ssa_p (cfun));
5486 
5487       expr = chunk_no;
5488       chunk_max = create_tmp_var (diff_type, ".chunk_max");
5489       chunk_no = create_tmp_var (diff_type, ".chunk_no");
5490 
5491       ass = gimple_build_assign (chunk_no, expr);
5492       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5493 
5494       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5495 					 build_int_cst (integer_type_node,
5496 							IFN_GOACC_LOOP_CHUNKS),
5497 					 dir, range, s, chunk_size, gwv);
5498       gimple_call_set_lhs (call, chunk_max);
5499       gimple_set_location (call, loc);
5500       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5501     }
5502   else
5503     chunk_size = chunk_no;
5504 
5505   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5506 				     build_int_cst (integer_type_node,
5507 						    IFN_GOACC_LOOP_STEP),
5508 				     dir, range, s, chunk_size, gwv);
5509   gimple_call_set_lhs (call, step);
5510   gimple_set_location (call, loc);
5511   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5512 
5513   /* Remove the GIMPLE_OMP_FOR.  */
5514   gsi_remove (&gsi, true);
5515 
5516   /* Fixup edges from head_bb.  */
5517   be = BRANCH_EDGE (head_bb);
5518   fte = FALLTHRU_EDGE (head_bb);
5519   be->flags |= EDGE_FALSE_VALUE;
5520   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5521 
5522   basic_block body_bb = fte->dest;
5523 
5524   if (gimple_in_ssa_p (cfun))
5525     {
5526       gsi = gsi_last_bb (cont_bb);
5527       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5528 
5529       offset = gimple_omp_continue_control_use (cont_stmt);
5530       offset_incr = gimple_omp_continue_control_def (cont_stmt);
5531     }
5532   else
5533     {
5534       offset = create_tmp_var (diff_type, ".offset");
5535       offset_init = offset_incr = offset;
5536     }
5537   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5538 
5539   /* Loop offset & bound go into head_bb.  */
5540   gsi = gsi_start_bb (head_bb);
5541 
5542   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5543 				     build_int_cst (integer_type_node,
5544 						    IFN_GOACC_LOOP_OFFSET),
5545 				     dir, range, s,
5546 				     chunk_size, gwv, chunk_no);
5547   gimple_call_set_lhs (call, offset_init);
5548   gimple_set_location (call, loc);
5549   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5550 
5551   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5552 				     build_int_cst (integer_type_node,
5553 						    IFN_GOACC_LOOP_BOUND),
5554 				     dir, range, s,
5555 				     chunk_size, gwv, offset_init);
5556   gimple_call_set_lhs (call, bound);
5557   gimple_set_location (call, loc);
5558   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5559 
5560   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5561   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5562 		    GSI_CONTINUE_LINKING);
5563 
5564   /* V assignment goes into body_bb.  */
5565   if (!gimple_in_ssa_p (cfun))
5566     {
5567       gsi = gsi_start_bb (body_bb);
5568 
5569       expr = build2 (plus_code, iter_type, b,
5570 		     fold_convert (plus_type, offset));
5571       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5572 				       true, GSI_SAME_STMT);
5573       ass = gimple_build_assign (v, expr);
5574       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5575 
5576       if (fd->collapse > 1 || fd->tiling)
5577 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5578 
5579       if (fd->tiling)
5580 	{
5581 	  /* Determine the range of the element loop -- usually simply
5582 	     the tile_size, but could be smaller if the final
5583 	     iteration of the outer loop is a partial tile.  */
5584 	  tree e_range = create_tmp_var (diff_type, ".e_range");
5585 
5586 	  expr = build2 (MIN_EXPR, diff_type,
5587 			 build2 (MINUS_EXPR, diff_type, bound, offset),
5588 			 build2 (MULT_EXPR, diff_type, tile_size,
5589 				 element_s));
5590 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5591 					   true, GSI_SAME_STMT);
5592 	  ass = gimple_build_assign (e_range, expr);
5593 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5594 
5595 	  /* Determine bound, offset & step of the inner loop.  */
5596 	  e_bound = create_tmp_var (diff_type, ".e_bound");
5597 	  e_offset = create_tmp_var (diff_type, ".e_offset");
5598 	  e_step = create_tmp_var (diff_type, ".e_step");
5599 
5600 	  /* Mark these as element loops.  */
5601 	  tree t, e_gwv = integer_minus_one_node;
5602 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5603 
5604 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5605 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5606 					     element_s, chunk, e_gwv, chunk);
5607 	  gimple_call_set_lhs (call, e_offset);
5608 	  gimple_set_location (call, loc);
5609 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5610 
5611 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5612 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5613 					     element_s, chunk, e_gwv, e_offset);
5614 	  gimple_call_set_lhs (call, e_bound);
5615 	  gimple_set_location (call, loc);
5616 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5617 
5618 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5619 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5620 					     element_s, chunk, e_gwv);
5621 	  gimple_call_set_lhs (call, e_step);
5622 	  gimple_set_location (call, loc);
5623 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5624 
5625 	  /* Add test and split block.  */
5626 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5627 	  stmt = gimple_build_cond_empty (expr);
5628 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5629 	  split = split_block (body_bb, stmt);
5630 	  elem_body_bb = split->dest;
5631 	  if (cont_bb == body_bb)
5632 	    cont_bb = elem_body_bb;
5633 	  body_bb = split->src;
5634 
5635 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5636 
5637 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5638 	  if (cont_bb == NULL)
5639 	    {
5640 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5641 	      e->probability = PROB_EVEN;
5642 	      split->probability = PROB_EVEN;
5643 	    }
5644 
5645 	  /* Initialize the user's loop vars.  */
5646 	  gsi = gsi_start_bb (elem_body_bb);
5647 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5648 	}
5649     }
5650 
5651   /* Loop increment goes into cont_bb.  If this is not a loop, we
5652      will have spawned threads as if it were, and each one will
5653      execute one iteration.  The specification is not explicit about
5654      whether such constructs are ill-formed or not, and they can
5655      occur, especially when noreturn routines are involved.  */
5656   if (cont_bb)
5657     {
5658       gsi = gsi_last_bb (cont_bb);
5659       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5660       loc = gimple_location (cont_stmt);
5661 
5662       if (fd->tiling)
5663 	{
5664 	  /* Insert element loop increment and test.  */
5665 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5666 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5667 					   true, GSI_SAME_STMT);
5668 	  ass = gimple_build_assign (e_offset, expr);
5669 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5670 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5671 
5672 	  stmt = gimple_build_cond_empty (expr);
5673 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5674 	  split = split_block (cont_bb, stmt);
5675 	  elem_cont_bb = split->src;
5676 	  cont_bb = split->dest;
5677 
5678 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5679 	  make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5680 
5681 	  make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5682 
5683 	  gsi = gsi_for_stmt (cont_stmt);
5684 	}
5685 
5686       /* Increment offset.  */
5687       if (gimple_in_ssa_p (cfun))
5688 	expr = build2 (plus_code, iter_type, offset,
5689 		       fold_convert (plus_type, step));
5690       else
5691 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5692       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5693 				       true, GSI_SAME_STMT);
5694       ass = gimple_build_assign (offset_incr, expr);
5695       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5696       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5697       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5698 
5699       /* Remove the GIMPLE_OMP_CONTINUE.  */
5700       gsi_remove (&gsi, true);
5701 
5702       /* Fixup edges from cont_bb.  */
5703       be = BRANCH_EDGE (cont_bb);
5704       fte = FALLTHRU_EDGE (cont_bb);
5705       be->flags |= EDGE_TRUE_VALUE;
5706       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5707 
5708       if (chunking)
5709 	{
5710 	  /* Split the beginning of exit_bb to make bottom_bb.  We
5711 	     need to insert a nop at the start, because splitting is
5712 	     after a stmt, not before.  */
5713 	  gsi = gsi_start_bb (exit_bb);
5714 	  stmt = gimple_build_nop ();
5715 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5716 	  split = split_block (exit_bb, stmt);
5717 	  bottom_bb = split->src;
5718 	  exit_bb = split->dest;
5719 	  gsi = gsi_last_bb (bottom_bb);
5720 
5721 	  /* Chunk increment and test goes into bottom_bb.  */
5722 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5723 			 build_int_cst (diff_type, 1));
5724 	  ass = gimple_build_assign (chunk_no, expr);
5725 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5726 
5727 	  /* Chunk test at end of bottom_bb.  */
5728 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5729 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5730 			    GSI_CONTINUE_LINKING);
5731 
5732 	  /* Fixup edges from bottom_bb.  */
5733 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5734 	  make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5735 	}
5736     }
5737 
5738   gsi = gsi_last_bb (exit_bb);
5739   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5740   loc = gimple_location (gsi_stmt (gsi));
5741 
5742   if (!gimple_in_ssa_p (cfun))
5743     {
5744       /* Insert the final value of V, in case it is live.  This is the
5745 	 value for the only thread that survives past the join.  */
5746       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5747       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5748       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5749       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5750       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5751       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5752 				       true, GSI_SAME_STMT);
5753       ass = gimple_build_assign (v, expr);
5754       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5755     }
5756 
5757   /* Remove the OMP_RETURN.  */
5758   gsi_remove (&gsi, true);
5759 
5760   if (cont_bb)
5761     {
5762       /* We now have one, two or three nested loops.  Update the loop
5763 	 structures.  */
5764       struct loop *parent = entry_bb->loop_father;
5765       struct loop *body = body_bb->loop_father;
5766 
5767       if (chunking)
5768 	{
5769 	  struct loop *chunk_loop = alloc_loop ();
5770 	  chunk_loop->header = head_bb;
5771 	  chunk_loop->latch = bottom_bb;
5772 	  add_loop (chunk_loop, parent);
5773 	  parent = chunk_loop;
5774 	}
5775       else if (parent != body)
5776 	{
5777 	  gcc_assert (body->header == body_bb);
5778 	  gcc_assert (body->latch == cont_bb
5779 		      || single_pred (body->latch) == cont_bb);
5780 	  parent = NULL;
5781 	}
5782 
5783       if (parent)
5784 	{
5785 	  struct loop *body_loop = alloc_loop ();
5786 	  body_loop->header = body_bb;
5787 	  body_loop->latch = cont_bb;
5788 	  add_loop (body_loop, parent);
5789 
5790 	  if (fd->tiling)
5791 	    {
5792 	      /* Insert tiling's element loop.  */
5793 	      struct loop *inner_loop = alloc_loop ();
5794 	      inner_loop->header = elem_body_bb;
5795 	      inner_loop->latch = elem_cont_bb;
5796 	      add_loop (inner_loop, body_loop);
5797 	    }
5798 	}
5799     }
5800 }
5801 
5802 /* Expand the OMP loop defined by REGION.  */
5803 
5804 static void
5805 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5806 {
5807   struct omp_for_data fd;
5808   struct omp_for_data_loop *loops;
5809 
5810   loops
5811     = (struct omp_for_data_loop *)
5812       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5813 	      * sizeof (struct omp_for_data_loop));
5814   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5815 			&fd, loops);
5816   region->sched_kind = fd.sched_kind;
5817   region->sched_modifiers = fd.sched_modifiers;
5818 
5819   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5820   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5821   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5822   if (region->cont)
5823     {
5824       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5825       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5826       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5827     }
5828   else
5829     /* If there isn't a continue then this is a degenerate case where
5830        the introduction of abnormal edges during lowering will prevent
5831        original loops from being detected.  Fix that up.  */
5832     loops_state_set (LOOPS_NEED_FIXUP);
5833 
5834   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5835     expand_omp_simd (region, &fd);
5836   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5837     expand_cilk_for (region, &fd);
5838   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5839     {
5840       gcc_assert (!inner_stmt);
5841       expand_oacc_for (region, &fd);
5842     }
5843   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5844     {
5845       if (gimple_omp_for_combined_into_p (fd.for_stmt))
5846 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5847       else
5848 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5849     }
5850   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5851 	   && !fd.have_ordered)
5852     {
5853       if (fd.chunk_size == NULL)
5854 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5855       else
5856 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5857     }
5858   else
5859     {
5860       int fn_index, start_ix, next_ix;
5861 
5862       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5863 		  == GF_OMP_FOR_KIND_FOR);
5864       if (fd.chunk_size == NULL
5865 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5866 	fd.chunk_size = integer_zero_node;
5867       gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
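      /* FN_INDEX picks among the GOMP_loop_{static,dynamic,guided,
	 runtime,nonmonotonic_dynamic,nonmonotonic_guided}_start/next
	 builtins, relying on their being declared consecutively in
	 that order; the '+ have_ordered * 6' and the DOACROSS and ULL
	 adjustments below reach the ordered_*, doacross_* and _ull
	 flavors by the same constant-offset trick.  */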
5868       switch (fd.sched_kind)
5869 	{
5870 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5871 	  fn_index = 3;
5872 	  break;
5873 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5874 	case OMP_CLAUSE_SCHEDULE_GUIDED:
5875 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5876 	      && !fd.ordered
5877 	      && !fd.have_ordered)
5878 	    {
5879 	      fn_index = 3 + fd.sched_kind;
5880 	      break;
5881 	    }
5882 	  /* FALLTHRU */
5883 	default:
5884 	  fn_index = fd.sched_kind;
5885 	  break;
5886 	}
5887       if (!fd.ordered)
5888 	fn_index += fd.have_ordered * 6;
5889       if (fd.ordered)
5890 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5891       else
5892 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5893       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5894       if (fd.iter_type == long_long_unsigned_type_node)
5895 	{
5896 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5897 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5898 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5899 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5900 	}
5901       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5902 			      (enum built_in_function) next_ix, inner_stmt);
5903     }
5904 
5905   if (gimple_in_ssa_p (cfun))
5906     update_ssa (TODO_update_ssa_only_virtuals);
5907 }
5908 
5909 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
5910 
5911 	v = GOMP_sections_start (n);
5912     L0:
5913 	switch (v)
5914 	  {
5915 	  case 0:
5916 	    goto L2;
5917 	  case 1:
5918 	    section 1;
5919 	    goto L1;
5920 	  case 2:
5921 	    ...
5922 	  case n:
5923 	    ...
5924 	  default:
5925 	    abort ();
5926 	  }
5927     L1:
5928 	v = GOMP_sections_next ();
5929 	goto L0;
5930     L2:
5931 	reduction;
5932 
5933     If this is a combined parallel sections construct, replace the call to
5934     GOMP_sections_start with a call to GOMP_sections_next.  */
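/* An illustration (not part of this file):

	#pragma omp sections
	{
	#pragma omp section
	  work1 ();
	#pragma omp section
	  work2 ();
	}

   becomes the dispatcher above with two section cases;
   GOMP_sections_start (2) returns the first section number assigned to
   the calling thread (0 when none remain) and GOMP_sections_next ()
   fetches each subsequent one.  */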
5935 
5936 static void
5937 expand_omp_sections (struct omp_region *region)
5938 {
5939   tree t, u, vin = NULL, vmain, vnext, l2;
5940   unsigned len;
5941   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5942   gimple_stmt_iterator si, switch_si;
5943   gomp_sections *sections_stmt;
5944   gimple *stmt;
5945   gomp_continue *cont;
5946   edge_iterator ei;
5947   edge e;
5948   struct omp_region *inner;
5949   unsigned i, casei;
5950   bool exit_reachable = region->cont != NULL;
5951 
5952   gcc_assert (region->exit != NULL);
5953   entry_bb = region->entry;
5954   l0_bb = single_succ (entry_bb);
5955   l1_bb = region->cont;
5956   l2_bb = region->exit;
5957   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5958     l2 = gimple_block_label (l2_bb);
5959   else
5960     {
5961       /* This can happen if there are reductions.  */
5962       len = EDGE_COUNT (l0_bb->succs);
5963       gcc_assert (len > 0);
5964       e = EDGE_SUCC (l0_bb, len - 1);
5965       si = gsi_last_bb (e->dest);
5966       l2 = NULL_TREE;
5967       if (gsi_end_p (si)
5968 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5969 	l2 = gimple_block_label (e->dest);
5970       else
5971 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
5972 	  {
5973 	    si = gsi_last_bb (e->dest);
5974 	    if (gsi_end_p (si)
5975 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5976 	      {
5977 		l2 = gimple_block_label (e->dest);
5978 		break;
5979 	      }
5980 	  }
5981     }
5982   if (exit_reachable)
5983     default_bb = create_empty_bb (l1_bb->prev_bb);
5984   else
5985     default_bb = create_empty_bb (l0_bb);
5986 
5987   /* We will build a switch() with enough cases for all the
5988      GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
5989      and a default case to abort if something goes wrong.  */
5990   len = EDGE_COUNT (l0_bb->succs);
5991 
5992   /* Use vec::quick_push on label_vec throughout, since we know the size
5993      in advance.  */
5994   auto_vec<tree> label_vec (len);
5995 
5996   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5997      GIMPLE_OMP_SECTIONS statement.  */
5998   si = gsi_last_bb (entry_bb);
5999   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6000   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6001   vin = gimple_omp_sections_control (sections_stmt);
6002   if (!is_combined_parallel (region))
6003     {
6004       /* If we are not inside a combined parallel+sections region,
6005 	 call GOMP_sections_start.  */
6006       t = build_int_cst (unsigned_type_node, len - 1);
6007       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6008       stmt = gimple_build_call (u, 1, t);
6009     }
6010   else
6011     {
6012       /* Otherwise, call GOMP_sections_next.  */
6013       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6014       stmt = gimple_build_call (u, 0);
6015     }
6016   gimple_call_set_lhs (stmt, vin);
6017   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6018   gsi_remove (&si, true);
6019 
6020   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6021      L0_BB.  */
6022   switch_si = gsi_last_bb (l0_bb);
6023   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6024   if (exit_reachable)
6025     {
6026       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6027       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6028       vmain = gimple_omp_continue_control_use (cont);
6029       vnext = gimple_omp_continue_control_def (cont);
6030     }
6031   else
6032     {
6033       vmain = vin;
6034       vnext = NULL_TREE;
6035     }
6036 
6037   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6038   label_vec.quick_push (t);
6039   i = 1;
6040 
6041   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
6042   for (inner = region->inner, casei = 1;
6043        inner;
6044        inner = inner->next, i++, casei++)
6045     {
6046       basic_block s_entry_bb, s_exit_bb;
6047 
6048       /* Skip optional reduction region.  */
6049       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6050 	{
6051 	  --i;
6052 	  --casei;
6053 	  continue;
6054 	}
6055 
6056       s_entry_bb = inner->entry;
6057       s_exit_bb = inner->exit;
6058 
6059       t = gimple_block_label (s_entry_bb);
6060       u = build_int_cst (unsigned_type_node, casei);
6061       u = build_case_label (u, NULL, t);
6062       label_vec.quick_push (u);
6063 
6064       si = gsi_last_bb (s_entry_bb);
6065       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6066       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6067       gsi_remove (&si, true);
6068       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6069 
6070       if (s_exit_bb == NULL)
6071 	continue;
6072 
6073       si = gsi_last_bb (s_exit_bb);
6074       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6075       gsi_remove (&si, true);
6076 
6077       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6078     }
6079 
6080   /* Error handling code goes in DEFAULT_BB.  */
6081   t = gimple_block_label (default_bb);
6082   u = build_case_label (NULL, NULL, t);
6083   make_edge (l0_bb, default_bb, 0);
6084   add_bb_to_loop (default_bb, current_loops->tree_root);
6085 
6086   stmt = gimple_build_switch (vmain, u, label_vec);
6087   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6088   gsi_remove (&switch_si, true);
6089 
6090   si = gsi_start_bb (default_bb);
6091   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6092   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6093 
6094   if (exit_reachable)
6095     {
6096       tree bfn_decl;
6097 
6098       /* Code to get the next section goes in L1_BB.  */
6099       si = gsi_last_bb (l1_bb);
6100       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6101 
6102       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6103       stmt = gimple_build_call (bfn_decl, 0);
6104       gimple_call_set_lhs (stmt, vnext);
6105       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6106       gsi_remove (&si, true);
6107 
6108       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6109     }
6110 
6111   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
6112   si = gsi_last_bb (l2_bb);
6113   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6114     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6115   else if (gimple_omp_return_lhs (gsi_stmt (si)))
6116     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6117   else
6118     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6119   stmt = gimple_build_call (t, 0);
6120   if (gimple_omp_return_lhs (gsi_stmt (si)))
6121     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6122   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6123   gsi_remove (&si, true);
6124 
6125   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6126 }
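
/* Illustrative sketch (not part of the pass): after the expansion above, a
   sections region executes roughly as

     for (i = GOMP_sections_start (n); i != 0; i = GOMP_sections_next ())
       switch (i)
	 {
	 case 1: section1_body (); break;
	 case 2: section2_body (); break;
	 default: __builtin_trap ();
	 }
     GOMP_sections_end ();

   with N being the number of sections, case 0 leaving the loop, and the
   nowait/cancel variants of GOMP_sections_end substituted as selected
   above.  */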
6127 
6128 /* Expand code for an OpenMP single directive.  We've already expanded
6129    much of the code; here we simply place the GOMP_barrier call.  */
6130 
6131 static void
6132 expand_omp_single (struct omp_region *region)
6133 {
6134   basic_block entry_bb, exit_bb;
6135   gimple_stmt_iterator si;
6136 
6137   entry_bb = region->entry;
6138   exit_bb = region->exit;
6139 
6140   si = gsi_last_bb (entry_bb);
6141   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6142   gsi_remove (&si, true);
6143   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6144 
6145   si = gsi_last_bb (exit_bb);
6146   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6147     {
6148       tree t = gimple_omp_return_lhs (gsi_stmt (si));
6149       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6150     }
6151   gsi_remove (&si, true);
6152   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6153 }
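
/* Illustrative sketch (assuming the usual lowering done earlier in
   omp-low.c): by this point a directive such as

     #pragma omp single
       body;

   has already been rewritten into roughly

     if (GOMP_single_start ())
       body;
     GOMP_barrier ();

   and the function above merely deletes the region markers and, unless
   nowait was given, places that final barrier.  */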
6154 
6155 /* Generic expansion for OpenMP synchronization directives: master,
6156    ordered and critical.  All we need to do here is remove the entry
6157    and exit markers for REGION.  */
6158 
6159 static void
6160 expand_omp_synch (struct omp_region *region)
6161 {
6162   basic_block entry_bb, exit_bb;
6163   gimple_stmt_iterator si;
6164 
6165   entry_bb = region->entry;
6166   exit_bb = region->exit;
6167 
6168   si = gsi_last_bb (entry_bb);
6169   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6170 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6171 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6172 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6173 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6174 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6175   gsi_remove (&si, true);
6176   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6177 
6178   if (exit_bb)
6179     {
6180       si = gsi_last_bb (exit_bb);
6181       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6182       gsi_remove (&si, true);
6183       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6184     }
6185 }
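
/* Illustrative sketch: for

     #pragma omp critical
       body;

   the lowering pass has already bracketed the body with library calls, so
   the region looks like

     GIMPLE_OMP_CRITICAL
     GOMP_critical_start ();
     body;
     GOMP_critical_end ();
     GIMPLE_OMP_RETURN

   and removing the two markers above leaves only the runtime calls.  */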
6186 
6187 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6188    operation as a normal volatile load.  */
6189 
6190 static bool
6191 expand_omp_atomic_load (basic_block load_bb, tree addr,
6192 			tree loaded_val, int index)
6193 {
6194   enum built_in_function tmpbase;
6195   gimple_stmt_iterator gsi;
6196   basic_block store_bb;
6197   location_t loc;
6198   gimple *stmt;
6199   tree decl, call, type, itype;
6200 
6201   gsi = gsi_last_bb (load_bb);
6202   stmt = gsi_stmt (gsi);
6203   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6204   loc = gimple_location (stmt);
6205 
6206   /* ??? If the target does not implement atomic_load_optab[mode], and mode
6207      is smaller than word size, then expand_atomic_load assumes that the load
6208      is atomic.  We could avoid the builtin entirely in this case.  */
6209 
6210   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6211   decl = builtin_decl_explicit (tmpbase);
6212   if (decl == NULL_TREE)
6213     return false;
6214 
6215   type = TREE_TYPE (loaded_val);
6216   itype = TREE_TYPE (TREE_TYPE (decl));
6217 
6218   call = build_call_expr_loc (loc, decl, 2, addr,
6219 			      build_int_cst (NULL,
6220 					     gimple_omp_atomic_seq_cst_p (stmt)
6221 					     ? MEMMODEL_SEQ_CST
6222 					     : MEMMODEL_RELAXED));
6223   if (!useless_type_conversion_p (type, itype))
6224     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6225   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6226 
6227   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6228   gsi_remove (&gsi, true);
6229 
6230   store_bb = single_succ (load_bb);
6231   gsi = gsi_last_bb (store_bb);
6232   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6233   gsi_remove (&gsi, true);
6234 
6235   if (gimple_in_ssa_p (cfun))
6236     update_ssa (TODO_update_ssa_no_phi);
6237 
6238   return true;
6239 }
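
/* Illustrative sketch: with "int *p, v" (so INDEX == 2), the directive

     #pragma omp atomic read
       v = *p;

   is expanded by the function above into the equivalent of

     v = __atomic_load_4 (p, MEMMODEL_RELAXED);

   using MEMMODEL_SEQ_CST instead when the seq_cst clause is present.  */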
6240 
6241 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6242    operation as a normal volatile store.  */
6243 
6244 static bool
6245 expand_omp_atomic_store (basic_block load_bb, tree addr,
6246 			 tree loaded_val, tree stored_val, int index)
6247 {
6248   enum built_in_function tmpbase;
6249   gimple_stmt_iterator gsi;
6250   basic_block store_bb = single_succ (load_bb);
6251   location_t loc;
6252   gimple *stmt;
6253   tree decl, call, type, itype;
6254   machine_mode imode;
6255   bool exchange;
6256 
6257   gsi = gsi_last_bb (load_bb);
6258   stmt = gsi_stmt (gsi);
6259   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6260 
6261   /* If the load value is needed, then this isn't a store but an exchange.  */
6262   exchange = gimple_omp_atomic_need_value_p (stmt);
6263 
6264   gsi = gsi_last_bb (store_bb);
6265   stmt = gsi_stmt (gsi);
6266   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6267   loc = gimple_location (stmt);
6268 
6269   /* ??? If the target does not implement atomic_store_optab[mode], and mode
6270      is smaller than word size, then expand_atomic_store assumes that the store
6271      is atomic.  We could avoid the builtin entirely in this case.  */
6272 
6273   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6274   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6275   decl = builtin_decl_explicit (tmpbase);
6276   if (decl == NULL_TREE)
6277     return false;
6278 
6279   type = TREE_TYPE (stored_val);
6280 
6281   /* Dig out the type of the function's second argument.  */
6282   itype = TREE_TYPE (decl);
6283   itype = TYPE_ARG_TYPES (itype);
6284   itype = TREE_CHAIN (itype);
6285   itype = TREE_VALUE (itype);
6286   imode = TYPE_MODE (itype);
6287 
6288   if (exchange && !can_atomic_exchange_p (imode, true))
6289     return false;
6290 
6291   if (!useless_type_conversion_p (itype, type))
6292     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6293   call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6294 			      build_int_cst (NULL,
6295 					     gimple_omp_atomic_seq_cst_p (stmt)
6296 					     ? MEMMODEL_SEQ_CST
6297 					     : MEMMODEL_RELAXED));
6298   if (exchange)
6299     {
6300       if (!useless_type_conversion_p (type, itype))
6301 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6302       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6303     }
6304 
6305   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6306   gsi_remove (&gsi, true);
6307 
6308   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6309   gsi = gsi_last_bb (load_bb);
6310   gsi_remove (&gsi, true);
6311 
6312   if (gimple_in_ssa_p (cfun))
6313     update_ssa (TODO_update_ssa_no_phi);
6314 
6315   return true;
6316 }
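
/* Illustrative sketch: with "int *p, v" (so INDEX == 2),

     #pragma omp atomic write
       *p = v;

   becomes the equivalent of

     __atomic_store_4 (p, v, MEMMODEL_RELAXED);

   while a capture form that needs the old value takes the exchange shape

     old = __atomic_exchange_4 (p, v, MEMMODEL_RELAXED);  */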
6317 
6318 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6319    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6320    size of the data type, and thus usable to find the index of the builtin
6321    decl.  Returns false if the expression is not of the proper form.  */
6322 
6323 static bool
6324 expand_omp_atomic_fetch_op (basic_block load_bb,
6325 			    tree addr, tree loaded_val,
6326 			    tree stored_val, int index)
6327 {
6328   enum built_in_function oldbase, newbase, tmpbase;
6329   tree decl, itype, call;
6330   tree lhs, rhs;
6331   basic_block store_bb = single_succ (load_bb);
6332   gimple_stmt_iterator gsi;
6333   gimple *stmt;
6334   location_t loc;
6335   enum tree_code code;
6336   bool need_old, need_new;
6337   machine_mode imode;
6338   bool seq_cst;
6339 
6340   /* We expect to find the following sequences:
6341 
6342    load_bb:
6343        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6344 
6345    store_bb:
6346        val = tmp OP something; (or: something OP tmp)
6347        GIMPLE_OMP_ATOMIC_STORE (val)
6348 
6349   ???FIXME: Allow a more flexible sequence.
6350   Perhaps use data flow to pick the statements.
6351 
6352   */
6353 
6354   gsi = gsi_after_labels (store_bb);
6355   stmt = gsi_stmt (gsi);
6356   loc = gimple_location (stmt);
6357   if (!is_gimple_assign (stmt))
6358     return false;
6359   gsi_next (&gsi);
6360   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6361     return false;
6362   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6363   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6364   seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6365   gcc_checking_assert (!need_old || !need_new);
6366 
6367   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6368     return false;
6369 
6370   /* Check for one of the supported fetch-op operations.  */
6371   code = gimple_assign_rhs_code (stmt);
6372   switch (code)
6373     {
6374     case PLUS_EXPR:
6375     case POINTER_PLUS_EXPR:
6376       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6377       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6378       break;
6379     case MINUS_EXPR:
6380       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6381       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6382       break;
6383     case BIT_AND_EXPR:
6384       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6385       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6386       break;
6387     case BIT_IOR_EXPR:
6388       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6389       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6390       break;
6391     case BIT_XOR_EXPR:
6392       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6393       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6394       break;
6395     default:
6396       return false;
6397     }
6398 
6399   /* Make sure the expression is of the proper form.  */
6400   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6401     rhs = gimple_assign_rhs2 (stmt);
6402   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6403 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6404     rhs = gimple_assign_rhs1 (stmt);
6405   else
6406     return false;
6407 
6408   tmpbase = ((enum built_in_function)
6409 	     ((need_new ? newbase : oldbase) + index + 1));
6410   decl = builtin_decl_explicit (tmpbase);
6411   if (decl == NULL_TREE)
6412     return false;
6413   itype = TREE_TYPE (TREE_TYPE (decl));
6414   imode = TYPE_MODE (itype);
6415 
6416   /* We could test all of the various optabs involved, but the fact of the
6417      matter is that (with the exception of i486 vs i586 and xadd) all targets
6418      that support any atomic operation optab also implement compare-and-swap.
6419      Let optabs.c take care of expanding any compare-and-swap loop.  */
6420   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6421     return false;
6422 
6423   gsi = gsi_last_bb (load_bb);
6424   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6425 
6426   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6427      It only requires that the operation happen atomically.  Thus we can
6428      use the RELAXED memory model.  */
6429   call = build_call_expr_loc (loc, decl, 3, addr,
6430 			      fold_convert_loc (loc, itype, rhs),
6431 			      build_int_cst (NULL,
6432 					     seq_cst ? MEMMODEL_SEQ_CST
6433 						     : MEMMODEL_RELAXED));
6434 
6435   if (need_old || need_new)
6436     {
6437       lhs = need_old ? loaded_val : stored_val;
6438       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6439       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6440     }
6441   else
6442     call = fold_convert_loc (loc, void_type_node, call);
6443   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6444   gsi_remove (&gsi, true);
6445 
6446   gsi = gsi_last_bb (store_bb);
6447   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6448   gsi_remove (&gsi, true);
6449   gsi = gsi_last_bb (store_bb);
6450   stmt = gsi_stmt (gsi);
6451   gsi_remove (&gsi, true);
6452 
6453   if (gimple_in_ssa_p (cfun))
6454     {
6455       release_defs (stmt);
6456       update_ssa (TODO_update_ssa_no_phi);
6457     }
6458 
6459   return true;
6460 }
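
/* Illustrative sketch: with "int *p" (so INDEX == 2),

     #pragma omp atomic
       *p += 42;

   matches the PLUS_EXPR case above and is expanded into the equivalent of

     __atomic_fetch_add_4 (p, 42, MEMMODEL_RELAXED);

   or __atomic_add_fetch_4 when the new value is needed.  */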
6461 
6462 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6463 
6464       oldval = *addr;
6465       repeat:
6466 	newval = rhs;	 // with oldval replacing *addr in rhs
6467 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6468 	if (oldval != newval)
6469 	  goto repeat;
6470 
6471    INDEX is log2 of the size of the data type, and thus usable to find the
6472    index of the builtin decl.  */
6473 
6474 static bool
6475 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6476 			    tree addr, tree loaded_val, tree stored_val,
6477 			    int index)
6478 {
6479   tree loadedi, storedi, initial, new_storedi, old_vali;
6480   tree type, itype, cmpxchg, iaddr;
6481   gimple_stmt_iterator si;
6482   basic_block loop_header = single_succ (load_bb);
6483   gimple *phi, *stmt;
6484   edge e;
6485   enum built_in_function fncode;
6486 
6487   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6488      order to use the RELAXED memory model effectively.  */
6489   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6490 				    + index + 1);
6491   cmpxchg = builtin_decl_explicit (fncode);
6492   if (cmpxchg == NULL_TREE)
6493     return false;
6494   type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6495   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6496 
6497   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6498       || !can_atomic_load_p (TYPE_MODE (itype)))
6499     return false;
6500 
6501   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6502   si = gsi_last_bb (load_bb);
6503   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6504 
6505   /* For floating-point values, we'll need to view-convert them to integers
6506      so that we can perform the atomic compare and swap.  Simplify the
6507      following code by always setting up the "i"ntegral variables.  */
6508   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6509     {
6510       tree iaddr_val;
6511 
6512       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6513 							   true));
6514       iaddr_val
6515 	= force_gimple_operand_gsi (&si,
6516 				    fold_convert (TREE_TYPE (iaddr), addr),
6517 				    false, NULL_TREE, true, GSI_SAME_STMT);
6518       stmt = gimple_build_assign (iaddr, iaddr_val);
6519       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6520       loadedi = create_tmp_var (itype);
6521       if (gimple_in_ssa_p (cfun))
6522 	loadedi = make_ssa_name (loadedi);
6523     }
6524   else
6525     {
6526       iaddr = addr;
6527       loadedi = loaded_val;
6528     }
6529 
6530   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6531   tree loaddecl = builtin_decl_explicit (fncode);
6532   if (loaddecl)
6533     initial
6534       = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6535 		      build_call_expr (loaddecl, 2, iaddr,
6536 				       build_int_cst (NULL_TREE,
6537 						      MEMMODEL_RELAXED)));
6538   else
6539     initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6540 		      build_int_cst (TREE_TYPE (iaddr), 0));
6541 
6542   initial
6543     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6544 				GSI_SAME_STMT);
6545 
6546   /* Move the value to the LOADEDI temporary.  */
6547   if (gimple_in_ssa_p (cfun))
6548     {
6549       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6550       phi = create_phi_node (loadedi, loop_header);
6551       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6552 	       initial);
6553     }
6554   else
6555     gsi_insert_before (&si,
6556 		       gimple_build_assign (loadedi, initial),
6557 		       GSI_SAME_STMT);
6558   if (loadedi != loaded_val)
6559     {
6560       gimple_stmt_iterator gsi2;
6561       tree x;
6562 
6563       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6564       gsi2 = gsi_start_bb (loop_header);
6565       if (gimple_in_ssa_p (cfun))
6566 	{
6567 	  gassign *stmt;
6568 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6569 					true, GSI_SAME_STMT);
6570 	  stmt = gimple_build_assign (loaded_val, x);
6571 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6572 	}
6573       else
6574 	{
6575 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6576 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6577 				    true, GSI_SAME_STMT);
6578 	}
6579     }
6580   gsi_remove (&si, true);
6581 
6582   si = gsi_last_bb (store_bb);
6583   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6584 
6585   if (iaddr == addr)
6586     storedi = stored_val;
6587   else
6588     storedi
6589       = force_gimple_operand_gsi (&si,
6590 				  build1 (VIEW_CONVERT_EXPR, itype,
6591 					  stored_val), true, NULL_TREE, true,
6592 				  GSI_SAME_STMT);
6593 
6594   /* Build the compare&swap statement.  */
6595   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6596   new_storedi = force_gimple_operand_gsi (&si,
6597 					  fold_convert (TREE_TYPE (loadedi),
6598 							new_storedi),
6599 					  true, NULL_TREE,
6600 					  true, GSI_SAME_STMT);
6601 
6602   if (gimple_in_ssa_p (cfun))
6603     old_vali = loadedi;
6604   else
6605     {
6606       old_vali = create_tmp_var (TREE_TYPE (loadedi));
6607       stmt = gimple_build_assign (old_vali, loadedi);
6608       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6609 
6610       stmt = gimple_build_assign (loadedi, new_storedi);
6611       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6612     }
6613 
6614   /* Note that we always perform the comparison as an integer, even for
6615      floating point.  This allows the atomic operation to properly
6616      succeed even with NaNs and -0.0.  */
6617   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6618   stmt = gimple_build_cond_empty (ne);
6619   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6620 
6621   /* Update cfg.  */
6622   e = single_succ_edge (store_bb);
6623   e->flags &= ~EDGE_FALLTHRU;
6624   e->flags |= EDGE_FALSE_VALUE;
6625 
6626   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6627 
6628   /* Copy the new value to loadedi (we already did that before the condition
6629      if we are not in SSA).  */
6630   if (gimple_in_ssa_p (cfun))
6631     {
6632       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6633       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6634     }
6635 
6636   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6637   gsi_remove (&si, true);
6638 
6639   struct loop *loop = alloc_loop ();
6640   loop->header = loop_header;
6641   loop->latch = store_bb;
6642   add_loop (loop, loop_header->loop_father);
6643 
6644   if (gimple_in_ssa_p (cfun))
6645     update_ssa (TODO_update_ssa_no_phi);
6646 
6647   return true;
6648 }
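
/* Illustrative sketch of the loop built above for a 4-byte float, where the
   value is view-converted to an integer so it can be compared and swapped:

     iaddr = (int *) addr;
     loadedi = *iaddr;
   repeat:
     storedi = VIEW_CONVERT_EXPR<int> (rhs);  // rhs uses the float view
     newi = __sync_val_compare_and_swap_4 (iaddr, loadedi, storedi);
     if (newi != loadedi)
       {
	 loadedi = newi;
	 goto repeat;
       }  */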
6649 
6650 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6651 
6652 				  GOMP_atomic_start ();
6653 				  *addr = rhs;
6654 				  GOMP_atomic_end ();
6655 
6656    The result is not globally atomic, but works so long as all parallel
6657    references are within #pragma omp atomic directives.  According to
6658    responses received from omp@openmp.org, this appears to be within
6659    spec, which makes sense, since that's how several other compilers
6660    handle this situation as well.
6661    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6662    expanding.  STORED_VAL is the operand of the matching
6663    GIMPLE_OMP_ATOMIC_STORE.
6664 
6665    We replace
6666    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6667    loaded_val = *addr;
6668 
6669    and replace
6670    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6671    *addr = stored_val;
6672 */
6673 
6674 static bool
6675 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6676 			 tree addr, tree loaded_val, tree stored_val)
6677 {
6678   gimple_stmt_iterator si;
6679   gassign *stmt;
6680   tree t;
6681 
6682   si = gsi_last_bb (load_bb);
6683   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6684 
6685   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6686   t = build_call_expr (t, 0);
6687   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6688 
6689   stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6690   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6691   gsi_remove (&si, true);
6692 
6693   si = gsi_last_bb (store_bb);
6694   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6695 
6696   stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6697 			      stored_val);
6698   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6699 
6700   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6701   t = build_call_expr (t, 0);
6702   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6703   gsi_remove (&si, true);
6704 
6705   if (gimple_in_ssa_p (cfun))
6706     update_ssa (TODO_update_ssa_no_phi);
6707   return true;
6708 }
6709 
6710 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand
6711    using expand_omp_atomic_fetch_op.  If that fails, we try to
6712    call expand_omp_atomic_pipeline, and if that fails too, the
6713    ultimate fallback is wrapping the operation in a mutex
6714    (expand_omp_atomic_mutex).  REGION is the atomic region built
6715    by build_omp_regions_1().  */
6716 
6717 static void
6718 expand_omp_atomic (struct omp_region *region)
6719 {
6720   basic_block load_bb = region->entry, store_bb = region->exit;
6721   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6722   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6723   tree loaded_val = gimple_omp_atomic_load_lhs (load);
6724   tree addr = gimple_omp_atomic_load_rhs (load);
6725   tree stored_val = gimple_omp_atomic_store_val (store);
6726   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6727   HOST_WIDE_INT index;
6728 
6729   /* Make sure the type is one of the supported sizes.  */
6730   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6731   index = exact_log2 (index);
6732   if (index >= 0 && index <= 4)
6733     {
6734       unsigned int align = TYPE_ALIGN_UNIT (type);
6735 
6736       /* __sync builtins require strict data alignment.  */
6737       if (exact_log2 (align) >= index)
6738 	{
6739 	  /* Atomic load.  */
6740 	  if (loaded_val == stored_val
6741 	      && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6742 		  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6743 	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6744 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6745 	    return;
6746 
6747 	  /* Atomic store.  */
6748 	  if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6749 	       || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6750 	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6751 	      && store_bb == single_succ (load_bb)
6752 	      && first_stmt (store_bb) == store
6753 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6754 					  stored_val, index))
6755 	    return;
6756 
6757 	  /* When possible, use specialized atomic update functions.  */
6758 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6759 	      && store_bb == single_succ (load_bb)
6760 	      && expand_omp_atomic_fetch_op (load_bb, addr,
6761 					     loaded_val, stored_val, index))
6762 	    return;
6763 
6764 	  /* If we don't have specialized __sync builtins, try to implement
6765 	     it as a compare-and-swap loop.  */
6766 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6767 					  loaded_val, stored_val, index))
6768 	    return;
6769 	}
6770     }
6771 
6772   /* The ultimate fallback is wrapping the operation in a mutex.  */
6773   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6774 }
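
/* Illustrative sketch of the last resort: on a target lacking a suitable
   compare-and-swap (or for an unsupported size), the update degenerates to

     GOMP_atomic_start ();
     loaded_val = *addr;
     *addr = stored_val;
     GOMP_atomic_end ();

   which is exactly what expand_omp_atomic_mutex emits.  */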
6775 
6776 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6777    at REGION_EXIT.  */
6778 
6779 static void
6780 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6781 				   basic_block region_exit)
6782 {
6783   struct loop *outer = region_entry->loop_father;
6784   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6785 
6786   /* Don't parallelize the kernels region if it contains more than one outer
6787      loop.  */
6788   unsigned int nr_outer_loops = 0;
6789   struct loop *single_outer = NULL;
6790   for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6791     {
6792       gcc_assert (loop_outer (loop) == outer);
6793 
6794       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6795 	continue;
6796 
6797       if (region_exit != NULL
6798 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6799 	continue;
6800 
6801       nr_outer_loops++;
6802       single_outer = loop;
6803     }
6804   if (nr_outer_loops != 1)
6805     return;
6806 
6807   for (struct loop *loop = single_outer->inner;
6808        loop != NULL;
6809        loop = loop->inner)
6810     if (loop->next)
6811       return;
6812 
6813   /* Mark the loops in the region.  */
6814   for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6815     loop->in_oacc_kernels_region = true;
6816 }
6817 
6818 /* Types used to pass grid and workgroup sizes to kernel invocation.  */
6819 
6820 struct GTY(()) grid_launch_attributes_trees
6821 {
6822   tree kernel_dim_array_type;
6823   tree kernel_lattrs_dimnum_decl;
6824   tree kernel_lattrs_grid_decl;
6825   tree kernel_lattrs_group_decl;
6826   tree kernel_launch_attributes_type;
6827 };
6828 
6829 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6830 
6831 /* Create types used to pass kernel launch attributes to target.  */
6832 
6833 static void
6834 grid_create_kernel_launch_attr_types (void)
6835 {
6836   if (grid_attr_trees)
6837     return;
6838   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6839 
6840   tree dim_arr_index_type
6841     = build_index_type (build_int_cst (integer_type_node, 2));
6842   grid_attr_trees->kernel_dim_array_type
6843     = build_array_type (uint32_type_node, dim_arr_index_type);
6844 
6845   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6846   grid_attr_trees->kernel_lattrs_dimnum_decl
6847     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6848 		  uint32_type_node);
6849   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6850 
6851   grid_attr_trees->kernel_lattrs_grid_decl
6852     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6853 		  grid_attr_trees->kernel_dim_array_type);
6854   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6855     = grid_attr_trees->kernel_lattrs_dimnum_decl;
6856   grid_attr_trees->kernel_lattrs_group_decl
6857     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6858 		  grid_attr_trees->kernel_dim_array_type);
6859   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6860     = grid_attr_trees->kernel_lattrs_grid_decl;
6861   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6862 			 "__gomp_kernel_launch_attributes",
6863 			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6864 }
6865 
6866 /* Insert before the current statement in GSI a store of VALUE to element
6867    INDEX of the array field FLD_DECL (of type kernel_dim_array_type) of
6868    RANGE_VAR.  VALUE must be of type uint32_type_node.  */
6869 
6870 static void
6871 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6872 			     tree fld_decl, int index, tree value)
6873 {
6874   tree ref = build4 (ARRAY_REF, uint32_type_node,
6875 		     build3 (COMPONENT_REF,
6876 			     grid_attr_trees->kernel_dim_array_type,
6877 			     range_var, fld_decl, NULL_TREE),
6878 		     build_int_cst (integer_type_node, index),
6879 		     NULL_TREE, NULL_TREE);
6880   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6881 }
6882 
6883 /* Return a tree representation of a pointer to a structure with grid and
6884    work-group size information.  Statements filling that information will be
6885    inserted before GSI; TGT_STMT is the target statement which has the
6886    necessary information in it.  */
6887 
6888 static tree
6889 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6890 				   gomp_target *tgt_stmt)
6891 {
6892   grid_create_kernel_launch_attr_types ();
6893   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6894 				"__kernel_launch_attrs");
6895 
6896   unsigned max_dim = 0;
6897   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6898        clause;
6899        clause = OMP_CLAUSE_CHAIN (clause))
6900     {
6901       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6902 	continue;
6903 
6904       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6905       max_dim = MAX (dim, max_dim);
6906 
6907       grid_insert_store_range_dim (gsi, lattrs,
6908 				   grid_attr_trees->kernel_lattrs_grid_decl,
6909 				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6910       grid_insert_store_range_dim (gsi, lattrs,
6911 				   grid_attr_trees->kernel_lattrs_group_decl,
6912 				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6913     }
6914 
6915   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6916 			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6917   gcc_checking_assert (max_dim <= 2);
6918   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6919   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6920 		     GSI_SAME_STMT);
6921   TREE_ADDRESSABLE (lattrs) = 1;
6922   return build_fold_addr_expr (lattrs);
6923 }
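
/* For reference, the record built by grid_create_kernel_launch_attr_types
   and filled in above corresponds to this C layout (a sketch; the real type
   is constructed as trees):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };  */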
6924 
6925 /* Build target argument identifier from the DEVICE identifier, value
6926    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6927 
6928 static tree
6929 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6930 {
6931   tree t = build_int_cst (integer_type_node, device);
6932   if (subsequent_param)
6933     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6934 		     build_int_cst (integer_type_node,
6935 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6936   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6937 		   build_int_cst (integer_type_node, id));
6938   return t;
6939 }
6940 
6941 /* Like above, but return it in a type that can be directly stored as an
6942    element of the argument array.  */
6943 
6944 static tree
6945 get_target_argument_identifier (int device, bool subsequent_param, int id)
6946 {
6947   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6948   return fold_convert (ptr_type_node, t);
6949 }
6950 
6951 /* Return a target argument consisting of DEVICE identifier, value identifier
6952    ID, and the actual VALUE.  */
6953 
6954 static tree
6955 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6956 			   tree value)
6957 {
6958   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6959 			fold_convert (integer_type_node, value),
6960 			build_int_cst (unsigned_type_node,
6961 				       GOMP_TARGET_ARG_VALUE_SHIFT));
6962   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6963 		   get_target_argument_identifier_1 (device, false, id));
6964   t = fold_convert (ptr_type_node, t);
6965   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6966 }
6967 
6968 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6969    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6970    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6971    arguments.  */
6972 
6973 static void
6974 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6975 					 int id, tree value, vec <tree> *args)
6976 {
6977   if (tree_fits_shwi_p (value)
6978       && tree_to_shwi (value) > -(1 << 15)
6979       && tree_to_shwi (value) < (1 << 15))
6980     args->quick_push (get_target_argument_value (gsi, device, id, value));
6981   else
6982     {
6983       args->quick_push (get_target_argument_identifier (device, true, id));
6984       value = fold_convert (ptr_type_node, value);
6985       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6986 					GSI_SAME_STMT);
6987       args->quick_push (value);
6988     }
6989 }
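
/* Illustrative sketch of the encoding used above, following the
   GOMP_TARGET_ARG_* constants from gomp-constants.h: each argument word
   packs

     bits  0..6   device number, or GOMP_TARGET_ARG_DEVICE_ALL
     bit   7      GOMP_TARGET_ARG_SUBSEQUENT_PARAM, meaning the value
		  follows in the next array element
     bits  8..15  value identifier, e.g. GOMP_TARGET_ARG_NUM_TEAMS
     bits 16..    the value itself, when it fits into the signed 16-bit
		  range checked above (GOMP_TARGET_ARG_VALUE_SHIFT == 16).  */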
6990 
6991 /* Create an array of arguments that is then passed to GOMP_target.  */
6992 
6993 static tree
6994 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6995 {
6996   auto_vec <tree, 6> args;
6997   tree clauses = gimple_omp_target_clauses (tgt_stmt);
6998   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6999   if (c)
7000     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7001   else
7002     t = integer_minus_one_node;
7003   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7004 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7005 
7006   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7007   if (c)
7008     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7009   else
7010     t = integer_minus_one_node;
7011   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7012 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
7013 					   &args);
7014 
7015   /* Add HSA-specific grid sizes, if available.  */
7016   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7017 		       OMP_CLAUSE__GRIDDIM_))
7018     {
7019       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7020       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7021       args.quick_push (t);
7022       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7023     }
7024 
7025   /* Produce more, perhaps device specific, arguments here.  */
7026 
7027   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7028 							  args.length () + 1),
7029 				  ".omp_target_args");
7030   for (unsigned i = 0; i < args.length (); i++)
7031     {
7032       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7033 			 build_int_cst (integer_type_node, i),
7034 			 NULL_TREE, NULL_TREE);
7035       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7036 			 GSI_SAME_STMT);
7037     }
7038   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7039 		     build_int_cst (integer_type_node, args.length ()),
7040 		     NULL_TREE, NULL_TREE);
7041   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7042 		     GSI_SAME_STMT);
7043   TREE_ADDRESSABLE (argarray) = 1;
7044   return build_fold_addr_expr (argarray);
7045 }
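
/* Illustrative sketch: for "#pragma omp target teams num_teams(4)" the
   array built above would hold an encoded num_teams entry, an encoded
   thread_limit entry (-1, i.e. default) and the terminating NULL, roughly

     void *.omp_target_args[]
       = { DEVICE_ALL | NUM_TEAMS | (4 << 16),
	   DEVICE_ALL | THREAD_LIMIT | (-1 << 16),
	   NULL };  */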
7046 
7047 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
7048 
7049 static void
7050 expand_omp_target (struct omp_region *region)
7051 {
7052   basic_block entry_bb, exit_bb, new_bb;
7053   struct function *child_cfun;
7054   tree child_fn, block, t;
7055   gimple_stmt_iterator gsi;
7056   gomp_target *entry_stmt;
7057   gimple *stmt;
7058   edge e;
7059   bool offloaded, data_region;
7060 
7061   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7062   new_bb = region->entry;
7063 
7064   offloaded = is_gimple_omp_offloaded (entry_stmt);
7065   switch (gimple_omp_target_kind (entry_stmt))
7066     {
7067     case GF_OMP_TARGET_KIND_REGION:
7068     case GF_OMP_TARGET_KIND_UPDATE:
7069     case GF_OMP_TARGET_KIND_ENTER_DATA:
7070     case GF_OMP_TARGET_KIND_EXIT_DATA:
7071     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7072     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7073     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7074     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7075     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7076       data_region = false;
7077       break;
7078     case GF_OMP_TARGET_KIND_DATA:
7079     case GF_OMP_TARGET_KIND_OACC_DATA:
7080     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7081       data_region = true;
7082       break;
7083     default:
7084       gcc_unreachable ();
7085     }
7086 
7087   child_fn = NULL_TREE;
7088   child_cfun = NULL;
7089   if (offloaded)
7090     {
7091       child_fn = gimple_omp_target_child_fn (entry_stmt);
7092       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7093     }
7094 
7095   /* Supported by expand_omp_taskreg, but not here.  */
7096   if (child_cfun != NULL)
7097     gcc_checking_assert (!child_cfun->cfg);
7098   gcc_checking_assert (!gimple_in_ssa_p (cfun));
7099 
7100   entry_bb = region->entry;
7101   exit_bb = region->exit;
7102 
7103   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7104     mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7105 
7106   if (offloaded)
7107     {
7108       unsigned srcidx, dstidx, num;
7109 
7110       /* If the offloading region needs data sent from the parent
7111 	 function, then the very first statement (except possible
7112 	 tree profile counter updates) of the offloading body
7113 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
7114 	 &.OMP_DATA_O is passed as an argument to the child function,
7115 	 we need to replace it with the argument as seen by the child
7116 	 function.
7117 
7118 	 In most cases, this will end up being the identity assignment
7119 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
7120 	 a function call that has been inlined, the original PARM_DECL
7121 	 .OMP_DATA_I may have been converted into a different local
7122 	 variable.  In that case, we need to keep the assignment.  */
7123       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7124       if (data_arg)
7125 	{
7126 	  basic_block entry_succ_bb = single_succ (entry_bb);
7127 	  gimple_stmt_iterator gsi;
7128 	  tree arg;
7129 	  gimple *tgtcopy_stmt = NULL;
7130 	  tree sender = TREE_VEC_ELT (data_arg, 0);
7131 
7132 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7133 	    {
7134 	      gcc_assert (!gsi_end_p (gsi));
7135 	      stmt = gsi_stmt (gsi);
7136 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
7137 		continue;
7138 
7139 	      if (gimple_num_ops (stmt) == 2)
7140 		{
7141 		  tree arg = gimple_assign_rhs1 (stmt);
7142 
7143 		  /* We're ignoring the subcode because we're
7144 		     effectively doing a STRIP_NOPS.  */
7145 
7146 		  if (TREE_CODE (arg) == ADDR_EXPR
7147 		      && TREE_OPERAND (arg, 0) == sender)
7148 		    {
7149 		      tgtcopy_stmt = stmt;
7150 		      break;
7151 		    }
7152 		}
7153 	    }
7154 
7155 	  gcc_assert (tgtcopy_stmt != NULL);
7156 	  arg = DECL_ARGUMENTS (child_fn);
7157 
7158 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7159 	  gsi_remove (&gsi, true);
7160 	}
7161 
7162       /* Declare local variables needed in CHILD_CFUN.  */
7163       block = DECL_INITIAL (child_fn);
7164       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7165       /* The gimplifier could record temporaries in the offloading block
7166 	 rather than in containing function's local_decls chain,
7167 	 which would mean cgraph missed finalizing them.  Do it now.  */
7168       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7169 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7170 	  varpool_node::finalize_decl (t);
7171       DECL_SAVED_TREE (child_fn) = NULL;
7172       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7173       gimple_set_body (child_fn, NULL);
7174       TREE_USED (block) = 1;
7175 
7176       /* Reset DECL_CONTEXT on function arguments.  */
7177       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7178 	DECL_CONTEXT (t) = child_fn;
7179 
7180       /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7181 	 so that it can be moved to the child function.  */
7182       gsi = gsi_last_bb (entry_bb);
7183       stmt = gsi_stmt (gsi);
7184       gcc_assert (stmt
7185 		  && gimple_code (stmt) == gimple_code (entry_stmt));
7186       e = split_block (entry_bb, stmt);
7187       gsi_remove (&gsi, true);
7188       entry_bb = e->dest;
7189       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7190 
7191       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7192       if (exit_bb)
7193 	{
7194 	  gsi = gsi_last_bb (exit_bb);
7195 	  gcc_assert (!gsi_end_p (gsi)
7196 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7197 	  stmt = gimple_build_return (NULL);
7198 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7199 	  gsi_remove (&gsi, true);
7200 	}
7201 
7202       /* Make sure to generate early debug for the function before
7203          outlining anything.  */
7204       if (! gimple_in_ssa_p (cfun))
7205 	(*debug_hooks->early_global_decl) (cfun->decl);
7206 
7207       /* Move the offloading region into CHILD_CFUN.  */
7208 
7209       block = gimple_block (entry_stmt);
7210 
7211       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7212       if (exit_bb)
7213 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7214       /* When the OMP expansion process cannot guarantee an up-to-date
7215 	 loop tree, arrange for the child function to fix up loops.  */
7216       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7217 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7218 
7219       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7220       num = vec_safe_length (child_cfun->local_decls);
7221       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7222 	{
7223 	  t = (*child_cfun->local_decls)[srcidx];
7224 	  if (DECL_CONTEXT (t) == cfun->decl)
7225 	    continue;
7226 	  if (srcidx != dstidx)
7227 	    (*child_cfun->local_decls)[dstidx] = t;
7228 	  dstidx++;
7229 	}
7230       if (dstidx != num)
7231 	vec_safe_truncate (child_cfun->local_decls, dstidx);
7232 
7233       /* Inform the callgraph about the new function.  */
7234       child_cfun->curr_properties = cfun->curr_properties;
7235       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7236       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7237       cgraph_node *node = cgraph_node::get_create (child_fn);
7238       node->parallelized_function = 1;
7239       cgraph_node::add_new_function (child_fn, true);
7240 
7241       /* Add the new function to the offload table.  */
7242       if (ENABLE_OFFLOADING)
7243 	vec_safe_push (offload_funcs, child_fn);
7244 
7245       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7246 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7247 
7248       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7249 	 fixed in a following pass.  */
7250       push_cfun (child_cfun);
7251       if (need_asm)
7252 	assign_assembler_name_if_needed (child_fn);
7253       cgraph_edge::rebuild_edges ();
7254 
7255       /* Some EH regions might become dead, see PR34608.  If
7256 	 pass_cleanup_cfg isn't the first pass to happen with the
7257 	 new child, these dead EH edges might cause problems.
7258 	 Clean them up now.  */
7259       if (flag_exceptions)
7260 	{
7261 	  basic_block bb;
7262 	  bool changed = false;
7263 
7264 	  FOR_EACH_BB_FN (bb, cfun)
7265 	    changed |= gimple_purge_dead_eh_edges (bb);
7266 	  if (changed)
7267 	    cleanup_tree_cfg ();
7268 	}
7269       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7270 	verify_loop_structure ();
7271       pop_cfun ();
7272 
7273       if (dump_file && !gimple_in_ssa_p (cfun))
7274 	{
7275 	  omp_any_child_fn_dumped = true;
7276 	  dump_function_header (dump_file, child_fn, dump_flags);
7277 	  dump_function_to_file (child_fn, dump_file, dump_flags);
7278 	}
7279     }
7280 
7281   /* Emit a library call to launch the offloading region, or do data
7282      transfers.  */
7283   tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7284   enum built_in_function start_ix;
7285   location_t clause_loc;
7286   unsigned int flags_i = 0;
7287   bool oacc_kernels_p = false;
7288 
7289   switch (gimple_omp_target_kind (entry_stmt))
7290     {
7291     case GF_OMP_TARGET_KIND_REGION:
7292       start_ix = BUILT_IN_GOMP_TARGET;
7293       break;
7294     case GF_OMP_TARGET_KIND_DATA:
7295       start_ix = BUILT_IN_GOMP_TARGET_DATA;
7296       break;
7297     case GF_OMP_TARGET_KIND_UPDATE:
7298       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7299       break;
7300     case GF_OMP_TARGET_KIND_ENTER_DATA:
7301       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7302       break;
7303     case GF_OMP_TARGET_KIND_EXIT_DATA:
7304       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7305       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7306       break;
7307     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7308       oacc_kernels_p = true;
7309       /* FALLTHROUGH */
7310     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7311       start_ix = BUILT_IN_GOACC_PARALLEL;
7312       break;
7313     case GF_OMP_TARGET_KIND_OACC_DATA:
7314     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7315       start_ix = BUILT_IN_GOACC_DATA_START;
7316       break;
7317     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7318       start_ix = BUILT_IN_GOACC_UPDATE;
7319       break;
7320     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7321       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7322       break;
7323     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7324       start_ix = BUILT_IN_GOACC_DECLARE;
7325       break;
7326     default:
7327       gcc_unreachable ();
7328     }
7329 
7330   clauses = gimple_omp_target_clauses (entry_stmt);
7331 
7332   /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7333      library choose) and there is no conditional.  */
7334   cond = NULL_TREE;
7335   device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7336 
7337   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7338   if (c)
7339     cond = OMP_CLAUSE_IF_EXPR (c);
7340 
7341   c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7342   if (c)
7343     {
7344       /* Even if we pass it to all library function calls, it is currently only
7345 	 defined/used for the OpenMP target ones.  */
7346       gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7347 			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
7348 			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7349 			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7350 
7351       device = OMP_CLAUSE_DEVICE_ID (c);
7352       clause_loc = OMP_CLAUSE_LOCATION (c);
7353     }
7354   else
7355     clause_loc = gimple_location (entry_stmt);
7356 
7357   c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7358   if (c)
7359     flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7360 
7361   /* Ensure 'device' is of the correct type.  */
7362   device = fold_convert_loc (clause_loc, integer_type_node, device);
7363 
7364   /* If we found the clause 'if (cond)', build
7365      (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7366   if (cond)
7367     {
7368       cond = gimple_boolify (cond);
7369 
7370       basic_block cond_bb, then_bb, else_bb;
7371       edge e;
7372       tree tmp_var;
7373 
7374       tmp_var = create_tmp_var (TREE_TYPE (device));
7375       if (offloaded)
7376 	e = split_block_after_labels (new_bb);
7377       else
7378 	{
7379 	  gsi = gsi_last_bb (new_bb);
7380 	  gsi_prev (&gsi);
7381 	  e = split_block (new_bb, gsi_stmt (gsi));
7382 	}
7383       cond_bb = e->src;
7384       new_bb = e->dest;
7385       remove_edge (e);
7386 
7387       then_bb = create_empty_bb (cond_bb);
7388       else_bb = create_empty_bb (then_bb);
7389       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7390       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7391 
7392       stmt = gimple_build_cond_empty (cond);
7393       gsi = gsi_last_bb (cond_bb);
7394       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7395 
7396       gsi = gsi_start_bb (then_bb);
7397       stmt = gimple_build_assign (tmp_var, device);
7398       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7399 
7400       gsi = gsi_start_bb (else_bb);
7401       stmt = gimple_build_assign (tmp_var,
7402 				  build_int_cst (integer_type_node,
7403 						 GOMP_DEVICE_HOST_FALLBACK));
7404       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7405 
7406       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7407       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7408       add_bb_to_loop (then_bb, cond_bb->loop_father);
7409       add_bb_to_loop (else_bb, cond_bb->loop_father);
7410       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7411       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7412 
7413       device = tmp_var;
7414       gsi = gsi_last_bb (new_bb);
7415     }
7416   else
7417     {
7418       gsi = gsi_last_bb (new_bb);
7419       device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7420 					 true, GSI_SAME_STMT);
7421     }
7422 
7423   t = gimple_omp_target_data_arg (entry_stmt);
7424   if (t == NULL)
7425     {
7426       t1 = size_zero_node;
7427       t2 = build_zero_cst (ptr_type_node);
7428       t3 = t2;
7429       t4 = t2;
7430     }
7431   else
7432     {
7433       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7434       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7435       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7436       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7437       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7438     }
7439 
7440   gimple *g;
7441   bool tagging = false;
7442   /* The maximum number of arguments used by any start_ix, without varargs.  */
7443   auto_vec<tree, 11> args;
7444   args.quick_push (device);
7445   if (offloaded)
7446     args.quick_push (build_fold_addr_expr (child_fn));
7447   args.quick_push (t1);
7448   args.quick_push (t2);
7449   args.quick_push (t3);
7450   args.quick_push (t4);
7451   switch (start_ix)
7452     {
7453     case BUILT_IN_GOACC_DATA_START:
7454     case BUILT_IN_GOACC_DECLARE:
7455     case BUILT_IN_GOMP_TARGET_DATA:
7456       break;
7457     case BUILT_IN_GOMP_TARGET:
7458     case BUILT_IN_GOMP_TARGET_UPDATE:
7459     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7460       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7461       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7462       if (c)
7463 	depend = OMP_CLAUSE_DECL (c);
7464       else
7465 	depend = build_int_cst (ptr_type_node, 0);
7466       args.quick_push (depend);
7467       if (start_ix == BUILT_IN_GOMP_TARGET)
7468 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7469       break;
7470     case BUILT_IN_GOACC_PARALLEL:
7471       {
7472 	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7473 	tagging = true;
7474       }
7475       /* FALLTHRU */
7476     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7477     case BUILT_IN_GOACC_UPDATE:
7478       {
7479 	tree t_async = NULL_TREE;
7480 
7481 	/* If present, use the value specified by the respective
7482 	   clause, making sure that it is of the correct type.  */
7483 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7484 	if (c)
7485 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7486 				      integer_type_node,
7487 				      OMP_CLAUSE_ASYNC_EXPR (c));
7488 	else if (!tagging)
7489 	  /* Default value for t_async.  */
7490 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7491 				      integer_type_node,
7492 				      build_int_cst (integer_type_node,
7493 						     GOMP_ASYNC_SYNC));
7494 	if (tagging && t_async)
7495 	  {
7496 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7497 
7498 	    if (TREE_CODE (t_async) == INTEGER_CST)
7499 	      {
7500 		/* See if we can pack the async arg in to the tag's
7501 		   operand.  */
7502 		i_async = TREE_INT_CST_LOW (t_async);
7503 		if (i_async < GOMP_LAUNCH_OP_MAX)
7504 		  t_async = NULL_TREE;
7505 		else
7506 		  i_async = GOMP_LAUNCH_OP_MAX;
7507 	      }
7508 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7509 					      i_async));
7510 	  }
7511 	if (t_async)
7512 	  args.safe_push (t_async);
7513 
7514 	/* Save the argument index, and ... */
7515 	unsigned t_wait_idx = args.length ();
7516 	unsigned num_waits = 0;
7517 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7518 	if (!tagging || c)
7519 	  /* ... push a placeholder.  */
7520 	  args.safe_push (integer_zero_node);
7521 
7522 	for (; c; c = OMP_CLAUSE_CHAIN (c))
7523 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7524 	    {
7525 	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7526 						integer_type_node,
7527 						OMP_CLAUSE_WAIT_EXPR (c)));
7528 	      num_waits++;
7529 	    }
7530 
7531 	if (!tagging || num_waits)
7532 	  {
7533 	    tree len;
7534 
7535 	    /* Now that we know the number, update the placeholder.  */
7536 	    if (tagging)
7537 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7538 	    else
7539 	      len = build_int_cst (integer_type_node, num_waits);
7540 	    len = fold_convert_loc (gimple_location (entry_stmt),
7541 				    unsigned_type_node, len);
7542 	    args[t_wait_idx] = len;
7543 	  }
7544       }
7545       break;
7546     default:
7547       gcc_unreachable ();
7548     }
7549   if (tagging)
7550     /*  Push terminal marker - zero.  */
7551     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7552 
7553   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7554   gimple_set_location (g, gimple_location (entry_stmt));
7555   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7556   if (!offloaded)
7557     {
7558       g = gsi_stmt (gsi);
7559       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7560       gsi_remove (&gsi, true);
7561     }
7562   if (data_region && region->exit)
7563     {
7564       gsi = gsi_last_bb (region->exit);
7565       g = gsi_stmt (gsi);
7566       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7567       gsi_remove (&gsi, true);
7568     }
7569 }
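
/* Illustrative sketch of the launch call emitted above for a plain
   "#pragma omp target" region, assuming BUILT_IN_GOMP_TARGET resolves to
   GOMP_target_ext in this GCC version:

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
		      flags, depend, args);

   where the final argument points to the array built by
   get_target_arguments.  */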
7570 
7571 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7572    the iteration variable derived from the thread number.  INTRA_GROUP means this
7573    is an expansion of a loop iterating over work-items within a separate
7574    iteration over groups.  */
7575 
7576 static void
7577 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7578 {
7579   gimple_stmt_iterator gsi;
7580   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7581   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7582 		       == GF_OMP_FOR_KIND_GRID_LOOP);
7583   size_t collapse = gimple_omp_for_collapse (for_stmt);
7584   struct omp_for_data_loop *loops
7585     = XALLOCAVEC (struct omp_for_data_loop,
7586 		  gimple_omp_for_collapse (for_stmt));
7587   struct omp_for_data fd;
7588 
7589   remove_edge (BRANCH_EDGE (kfor->entry));
7590   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7591 
7592   gcc_assert (kfor->cont);
7593   omp_extract_for_data (for_stmt, &fd, loops);
7594 
7595   gsi = gsi_start_bb (body_bb);
7596 
7597   for (size_t dim = 0; dim < collapse; dim++)
7598     {
7599       tree type, itype;
7600       itype = type = TREE_TYPE (fd.loops[dim].v);
7601       if (POINTER_TYPE_P (type))
7602 	itype = signed_type_for (type);
7603 
7604       tree n1 = fd.loops[dim].n1;
7605       tree step = fd.loops[dim].step;
7606       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7607 				     true, NULL_TREE, true, GSI_SAME_STMT);
7608       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7609 				       true, NULL_TREE, true, GSI_SAME_STMT);
7610       tree threadid;
7611       if (gimple_omp_for_grid_group_iter (for_stmt))
7612 	{
7613 	  gcc_checking_assert (!intra_group);
7614 	  threadid = build_call_expr (builtin_decl_explicit
7615 				      (BUILT_IN_HSA_WORKGROUPID), 1,
7616 				      build_int_cstu (unsigned_type_node, dim));
7617 	}
7618       else if (intra_group)
7619 	threadid = build_call_expr (builtin_decl_explicit
7620 				    (BUILT_IN_HSA_WORKITEMID), 1,
7621 				    build_int_cstu (unsigned_type_node, dim));
7622       else
7623 	threadid = build_call_expr (builtin_decl_explicit
7624 				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7625 				    build_int_cstu (unsigned_type_node, dim));
7626       threadid = fold_convert (itype, threadid);
7627       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7628 					   true, GSI_SAME_STMT);
7629 
7630       tree startvar = fd.loops[dim].v;
7631       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7632       if (POINTER_TYPE_P (type))
7633 	t = fold_build_pointer_plus (n1, t);
7634       else
7635 	t = fold_build2 (PLUS_EXPR, type, t, n1);
7636       t = fold_convert (type, t);
7637       t = force_gimple_operand_gsi (&gsi, t,
7638 				    DECL_P (startvar)
7639 				    && TREE_ADDRESSABLE (startvar),
7640 				    NULL_TREE, true, GSI_SAME_STMT);
7641       gassign *assign_stmt = gimple_build_assign (startvar, t);
7642       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7643     }
7644   /* Remove the omp for statement.  */
7645   gsi = gsi_last_bb (kfor->entry);
7646   gsi_remove (&gsi, true);
7647 
7648   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7649   gsi = gsi_last_bb (kfor->cont);
7650   gcc_assert (!gsi_end_p (gsi)
7651 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7652   gsi_remove (&gsi, true);
7653 
7654   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7655   gsi = gsi_last_bb (kfor->exit);
7656   gcc_assert (!gsi_end_p (gsi)
7657 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7658   if (intra_group)
7659     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7660   gsi_remove (&gsi, true);
7661 
7662   /* Fix up the now much simpler CFG.  */
7663   remove_edge (find_edge (kfor->cont, body_bb));
7664 
7665   if (kfor->cont != body_bb)
7666     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7667   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7668 }
7669 
7670 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7671    argument_decls.  */
7672 
7673 struct grid_arg_decl_map
7674 {
7675   tree old_arg;
7676   tree new_arg;
7677 };
7678 
7679 /* Invoked through walk_gimple_op.  Remaps all PARM_DECLs of the original
7680    function to the ones pertaining to the kernel function.  */
7681 
7682 static tree
7683 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7684 {
7685   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7686   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7687   tree t = *tp;
7688 
7689   if (t == adm->old_arg)
7690     *tp = adm->new_arg;
7691   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7692   return NULL_TREE;
7693 }
7694 
7695 /* If the TARGET region contains a kernel body for-loop, remove that region
7696    from TARGET and expand it in HSA gridified kernel fashion.  */
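
/* For illustration (hedged: which source shapes are gridified is decided
   by the earlier gridification analysis, not here), a construct along the
   lines of

       #pragma omp target teams
       #pragma omp distribute parallel for
       for (i = 0; i < n; i++)
	 a[i] = b[i];

   arrives here with a GIMPLE_OMP_GRID_BODY region and an
   OMP_CLAUSE__GRIDDIM_ clause on the target statement, and is outlined
   below into a separate kernel function.  */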
7697 
7698 static void
7699 grid_expand_target_grid_body (struct omp_region *target)
7700 {
7701   if (!hsa_gen_requested_p ())
7702     return;
7703 
7704   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7705   struct omp_region **pp;
7706 
7707   for (pp = &target->inner; *pp; pp = &(*pp)->next)
7708     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7709       break;
7710 
7711   struct omp_region *gpukernel = *pp;
7712 
7713   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7714   if (!gpukernel)
7715     {
7716       /* HSA cannot handle OpenACC constructs.  */
7717       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7718 	return;
7719       gcc_checking_assert (orig_child_fndecl);
7720       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7721 				    OMP_CLAUSE__GRIDDIM_));
7722       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7723 
7724       hsa_register_kernel (n);
7725       return;
7726     }
7727 
7728   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7729 			       OMP_CLAUSE__GRIDDIM_));
7730   tree inside_block
7731     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7732   *pp = gpukernel->next;
7733   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7734     if ((*pp)->type == GIMPLE_OMP_FOR)
7735       break;
7736 
7737   struct omp_region *kfor = *pp;
7738   gcc_assert (kfor);
7739   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7740   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7741   *pp = kfor->next;
7742   if (kfor->inner)
7743     {
7744       if (gimple_omp_for_grid_group_iter (for_stmt))
7745 	{
7746 	  struct omp_region **next_pp;
7747 	  for (pp = &kfor->inner; *pp; pp = next_pp)
7748 	    {
7749 	      next_pp = &(*pp)->next;
7750 	      if ((*pp)->type != GIMPLE_OMP_FOR)
7751 		continue;
7752 	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7753 	      gcc_assert (gimple_omp_for_kind (inner)
7754 			  == GF_OMP_FOR_KIND_GRID_LOOP);
7755 	      grid_expand_omp_for_loop (*pp, true);
7756 	      *pp = (*pp)->next;
7757 	      next_pp = pp;
7758 	    }
7759 	}
7760       expand_omp (kfor->inner);
7761     }
7762   if (gpukernel->inner)
7763     expand_omp (gpukernel->inner);
7764 
7765   tree kern_fndecl = copy_node (orig_child_fndecl);
7766   DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7767   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7768   tree tgtblock = gimple_block (tgt_stmt);
7769   tree fniniblock = make_node (BLOCK);
7770   BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7771   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7772   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7773   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7774   DECL_INITIAL (kern_fndecl) = fniniblock;
7775   push_struct_function (kern_fndecl);
7776   cfun->function_end_locus = gimple_location (tgt_stmt);
7777   init_tree_ssa (cfun);
7778   pop_cfun ();
7779 
7780   /* Make sure to generate early debug for the function before
7781      outlining anything.  */
7782   if (! gimple_in_ssa_p (cfun))
7783     (*debug_hooks->early_global_decl) (cfun->decl);
7784 
7785   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7786   gcc_assert (!DECL_CHAIN (old_parm_decl));
7787   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7788   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7789   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7790   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7791   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7792   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7793   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7794   kern_cfun->curr_properties = cfun->curr_properties;
7795 
7796   grid_expand_omp_for_loop (kfor, false);
7797 
7798   /* Remove the omp for statement.  */
7799   gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7800   gsi_remove (&gsi, true);
7801   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7802      return.  */
7803   gsi = gsi_last_bb (gpukernel->exit);
7804   gcc_assert (!gsi_end_p (gsi)
7805 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7806   gimple *ret_stmt = gimple_build_return (NULL);
7807   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7808   gsi_remove (&gsi, true);
7809 
7810   /* Statements in the first BB in the target construct have been produced by
7811      target lowering and must be copied inside the GPUKERNEL, with two
7812      exceptions: the first OMP statement and the OMP_DATA assignment
7813      statement.  */
7814   gsi = gsi_start_bb (single_succ (gpukernel->entry));
7815   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7816   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7817   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7818        !gsi_end_p (tsi); gsi_next (&tsi))
7819     {
7820       gimple *stmt = gsi_stmt (tsi);
7821       if (is_gimple_omp (stmt))
7822 	break;
7823       if (sender
7824 	  && is_gimple_assign (stmt)
7825 	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7826 	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7827 	continue;
7828       gimple *copy = gimple_copy (stmt);
7829       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7830       gimple_set_block (copy, fniniblock);
7831     }
7832 
7833   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7834 			  gpukernel->exit, inside_block);
7835 
7836   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7837   kcn->mark_force_output ();
7838   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7839 
7840   hsa_register_kernel (kcn, orig_child);
7841 
7842   cgraph_node::add_new_function (kern_fndecl, true);
7843   push_cfun (kern_cfun);
7844   cgraph_edge::rebuild_edges ();
7845 
7846   /* Re-map any mention of the PARM_DECL of the original function to the
7847      PARM_DECL of the new one.
7848 
7849      TODO: It would be great if lowering produced references into the GPU
7850      kernel decl straight away and we did not have to do this.  */
7851   struct grid_arg_decl_map adm;
7852   adm.old_arg = old_parm_decl;
7853   adm.new_arg = new_parm_decl;
7854   basic_block bb;
7855   FOR_EACH_BB_FN (bb, kern_cfun)
7856     {
7857       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7858 	{
7859 	  gimple *stmt = gsi_stmt (gsi);
7860 	  struct walk_stmt_info wi;
7861 	  memset (&wi, 0, sizeof (wi));
7862 	  wi.info = &adm;
7863 	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7864 	}
7865     }
7866   pop_cfun ();
7867 
7868   return;
7869 }
7870 
7871 /* Expand the parallel region tree rooted at REGION.  Expansion
7872    proceeds in depth-first order.  Innermost regions are expanded
7873    first.  This way, parallel regions that require a new function to
7874    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7875    internal dependencies in their body.  */
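
/* For example (an editorial sketch), given

       #pragma omp parallel
       {
	 #pragma omp for
	 for (...) ...
       }

   the inner GIMPLE_OMP_FOR region is expanded first, so that by the time
   the enclosing GIMPLE_OMP_PARALLEL is outlined into its child function
   its body no longer contains unexpanded OMP constructs.  */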
7876 
7877 static void
7878 expand_omp (struct omp_region *region)
7879 {
7880   omp_any_child_fn_dumped = false;
7881   while (region)
7882     {
7883       location_t saved_location;
7884       gimple *inner_stmt = NULL;
7885 
7886       /* First, determine whether this is a combined parallel+workshare
7887 	 region.  */
7888       if (region->type == GIMPLE_OMP_PARALLEL)
7889 	determine_parallel_type (region);
7890       else if (region->type == GIMPLE_OMP_TARGET)
7891 	grid_expand_target_grid_body (region);
7892 
7893       if (region->type == GIMPLE_OMP_FOR
7894 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
7895 	inner_stmt = last_stmt (region->inner->entry);
7896 
7897       if (region->inner)
7898 	expand_omp (region->inner);
7899 
7900       saved_location = input_location;
7901       if (gimple_has_location (last_stmt (region->entry)))
7902 	input_location = gimple_location (last_stmt (region->entry));
7903 
7904       switch (region->type)
7905 	{
7906 	case GIMPLE_OMP_PARALLEL:
7907 	case GIMPLE_OMP_TASK:
7908 	  expand_omp_taskreg (region);
7909 	  break;
7910 
7911 	case GIMPLE_OMP_FOR:
7912 	  expand_omp_for (region, inner_stmt);
7913 	  break;
7914 
7915 	case GIMPLE_OMP_SECTIONS:
7916 	  expand_omp_sections (region);
7917 	  break;
7918 
7919 	case GIMPLE_OMP_SECTION:
7920 	  /* Individual omp sections are handled together with their
7921 	     parent GIMPLE_OMP_SECTIONS region.  */
7922 	  break;
7923 
7924 	case GIMPLE_OMP_SINGLE:
7925 	  expand_omp_single (region);
7926 	  break;
7927 
7928 	case GIMPLE_OMP_ORDERED:
7929 	  {
7930 	    gomp_ordered *ord_stmt
7931 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
7932 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7933 				 OMP_CLAUSE_DEPEND))
7934 	      {
7935 		/* We'll expand these when expanding corresponding
7936 		   worksharing region with ordered(n) clause.  */
7937 		gcc_assert (region->outer
7938 			    && region->outer->type == GIMPLE_OMP_FOR);
7939 		region->ord_stmt = ord_stmt;
7940 		break;
7941 	      }
7942 	  }
7943 	  /* FALLTHRU */
7944 	case GIMPLE_OMP_MASTER:
7945 	case GIMPLE_OMP_TASKGROUP:
7946 	case GIMPLE_OMP_CRITICAL:
7947 	case GIMPLE_OMP_TEAMS:
7948 	  expand_omp_synch (region);
7949 	  break;
7950 
7951 	case GIMPLE_OMP_ATOMIC_LOAD:
7952 	  expand_omp_atomic (region);
7953 	  break;
7954 
7955 	case GIMPLE_OMP_TARGET:
7956 	  expand_omp_target (region);
7957 	  break;
7958 
7959 	default:
7960 	  gcc_unreachable ();
7961 	}
7962 
7963       input_location = saved_location;
7964       region = region->next;
7965     }
7966   if (omp_any_child_fn_dumped)
7967     {
7968       if (dump_file)
7969 	dump_function_header (dump_file, current_function_decl, dump_flags);
7970       omp_any_child_fn_dumped = false;
7971     }
7972 }
7973 
7974 /* Helper for build_omp_regions.  Scan the dominator tree starting at
7975    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7976    true, the function ends once a single tree is built (otherwise, the whole
7977    forest of OMP constructs may be built).  */
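
/* Editorial summary of the scan below: a GIMPLE_OMP_* directive ending BB
   opens a new region with entry == BB, a GIMPLE_OMP_CONTINUE fills in the
   region's cont block, and the matching GIMPLE_OMP_RETURN (or
   GIMPLE_OMP_ATOMIC_STORE) records the exit block and pops back to the
   enclosing region; stand-alone directives never become the parent of a
   nested region.  */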
7978 
7979 static void
7980 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7981 		     bool single_tree)
7982 {
7983   gimple_stmt_iterator gsi;
7984   gimple *stmt;
7985   basic_block son;
7986 
7987   gsi = gsi_last_bb (bb);
7988   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7989     {
7990       struct omp_region *region;
7991       enum gimple_code code;
7992 
7993       stmt = gsi_stmt (gsi);
7994       code = gimple_code (stmt);
7995       if (code == GIMPLE_OMP_RETURN)
7996 	{
7997 	  /* STMT is the return point out of region PARENT.  Mark it
7998 	     as the exit point and make PARENT the immediately
7999 	     enclosing region.  */
8000 	  gcc_assert (parent);
8001 	  region = parent;
8002 	  region->exit = bb;
8003 	  parent = parent->outer;
8004 	}
8005       else if (code == GIMPLE_OMP_ATOMIC_STORE)
8006 	{
8007 	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8008 	     GIMPLE_OMP_RETURN, but matches with
8009 	     GIMPLE_OMP_ATOMIC_LOAD.  */
8010 	  gcc_assert (parent);
8011 	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8012 	  region = parent;
8013 	  region->exit = bb;
8014 	  parent = parent->outer;
8015 	}
8016       else if (code == GIMPLE_OMP_CONTINUE)
8017 	{
8018 	  gcc_assert (parent);
8019 	  parent->cont = bb;
8020 	}
8021       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8022 	{
8023 	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8024 	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
8025 	}
8026       else
8027 	{
8028 	  region = new_omp_region (bb, code, parent);
8029 	  /* Otherwise...  */
8030 	  if (code == GIMPLE_OMP_TARGET)
8031 	    {
8032 	      switch (gimple_omp_target_kind (stmt))
8033 		{
8034 		case GF_OMP_TARGET_KIND_REGION:
8035 		case GF_OMP_TARGET_KIND_DATA:
8036 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8037 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
8038 		case GF_OMP_TARGET_KIND_OACC_DATA:
8039 		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8040 		  break;
8041 		case GF_OMP_TARGET_KIND_UPDATE:
8042 		case GF_OMP_TARGET_KIND_ENTER_DATA:
8043 		case GF_OMP_TARGET_KIND_EXIT_DATA:
8044 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
8045 		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8046 		case GF_OMP_TARGET_KIND_OACC_DECLARE:
8047 		  /* ..., other than for those stand-alone directives...  */
8048 		  region = NULL;
8049 		  break;
8050 		default:
8051 		  gcc_unreachable ();
8052 		}
8053 	    }
8054 	  else if (code == GIMPLE_OMP_ORDERED
8055 		   && omp_find_clause (gimple_omp_ordered_clauses
8056 					 (as_a <gomp_ordered *> (stmt)),
8057 				       OMP_CLAUSE_DEPEND))
8058 	    /* #pragma omp ordered depend is also just a stand-alone
8059 	       directive.  */
8060 	    region = NULL;
8061 	  /* ..., this directive becomes the parent of any nested regions.  */
8062 	  if (region)
8063 	    parent = region;
8064 	}
8065     }
8066 
8067   if (single_tree && !parent)
8068     return;
8069 
8070   for (son = first_dom_son (CDI_DOMINATORS, bb);
8071        son;
8072        son = next_dom_son (CDI_DOMINATORS, son))
8073     build_omp_regions_1 (son, parent, single_tree);
8074 }
8075 
8076 /* Builds the tree of OMP regions rooted at ROOT, storing it in
8077    root_omp_region.  */
8078 
8079 static void
8080 build_omp_regions_root (basic_block root)
8081 {
8082   gcc_assert (root_omp_region == NULL);
8083   build_omp_regions_1 (root, NULL, true);
8084   gcc_assert (root_omp_region != NULL);
8085 }
8086 
8087 /* Expands the OMP construct (and its subconstructs) starting in HEAD.  */
8088 
8089 void
8090 omp_expand_local (basic_block head)
8091 {
8092   build_omp_regions_root (head);
8093   if (dump_file && (dump_flags & TDF_DETAILS))
8094     {
8095       fprintf (dump_file, "\nOMP region tree\n\n");
8096       dump_omp_region (dump_file, root_omp_region, 0);
8097       fprintf (dump_file, "\n");
8098     }
8099 
8100   remove_exit_barriers (root_omp_region);
8101   expand_omp (root_omp_region);
8102 
8103   omp_free_regions ();
8104 }
8105 
8106 /* Scan the CFG and build a tree of OMP regions, storing the root of the
8107    tree in root_omp_region.  */
8108 
8109 static void
8110 build_omp_regions (void)
8111 {
8112   gcc_assert (root_omp_region == NULL);
8113   calculate_dominance_info (CDI_DOMINATORS);
8114   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8115 }
8116 
8117 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
8118 
8119 static unsigned int
8120 execute_expand_omp (void)
8121 {
8122   build_omp_regions ();
8123 
8124   if (!root_omp_region)
8125     return 0;
8126 
8127   if (dump_file)
8128     {
8129       fprintf (dump_file, "\nOMP region tree\n\n");
8130       dump_omp_region (dump_file, root_omp_region, 0);
8131       fprintf (dump_file, "\n");
8132     }
8133 
8134   remove_exit_barriers (root_omp_region);
8135 
8136   expand_omp (root_omp_region);
8137 
8138   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8139     verify_loop_structure ();
8140   cleanup_tree_cfg ();
8141 
8142   omp_free_regions ();
8143 
8144   return 0;
8145 }
8146 
8147 /* OMP expansion -- the default pass, run before creation of SSA form.  */
8148 
8149 namespace {
8150 
8151 const pass_data pass_data_expand_omp =
8152 {
8153   GIMPLE_PASS, /* type */
8154   "ompexp", /* name */
8155   OPTGROUP_OMP, /* optinfo_flags */
8156   TV_NONE, /* tv_id */
8157   PROP_gimple_any, /* properties_required */
8158   PROP_gimple_eomp, /* properties_provided */
8159   0, /* properties_destroyed */
8160   0, /* todo_flags_start */
8161   0, /* todo_flags_finish */
8162 };
8163 
8164 class pass_expand_omp : public gimple_opt_pass
8165 {
8166 public:
8167   pass_expand_omp (gcc::context *ctxt)
8168     : gimple_opt_pass (pass_data_expand_omp, ctxt)
8169   {}
8170 
8171   /* opt_pass methods: */
8172   virtual unsigned int execute (function *)
8173     {
8174       bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8175 		    || flag_openmp_simd != 0)
8176 		   && !seen_error ());
8177 
8178       /* This pass always runs, to provide PROP_gimple_eomp.
8179 	 But often, there is nothing to do.  */
8180       if (!gate)
8181 	return 0;
8182 
8183       return execute_expand_omp ();
8184     }
8185 
8186 }; // class pass_expand_omp
8187 
8188 } // anon namespace
8189 
8190 gimple_opt_pass *
8191 make_pass_expand_omp (gcc::context *ctxt)
8192 {
8193   return new pass_expand_omp (ctxt);
8194 }
8195 
8196 namespace {
8197 
8198 const pass_data pass_data_expand_omp_ssa =
8199 {
8200   GIMPLE_PASS, /* type */
8201   "ompexpssa", /* name */
8202   OPTGROUP_OMP, /* optinfo_flags */
8203   TV_NONE, /* tv_id */
8204   PROP_cfg | PROP_ssa, /* properties_required */
8205   PROP_gimple_eomp, /* properties_provided */
8206   0, /* properties_destroyed */
8207   0, /* todo_flags_start */
8208   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8209 };
8210 
8211 class pass_expand_omp_ssa : public gimple_opt_pass
8212 {
8213 public:
8214   pass_expand_omp_ssa (gcc::context *ctxt)
8215     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8216   {}
8217 
8218   /* opt_pass methods: */
8219   virtual bool gate (function *fun)
8220     {
8221       return !(fun->curr_properties & PROP_gimple_eomp);
8222     }
8223   virtual unsigned int execute (function *) { return execute_expand_omp (); }
8224   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8225 
8226 }; // class pass_expand_omp_ssa
8227 
8228 } // anon namespace
8229 
8230 gimple_opt_pass *
8231 make_pass_expand_omp_ssa (gcc::context *ctxt)
8232 {
8233   return new pass_expand_omp_ssa (ctxt);
8234 }
8235 
8236 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8237    GIMPLE_* codes.  */
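
/* An editorial sketch of the edges built for a worksharing loop region
   (entry block ending in GIMPLE_OMP_FOR, cont in GIMPLE_OMP_CONTINUE,
   exit in GIMPLE_OMP_RETURN):

       entry -> body   (existing successor edge, marked abnormal)
       cont  -> body   (abnormal loopback edge)
       entry -> exit   (abnormal; the body may run zero times)
       cont  -> exit   (fallthru, marked abnormal)

   as wired up in the GIMPLE_OMP_CONTINUE case below.  */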
8238 
8239 bool
8240 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8241 		       int *region_idx)
8242 {
8243   gimple *last = last_stmt (bb);
8244   enum gimple_code code = gimple_code (last);
8245   struct omp_region *cur_region = *region;
8246   bool fallthru = false;
8247 
8248   switch (code)
8249     {
8250     case GIMPLE_OMP_PARALLEL:
8251     case GIMPLE_OMP_TASK:
8252     case GIMPLE_OMP_FOR:
8253     case GIMPLE_OMP_SINGLE:
8254     case GIMPLE_OMP_TEAMS:
8255     case GIMPLE_OMP_MASTER:
8256     case GIMPLE_OMP_TASKGROUP:
8257     case GIMPLE_OMP_CRITICAL:
8258     case GIMPLE_OMP_SECTION:
8259     case GIMPLE_OMP_GRID_BODY:
8260       cur_region = new_omp_region (bb, code, cur_region);
8261       fallthru = true;
8262       break;
8263 
8264     case GIMPLE_OMP_ORDERED:
8265       cur_region = new_omp_region (bb, code, cur_region);
8266       fallthru = true;
8267       if (omp_find_clause (gimple_omp_ordered_clauses
8268 			     (as_a <gomp_ordered *> (last)),
8269 			   OMP_CLAUSE_DEPEND))
8270 	cur_region = cur_region->outer;
8271       break;
8272 
8273     case GIMPLE_OMP_TARGET:
8274       cur_region = new_omp_region (bb, code, cur_region);
8275       fallthru = true;
8276       switch (gimple_omp_target_kind (last))
8277 	{
8278 	case GF_OMP_TARGET_KIND_REGION:
8279 	case GF_OMP_TARGET_KIND_DATA:
8280 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8281 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8282 	case GF_OMP_TARGET_KIND_OACC_DATA:
8283 	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8284 	  break;
8285 	case GF_OMP_TARGET_KIND_UPDATE:
8286 	case GF_OMP_TARGET_KIND_ENTER_DATA:
8287 	case GF_OMP_TARGET_KIND_EXIT_DATA:
8288 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8289 	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8290 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
8291 	  cur_region = cur_region->outer;
8292 	  break;
8293 	default:
8294 	  gcc_unreachable ();
8295 	}
8296       break;
8297 
8298     case GIMPLE_OMP_SECTIONS:
8299       cur_region = new_omp_region (bb, code, cur_region);
8300       fallthru = true;
8301       break;
8302 
8303     case GIMPLE_OMP_SECTIONS_SWITCH:
8304       fallthru = false;
8305       break;
8306 
8307     case GIMPLE_OMP_ATOMIC_LOAD:
8308     case GIMPLE_OMP_ATOMIC_STORE:
8309       fallthru = true;
8310       break;
8311 
8312     case GIMPLE_OMP_RETURN:
8313       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8314 	 somewhere other than the next block.  This will be
8315 	 created later.  */
8316       cur_region->exit = bb;
8317       if (cur_region->type == GIMPLE_OMP_TASK)
8318 	/* Add an edge corresponding to not scheduling the task
8319 	   immediately.  */
8320 	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8321       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8322       cur_region = cur_region->outer;
8323       break;
8324 
8325     case GIMPLE_OMP_CONTINUE:
8326       cur_region->cont = bb;
8327       switch (cur_region->type)
8328 	{
8329 	case GIMPLE_OMP_FOR:
8330 	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8331 	     successor edges as abnormal to prevent splitting
8332 	     them.  */
8333 	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8334 	  /* Make the loopback edge.  */
8335 	  make_edge (bb, single_succ (cur_region->entry),
8336 		     EDGE_ABNORMAL);
8337 
8338 	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8339 	     corresponds to the case that the body of the loop
8340 	     is not executed at all.  */
8341 	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8342 	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8343 	  fallthru = false;
8344 	  break;
8345 
8346 	case GIMPLE_OMP_SECTIONS:
8347 	  /* Wire up the edges into and out of the nested sections.  */
8348 	  {
8349 	    basic_block switch_bb = single_succ (cur_region->entry);
8350 
8351 	    struct omp_region *i;
8352 	    for (i = cur_region->inner; i ; i = i->next)
8353 	      {
8354 		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8355 		make_edge (switch_bb, i->entry, 0);
8356 		make_edge (i->exit, bb, EDGE_FALLTHRU);
8357 	      }
8358 
8359 	    /* Make the loopback edge to the block with
8360 	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8361 	    make_edge (bb, switch_bb, 0);
8362 
8363 	    /* Make the edge from the switch to exit.  */
8364 	    make_edge (switch_bb, bb->next_bb, 0);
8365 	    fallthru = false;
8366 	  }
8367 	  break;
8368 
8369 	case GIMPLE_OMP_TASK:
8370 	  fallthru = true;
8371 	  break;
8372 
8373 	default:
8374 	  gcc_unreachable ();
8375 	}
8376       break;
8377 
8378     default:
8379       gcc_unreachable ();
8380     }
8381 
8382   if (*region != cur_region)
8383     {
8384       *region = cur_region;
8385       if (cur_region)
8386 	*region_idx = cur_region->entry->index;
8387       else
8388 	*region_idx = 0;
8389     }
8390 
8391   return fallthru;
8392 }
8393 
8394 #include "gt-omp-expand.h"
8395