/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
#include "opts.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
   of the simd modifier (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
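
/* For example (a sketch, assuming omp_max_vf () returns 4): a chunk
   size of 10 becomes (10 + 3) & -4 == 12, i.e. the chunk size is
   rounded up to the next multiple of the vectorization factor so that
   simd chunks are not split across threads.  */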

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  PAR_STMT is the parallel directive being
   combined with it.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
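
/* The values collected above line up with the extra arguments of the
   combined libgomp entry points.  As a rough sketch (argument names
   here are illustrative; see libgomp for the authoritative
   prototypes):

     void GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
				      unsigned num_threads, long start,
				      long end, long incr,
				      long chunk_size, unsigned flags);
     void GOMP_parallel_sections (void (*fn) (void *), void *data,
				  unsigned num_threads, unsigned count,
				  unsigned flags);

   START, END, INCR and CHUNK_SIZE come from WS_ARGS for loops; COUNT
   comes from WS_ARGS for sections.  */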

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
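
/* For instance (a sketch):

     #pragma omp parallel
     #pragma omp for schedule (dynamic)
     for (i = 0; i < n; i++)
       body;

   is perfectly nested, so both regions are marked is_combined_parallel
   and a single GOMP_parallel_loop_dynamic call can be emitted instead
   of a GOMP_parallel call plus a separate loop-start call inside the
   child function.  */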

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region of TYPE, entered at BB, inside region
   PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}
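
/* To illustrate, for

     #pragma omp parallel
     {
       #pragma omp for
       ...
     }

   the tree built here has the parallel region on ROOT_OMP_REGION's
   toplevel list, the for region as its INNER child, and any sibling
   constructs chained through NEXT.  */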

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have an explicit task region nested in a parallel region,
     when expanding the task region current_function_decl will be the
     original source function, but we actually want to use as context
     the child function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
        {
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}
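
/* E.g. for a task nested in a parallel, the task's child function gets
   DECL_CONTEXT set to the parallel's child function rather than to the
   original source function, matching where the outlined code will
   actually live.  */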

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block in which to insert the code.
   WS_ARGS will be set if this is a call to a combined
   parallel+workshare construct; it contains the list of additional
   arguments needed by the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
		       & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
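
  /* The switch above maps the schedule kind onto one of the
     GOMP_parallel_loop_* builtins; e.g. schedule (nonmonotonic: dynamic)
     yields start_ix2 = 3 + OMP_CLAUSE_SCHEDULE_DYNAMIC, which (assuming
     the builtin enum mirrors the omp-builtins.def order) resolves to
     BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC.  */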

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			   EQ_EXPR, unsigned_type_node, cond,
			   build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
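
/* As a concrete sketch, for

     #pragma omp parallel num_threads (4)

   the code built above amounts to

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   (the function and variable names shown are illustrative).  */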

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block in which to insert the
   code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
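
/* For reference, the non-taskloop call built above matches a libgomp
   entry point of roughly this shape (a sketch; see libgomp for the
   authoritative prototype):

     void GOMP_task (void (*fn) (void *), void *data,
		     void (*cpyfn) (void *, void *), long arg_size,
		     long arg_align, bool if_clause, unsigned flags,
		     void **depend, int priority, void *detach);  */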

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block in which to insert
   the code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  BB is the block in which to
   insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
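
/* The five arguments pushed above correspond to a host teams entry
   point of roughly this shape (a sketch; see libgomp for the real
   prototype):

     void GOMP_teams_reg (void (*fn) (void *), void *data,
			  unsigned num_teams, unsigned thread_limit,
			  unsigned flags);  */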

/* Chain all the DECLs in V by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any barrier that a workshare inside the
   GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
   can now be removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for the task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
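
/* E.g. within one parallel body

     n = omp_get_num_threads ();
     ...
     m = omp_get_num_threads ();

   both calls are redirected to the const __builtin_omp_get_num_threads,
   allowing later passes to CSE them into a single call.  */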

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */

static gcond *
expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
		       tree lhs, tree rhs, bool after = false)
{
  gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
  if (after)
    gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }
  return cond_stmt;
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up its loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var. */
};
1545 
1546 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1547    Fill in COUNTS array.  Emit any initialization code before GSI.
1548    Return the calculated outer loop bound of BOUND_TYPE.  */
1549 
1550 static tree
expand_oacc_collapse_init(const struct omp_for_data * fd,gimple_stmt_iterator * gsi,oacc_collapse * counts,tree diff_type,tree bound_type,location_t loc)1551 expand_oacc_collapse_init (const struct omp_for_data *fd,
1552 			   gimple_stmt_iterator *gsi,
1553 			   oacc_collapse *counts, tree diff_type,
1554 			   tree bound_type, location_t loc)
1555 {
1556   tree tiling = fd->tiling;
1557   tree total = build_int_cst (bound_type, 1);
1558   int ix;
1559 
1560   gcc_assert (integer_onep (fd->loop.step));
1561   gcc_assert (integer_zerop (fd->loop.n1));
1562 
1563   /* When tiling, the first operand of the tile clause applies to the
1564      innermost loop, and we work outwards from there.  Seems
1565      backwards, but whatever.  */
1566   for (ix = fd->collapse; ix--;)
1567     {
1568       const omp_for_data_loop *loop = &fd->loops[ix];
1569 
1570       tree iter_type = TREE_TYPE (loop->v);
1571       tree plus_type = iter_type;
1572 
1573       gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1574 
1575       if (POINTER_TYPE_P (iter_type))
1576 	plus_type = sizetype;
1577 
1578       if (tiling)
1579 	{
1580 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1581 	  tree loop_no = build_int_cst (integer_type_node, ix);
1582 	  tree tile = TREE_VALUE (tiling);
1583 	  gcall *call
1584 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585 					  /* gwv-outer=*/integer_zero_node,
1586 					  /* gwv-inner=*/integer_zero_node);
1587 
1588 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590 	  gimple_call_set_lhs (call, counts[ix].tile);
1591 	  gimple_set_location (call, loc);
1592 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1593 
1594 	  tiling = TREE_CHAIN (tiling);
1595 	}
1596       else
1597 	{
1598 	  counts[ix].tile = NULL;
1599 	  counts[ix].outer = loop->v;
1600 	}
1601 
1602       tree b = loop->n1;
1603       tree e = loop->n2;
1604       tree s = loop->step;
1605       bool up = loop->cond_code == LT_EXPR;
1606       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607       bool negating;
1608       tree expr;
1609 
1610       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611 				    true, GSI_SAME_STMT);
1612       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613 				    true, GSI_SAME_STMT);
1614 
1615       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1616       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617       if (negating)
1618 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619       s = fold_convert (diff_type, s);
1620       if (negating)
1621 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623 				    true, GSI_SAME_STMT);
1624 
1625       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1626       negating = !up && TYPE_UNSIGNED (iter_type);
1627       expr = fold_build2 (MINUS_EXPR, plus_type,
1628 			  fold_convert (plus_type, negating ? b : e),
1629 			  fold_convert (plus_type, negating ? e : b));
1630       expr = fold_convert (diff_type, expr);
1631       if (negating)
1632 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633       tree range = force_gimple_operand_gsi
1634 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1635 
1636       /* Determine number of iterations.  */
1637       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
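      /* I.e. (RANGE - DIR + S) / S, the trip count rounded towards the
	 iteration direction; e.g. an ascending loop with RANGE 10 and
	 step 3 gives (10 - 1 + 3) / 3 = 4 iterations.  */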
1640 
1641       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642 					     true, GSI_SAME_STMT);
1643 
1644       counts[ix].base = b;
1645       counts[ix].iters = iters;
1646       counts[ix].step = s;
1647 
1648       total = fold_build2 (MULT_EXPR, bound_type, total,
1649 			   fold_convert (bound_type, iters));
1650     }
1651 
1652   return total;
1653 }
1654 
1655 /* Emit initializers for collapsed loop members.  INNER is true if
1656    this is for the element loop of a TILE.  IVAR is the outer
1657    loop iteration variable, from which collapsed loop iteration values
1658    are calculated.  COUNTS array has been initialized by
1659    expand_oacc_collapse_init.  */
1660 
1661 static void
1662 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1663 			   gimple_stmt_iterator *gsi,
1664 			   const oacc_collapse *counts, tree ivar,
1665 			   tree diff_type)
1666 {
1667   tree ivar_type = TREE_TYPE (ivar);
1668 
1669   /*  The most rapidly changing iteration variable is the innermost
1670       one.  */
1671   for (int ix = fd->collapse; ix--;)
1672     {
1673       const omp_for_data_loop *loop = &fd->loops[ix];
1674       const oacc_collapse *collapse = &counts[ix];
1675       tree v = inner ? loop->v : collapse->outer;
1676       tree iter_type = TREE_TYPE (v);
1677       tree plus_type = iter_type;
1678       enum tree_code plus_code = PLUS_EXPR;
1679       tree expr;
1680 
1681       if (POINTER_TYPE_P (iter_type))
1682 	{
1683 	  plus_code = POINTER_PLUS_EXPR;
1684 	  plus_type = sizetype;
1685 	}
1686 
1687       expr = ivar;
1688       if (ix)
1689 	{
1690 	  tree mod = fold_convert (ivar_type, collapse->iters);
1691 	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1692 	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1693 	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1694 					   true, GSI_SAME_STMT);
1695 	}
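      /* For all but the outermost loop, EXPR is now this loop's index
	 (IVAR % ITERS) and IVAR carries the remaining iterations
	 onwards (IVAR / ITERS); e.g. with an inner iteration count of
	 8, a collapsed index of 19 gives inner index 3 and passes 2 on
	 to the enclosing loop.  */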
1696 
1697       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1698 			  fold_convert (diff_type, collapse->step));
1699       expr = fold_build2 (plus_code, iter_type,
1700 			  inner ? collapse->outer : collapse->base,
1701 			  fold_convert (plus_type, expr));
1702       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1703 				       true, GSI_SAME_STMT);
1704       gassign *ass = gimple_build_assign (v, expr);
1705       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1706     }
1707 }
1708 
1709 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1710    of the combined collapse > 1 loop constructs, generate code like:
1711 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712 	if (cond3 is <)
1713 	  adj = STEP3 - 1;
1714 	else
1715 	  adj = STEP3 + 1;
1716 	count3 = (adj + N32 - N31) / STEP3;
1717 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718 	if (cond2 is <)
1719 	  adj = STEP2 - 1;
1720 	else
1721 	  adj = STEP2 + 1;
1722 	count2 = (adj + N22 - N21) / STEP2;
1723 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724 	if (cond1 is <)
1725 	  adj = STEP1 - 1;
1726 	else
1727 	  adj = STEP1 + 1;
1728 	count1 = (adj + N12 - N11) / STEP1;
1729 	count = count1 * count2 * count3;
1730    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731 	count = 0;
1732    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1733    of the combined loop constructs, just initialize COUNTS array
1734    from the _looptemp_ clauses.  For loop nests with non-rectangular
1735    loops, do this only for the rectangular loops.  Then pick
1736    the loops which reference outer vars in their bound expressions
1737    and the loops which they refer to and for this sub-nest compute
1738    number of iterations.  For triangular loops use Faulhaber's formula,
1739    otherwise as a fallback, compute by iterating the loops.
1740    If e.g. the sub-nest is
1741 	for (I = N11; I COND1 N12; I += STEP1)
1742 	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743 	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1744    do:
1745 	COUNT = 0;
1746 	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747 	for (tmpj = M21 * tmpi + N21;
1748 	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1749 	  {
1750 	    int tmpk1 = M31 * tmpj + N31;
1751 	    int tmpk2 = M32 * tmpj + N32;
1752 	    if (tmpk1 COND3 tmpk2)
1753 	      {
1754 		if (COND3 is <)
1755 		  adj = STEP3 - 1;
1756 		else
1757 		  adj = STEP3 + 1;
1758 		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1759 	      }
1760 	  }
1761    and finally multiply the counts of the rectangular loops not
1762    in the sub-nest with COUNT.  Also, store as counts[fd->last_nonrect]
1763    the number of iterations of the loops from fd->first_nonrect
1764    to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765    by the counts of rectangular loops not referenced in any non-rectangular
1766    loops sandwiched in between those.  */
1767 
1768 /* NOTE: It *could* be better to moosh all of the BBs together,
1769    creating one larger BB with all the computation and the unexpected
1770    jump at the end.  I.e.
1771 
1772    bool zero3, zero2, zero1, zero;
1773 
1774    zero3 = N32 c3 N31;
1775    count3 = (N32 - N31) /[cl] STEP3;
1776    zero2 = N22 c2 N21;
1777    count2 = (N22 - N21) /[cl] STEP2;
1778    zero1 = N12 c1 N11;
1779    count1 = (N12 - N11) /[cl] STEP1;
1780    zero = zero3 || zero2 || zero1;
1781    count = count1 * count2 * count3;
1782    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1783 
1784    After all, we expect zero to be false, and thus we expect to have to
1785    evaluate all of the comparison expressions, so short-circuiting
1786    oughtn't be a win.  Since the condition isn't protecting a
1787    denominator, we're not concerned about divide-by-zero, so we can
1788    fully evaluate count even if a numerator turned out to be wrong.
1789 
1790    It seems like putting this all together would create much better
1791    scheduling opportunities, and less pressure on the chip's branch
1792    predictor.  */
1793 
1794 static void
1795 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 			    basic_block &entry_bb, tree *counts,
1797 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1798 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1799 			    basic_block &l2_dom_bb)
1800 {
1801   tree t, type = TREE_TYPE (fd->loop.v);
1802   edge e, ne;
1803   int i;
1804 
1805   /* Collapsed loops need work for expansion into SSA form.  */
1806   gcc_assert (!gimple_in_ssa_p (cfun));
1807 
1808   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1810     {
1811       gcc_assert (fd->ordered == 0);
1812       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813 	 isn't supposed to be handled, as the inner loop doesn't
1814 	 use it.  */
1815       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816 				     OMP_CLAUSE__LOOPTEMP_);
1817       gcc_assert (innerc);
1818       for (i = 0; i < fd->collapse; i++)
1819 	{
1820 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821 				    OMP_CLAUSE__LOOPTEMP_);
1822 	  gcc_assert (innerc);
1823 	  if (i)
1824 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1825 	  else
1826 	    counts[0] = NULL_TREE;
1827 	}
1828       if (fd->non_rect
1829 	  && fd->last_nonrect == fd->first_nonrect + 1
1830 	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1831 	{
1832 	  tree c[4];
1833 	  for (i = 0; i < 4; i++)
1834 	    {
1835 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836 					OMP_CLAUSE__LOOPTEMP_);
1837 	      gcc_assert (innerc);
1838 	      c[i] = OMP_CLAUSE_DECL (innerc);
1839 	    }
1840 	  counts[0] = c[0];
1841 	  fd->first_inner_iterations = c[1];
1842 	  fd->factor = c[2];
1843 	  fd->adjn1 = c[3];
1844 	}
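      /* The four extra _looptemp_ clauses propagate the precomputed
	 triangular sub-nest data (counts[0], fd->first_inner_iterations,
	 fd->factor and fd->adjn1) from the combined outer construct;
	 see the corresponding count = 4 handling in
	 expand_omp_for_init_vars.  */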
1845       return;
1846     }
1847 
1848   for (i = fd->collapse; i < fd->ordered; i++)
1849     {
1850       tree itype = TREE_TYPE (fd->loops[i].v);
1851       counts[i] = NULL_TREE;
1852       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853 		       fold_convert (itype, fd->loops[i].n1),
1854 		       fold_convert (itype, fd->loops[i].n2));
1855       if (t && integer_zerop (t))
1856 	{
1857 	  for (i = fd->collapse; i < fd->ordered; i++)
1858 	    counts[i] = build_int_cst (type, 0);
1859 	  break;
1860 	}
1861     }
1862   bool rect_count_seen = false;
1863   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1864     {
1865       tree itype = TREE_TYPE (fd->loops[i].v);
1866 
1867       if (i >= fd->collapse && counts[i])
1868 	continue;
1869       if (fd->non_rect)
1870 	{
1871 	  /* Skip loops that use outer iterators in their expressions
1872 	     during this phase.  */
1873 	  if (fd->loops[i].m1 || fd->loops[i].m2)
1874 	    {
1875 	      counts[i] = build_zero_cst (type);
1876 	      continue;
1877 	    }
1878 	}
1879       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881 				fold_convert (itype, fd->loops[i].n1),
1882 				fold_convert (itype, fd->loops[i].n2)))
1883 	      == NULL_TREE || !integer_onep (t)))
1884 	{
1885 	  gcond *cond_stmt;
1886 	  tree n1, n2;
1887 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889 					 true, GSI_SAME_STMT);
1890 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892 					 true, GSI_SAME_STMT);
1893 	  cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894 					     n1, n2);
1895 	  e = split_block (entry_bb, cond_stmt);
1896 	  basic_block &zero_iter_bb
1897 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898 	  int &first_zero_iter
1899 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900 	  if (zero_iter_bb == NULL)
1901 	    {
1902 	      gassign *assign_stmt;
1903 	      first_zero_iter = i;
1904 	      zero_iter_bb = create_empty_bb (entry_bb);
1905 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906 	      *gsi = gsi_after_labels (zero_iter_bb);
1907 	      if (i < fd->collapse)
1908 		assign_stmt = gimple_build_assign (fd->loop.n2,
1909 						   build_zero_cst (type));
1910 	      else
1911 		{
1912 		  counts[i] = create_tmp_reg (type, ".count");
1913 		  assign_stmt
1914 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1915 		}
1916 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918 				       entry_bb);
1919 	    }
1920 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921 	  ne->probability = profile_probability::very_unlikely ();
1922 	  e->flags = EDGE_TRUE_VALUE;
1923 	  e->probability = ne->probability.invert ();
1924 	  if (l2_dom_bb == NULL)
1925 	    l2_dom_bb = entry_bb;
1926 	  entry_bb = e->dest;
1927 	  *gsi = gsi_last_nondebug_bb (entry_bb);
1928 	}
1929 
1930       if (POINTER_TYPE_P (itype))
1931 	itype = signed_type_for (itype);
1932       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933 				 ? -1 : 1));
1934       t = fold_build2 (PLUS_EXPR, itype,
1935 		       fold_convert (itype, fd->loops[i].step), t);
1936       t = fold_build2 (PLUS_EXPR, itype, t,
1937 		       fold_convert (itype, fd->loops[i].n2));
1938       t = fold_build2 (MINUS_EXPR, itype, t,
1939 		       fold_convert (itype, fd->loops[i].n1));
1940       /* ?? We could probably use CEIL_DIV_EXPR instead of
1941	 TRUNC_DIV_EXPR plus the manual adjustment.  Unless we can't
1942 	 generate the same code in the end because generically we
1943 	 don't know that the values involved must be negative for
1944 	 GT??  */
1945       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947 			 fold_build1 (NEGATE_EXPR, itype, t),
1948 			 fold_build1 (NEGATE_EXPR, itype,
1949 				      fold_convert (itype,
1950 						    fd->loops[i].step)));
1951       else
1952 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953 			 fold_convert (itype, fd->loops[i].step));
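      /* T is now (N2 - N1 + STEP - 1) / STEP for LT_EXPR loops and
	 (N2 - N1 + STEP + 1) / STEP for GT_EXPR loops, i.e. the trip
	 count rounded towards the iteration direction; the unsigned
	 GT_EXPR case negates both operands so that TRUNC_DIV_EXPR
	 divides two non-negative values.  */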
1954       t = fold_convert (type, t);
1955       if (TREE_CODE (t) == INTEGER_CST)
1956 	counts[i] = t;
1957       else
1958 	{
1959 	  if (i < fd->collapse || i != first_zero_iter2)
1960 	    counts[i] = create_tmp_reg (type, ".count");
1961 	  expand_omp_build_assign (gsi, counts[i], t);
1962 	}
1963       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1964 	{
1965 	  if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966 	    continue;
1967 	  if (!rect_count_seen)
1968 	    {
1969 	      t = counts[i];
1970 	      rect_count_seen = true;
1971 	    }
1972 	  else
1973 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1975 	}
1976     }
1977   if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1978     {
1979       gcc_assert (fd->last_nonrect != -1);
1980 
1981       counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982       expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983 			       build_zero_cst (type));
1984       for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985 	if (fd->loops[i].m1
1986 	    || fd->loops[i].m2
1987 	    || fd->loops[i].non_rect_referenced)
1988 	  break;
1989       if (i == fd->last_nonrect
1990 	  && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991 	  && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992 	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1993 	{
1994 	  int o = fd->first_nonrect;
1995 	  tree itype = TREE_TYPE (fd->loops[o].v);
1996 	  tree n1o = create_tmp_reg (itype, ".n1o");
1997 	  t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998 	  expand_omp_build_assign (gsi, n1o, t);
1999 	  tree n2o = create_tmp_reg (itype, ".n2o");
2000 	  t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001 	  expand_omp_build_assign (gsi, n2o, t);
2002 	  if (fd->loops[i].m1 && fd->loops[i].m2)
2003 	    t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004 			     unshare_expr (fd->loops[i].m1));
2005 	  else if (fd->loops[i].m1)
2006 	    t = fold_build1 (NEGATE_EXPR, itype,
2007 			     unshare_expr (fd->loops[i].m1));
2008 	  else
2009 	    t = unshare_expr (fd->loops[i].m2);
2010 	  tree m2minusm1
2011 	    = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012 					true, GSI_SAME_STMT);
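	  /* M2MINUSM1 is the difference between the slopes of the
	     inner loop's end and start bounds; multiplied by the outer
	     step and divided by the inner step it yields FACTOR, the
	     change in the inner iteration count per outer iteration.  */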
2013 
2014 	  gimple_stmt_iterator gsi2 = *gsi;
2015 	  gsi_prev (&gsi2);
2016 	  e = split_block (entry_bb, gsi_stmt (gsi2));
2017 	  e = split_block (e->dest, (gimple *) NULL);
2018 	  basic_block bb1 = e->src;
2019 	  entry_bb = e->dest;
2020 	  *gsi = gsi_after_labels (entry_bb);
2021 
2022 	  gsi2 = gsi_after_labels (bb1);
2023 	  tree ostep = fold_convert (itype, fd->loops[o].step);
2024 	  t = build_int_cst (itype, (fd->loops[o].cond_code
2025 				     == LT_EXPR ? -1 : 1));
2026 	  t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027 	  t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028 	  t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029 	  if (TYPE_UNSIGNED (itype)
2030 	      && fd->loops[o].cond_code == GT_EXPR)
2031 	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032 			     fold_build1 (NEGATE_EXPR, itype, t),
2033 			     fold_build1 (NEGATE_EXPR, itype, ostep));
2034 	  else
2035 	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036 	  tree outer_niters
2037 	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038 					true, GSI_SAME_STMT);
2039 	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040 			   build_one_cst (itype));
2041 	  t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042 	  t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043 	  tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044 						true, GSI_SAME_STMT);
2045 	  tree n1, n2, n1e, n2e;
2046 	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047 	  if (fd->loops[i].m1)
2048 	    {
2049 	      n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050 	      n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051 	      n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2052 	    }
2053 	  else
2054 	    n1 = t;
2055 	  n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056 					 true, GSI_SAME_STMT);
2057 	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058 	  if (fd->loops[i].m2)
2059 	    {
2060 	      n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061 	      n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062 	      n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2063 	    }
2064 	  else
2065 	    n2 = t;
2066 	  n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067 					 true, GSI_SAME_STMT);
2068 	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069 	  if (fd->loops[i].m1)
2070 	    {
2071 	      n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072 	      n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073 	      n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2074 	    }
2075 	  else
2076 	    n1e = t;
2077 	  n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078 					  true, GSI_SAME_STMT);
2079 	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080 	  if (fd->loops[i].m2)
2081 	    {
2082 	      n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083 	      n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084 	      n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2085 	    }
2086 	  else
2087 	    n2e = t;
2088 	  n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089 					  true, GSI_SAME_STMT);
2090 	  gcond *cond_stmt
2091 	    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092 				     n1, n2);
2093 	  e = split_block (bb1, cond_stmt);
2094 	  e->flags = EDGE_TRUE_VALUE;
2095 	  e->probability = profile_probability::likely ().guessed ();
2096 	  basic_block bb2 = e->dest;
2097 	  gsi2 = gsi_after_labels (bb2);
2098 
2099 	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100 					     n1e, n2e);
2101 	  e = split_block (bb2, cond_stmt);
2102 	  e->flags = EDGE_TRUE_VALUE;
2103 	  e->probability = profile_probability::likely ().guessed ();
2104 	  gsi2 = gsi_after_labels (e->dest);
2105 
2106 	  tree step = fold_convert (itype, fd->loops[i].step);
2107 	  t = build_int_cst (itype, (fd->loops[i].cond_code
2108 				     == LT_EXPR ? -1 : 1));
2109 	  t = fold_build2 (PLUS_EXPR, itype, step, t);
2110 	  t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111 	  t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112 	  if (TYPE_UNSIGNED (itype)
2113 	      && fd->loops[i].cond_code == GT_EXPR)
2114 	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115 			     fold_build1 (NEGATE_EXPR, itype, t),
2116 			     fold_build1 (NEGATE_EXPR, itype, step));
2117 	  else
2118 	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119 	  tree first_inner_iterations
2120 	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121 					true, GSI_SAME_STMT);
2122 	  t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123 	  if (TYPE_UNSIGNED (itype)
2124 	      && fd->loops[i].cond_code == GT_EXPR)
2125 	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126 			     fold_build1 (NEGATE_EXPR, itype, t),
2127 			     fold_build1 (NEGATE_EXPR, itype, step));
2128 	  else
2129 	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130 	  tree factor
2131 	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132 					true, GSI_SAME_STMT);
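	  /* Faulhaber: the first outer iteration runs the inner loop
	     FIRST_INNER_ITERATIONS times and each subsequent outer
	     iteration changes that by FACTOR, so the total is
	     OUTER_NITERS * FIRST_INNER_ITERATIONS
	     + FACTOR * OUTER_NITERS * (OUTER_NITERS - 1) / 2.  */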
2133 	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134 			   build_one_cst (itype));
2135 	  t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136 	  t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137 	  t = fold_build2 (MULT_EXPR, itype, factor, t);
2138 	  t = fold_build2 (PLUS_EXPR, itype,
2139 			   fold_build2 (MULT_EXPR, itype, outer_niters,
2140 					first_inner_iterations), t);
2141 	  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142 				   fold_convert (type, t));
2143 
2144 	  basic_block bb3 = create_empty_bb (bb1);
2145 	  add_bb_to_loop (bb3, bb1->loop_father);
2146 
2147 	  e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148 	  e->probability = profile_probability::unlikely ().guessed ();
2149 
2150 	  gsi2 = gsi_after_labels (bb3);
2151 	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152 					     n1e, n2e);
2153 	  e = split_block (bb3, cond_stmt);
2154 	  e->flags = EDGE_TRUE_VALUE;
2155 	  e->probability = profile_probability::likely ().guessed ();
2156 	  basic_block bb4 = e->dest;
2157 
2158 	  ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159 	  ne->probability = e->probability.invert ();
2160 
2161 	  basic_block bb5 = create_empty_bb (bb2);
2162 	  add_bb_to_loop (bb5, bb2->loop_father);
2163 
2164 	  ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165 	  ne->probability = profile_probability::unlikely ().guessed ();
2166 
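	  /* BB4 resp. BB5 are entered when the first resp. last outer
	     iteration has an empty inner loop.  The two unrolled
	     iterations below adjust N1O (j == 0) resp. N2O (j == 1) to
	     the nearest outer iteration value whose inner loop is
	     non-empty, first by an estimating division and then by
	     stepping in OSTEP increments, and branch back to BB1 to
	     redo the count computation.  */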
2167 	  for (int j = 0; j < 2; j++)
2168 	    {
2169 	      gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170 	      t = fold_build2 (MINUS_EXPR, itype,
2171 			       unshare_expr (fd->loops[i].n1),
2172 			       unshare_expr (fd->loops[i].n2));
2173 	      t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174 	      tree tem
2175 		= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176 					    true, GSI_SAME_STMT);
2177 	      t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178 	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179 	      t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180 	      tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181 					      true, GSI_SAME_STMT);
2182 	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183 	      if (fd->loops[i].m1)
2184 		{
2185 		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186 		  n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187 		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2188 		}
2189 	      else
2190 		n1 = t;
2191 	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192 					     true, GSI_SAME_STMT);
2193 	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194 	      if (fd->loops[i].m2)
2195 		{
2196 		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197 		  n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198 		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2199 		}
2200 	      else
2201 		n2 = t;
2202 	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203 					     true, GSI_SAME_STMT);
2204 	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2205 
2206 	      cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207 						 n1, n2);
2208 	      e = split_block (gsi_bb (gsi2), cond_stmt);
2209 	      e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210 	      e->probability = profile_probability::unlikely ().guessed ();
2211 	      ne = make_edge (e->src, bb1,
2212 			      j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213 	      ne->probability = e->probability.invert ();
2214 	      gsi2 = gsi_after_labels (e->dest);
2215 
2216 	      t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217 	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2218 
2219 	      make_edge (e->dest, bb1, EDGE_FALLTHRU);
2220 	    }
2221 
2222 	  set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223 	  set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2225 
2226 	  if (fd->first_nonrect + 1 == fd->last_nonrect)
2227 	    {
2228 	      fd->first_inner_iterations = first_inner_iterations;
2229 	      fd->factor = factor;
2230 	      fd->adjn1 = n1o;
2231 	    }
2232 	}
2233       else
2234 	{
2235 	  /* Fallback implementation.  Evaluate the loops with m1/m2
2236 	     non-NULL as well as their outer loops at runtime using temporaries
2237 	     instead of the original iteration variables, and in the
2238 	     body just bump the counter.  */
2239 	  gimple_stmt_iterator gsi2 = *gsi;
2240 	  gsi_prev (&gsi2);
2241 	  e = split_block (entry_bb, gsi_stmt (gsi2));
2242 	  e = split_block (e->dest, (gimple *) NULL);
2243 	  basic_block cur_bb = e->src;
2244 	  basic_block next_bb = e->dest;
2245 	  entry_bb = e->dest;
2246 	  *gsi = gsi_after_labels (entry_bb);
2247 
2248 	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249 	  memset (vs, 0, fd->last_nonrect * sizeof (tree));
2250 
2251 	  for (i = 0; i <= fd->last_nonrect; i++)
2252 	    {
2253 	      if (fd->loops[i].m1 == NULL_TREE
2254 		  && fd->loops[i].m2 == NULL_TREE
2255 		  && !fd->loops[i].non_rect_referenced)
2256 		continue;
2257 
2258 	      tree itype = TREE_TYPE (fd->loops[i].v);
2259 
2260 	      gsi2 = gsi_after_labels (cur_bb);
2261 	      tree n1, n2;
2262 	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263 	      if (fd->loops[i].m1 == NULL_TREE)
2264 		n1 = t;
2265 	      else if (POINTER_TYPE_P (itype))
2266 		{
2267 		  gcc_assert (integer_onep (fd->loops[i].m1));
2268 		  t = fold_convert (sizetype,
2269 				    unshare_expr (fd->loops[i].n1));
2270 		  n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271 		}
2272 	      else
2273 		{
2274 		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275 		  n1 = fold_build2 (MULT_EXPR, itype,
2276 				    vs[i - fd->loops[i].outer], n1);
2277 		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278 		}
2279 	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280 					     true, GSI_SAME_STMT);
2281 	      if (i < fd->last_nonrect)
2282 		{
2283 		  vs[i] = create_tmp_reg (itype, ".it");
2284 		  expand_omp_build_assign (&gsi2, vs[i], n1);
2285 		}
2286 	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287 	      if (fd->loops[i].m2 == NULL_TREE)
2288 		n2 = t;
2289 	      else if (POINTER_TYPE_P (itype))
2290 		{
2291 		  gcc_assert (integer_onep (fd->loops[i].m2));
2292 		  t = fold_convert (sizetype,
2293 				    unshare_expr (fd->loops[i].n2));
2294 		  n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2295 		}
2296 	      else
2297 		{
2298 		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2299 		  n2 = fold_build2 (MULT_EXPR, itype,
2300 				    vs[i - fd->loops[i].outer], n2);
2301 		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2302 		}
2303 	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2304 					     true, GSI_SAME_STMT);
2305 	      if (POINTER_TYPE_P (itype))
2306 		itype = signed_type_for (itype);
2307 	      if (i == fd->last_nonrect)
2308 		{
2309 		  gcond *cond_stmt
2310 		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2311 					     n1, n2);
2312 		  e = split_block (cur_bb, cond_stmt);
2313 		  e->flags = EDGE_TRUE_VALUE;
2314 		  ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2315 		  e->probability = profile_probability::likely ().guessed ();
2316 		  ne->probability = e->probability.invert ();
2317 		  gsi2 = gsi_after_labels (e->dest);
2318 
2319 		  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2320 					     ? -1 : 1));
2321 		  t = fold_build2 (PLUS_EXPR, itype,
2322 				   fold_convert (itype, fd->loops[i].step), t);
2323 		  t = fold_build2 (PLUS_EXPR, itype, t,
2324 				   fold_convert (itype, n2));
2325 		  t = fold_build2 (MINUS_EXPR, itype, t,
2326 				   fold_convert (itype, n1));
2327 		  tree step = fold_convert (itype, fd->loops[i].step);
2328 		  if (TYPE_UNSIGNED (itype)
2329 		      && fd->loops[i].cond_code == GT_EXPR)
2330 		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2331 				     fold_build1 (NEGATE_EXPR, itype, t),
2332 				     fold_build1 (NEGATE_EXPR, itype, step));
2333 		  else
2334 		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2335 		  t = fold_convert (type, t);
2336 		  t = fold_build2 (PLUS_EXPR, type,
2337 				   counts[fd->last_nonrect], t);
2338 		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2339 						true, GSI_SAME_STMT);
2340 		  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2341 		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2342 		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2343 		  break;
2344 		}
2345 	      e = split_block (cur_bb, last_stmt (cur_bb));
2346 
2347 	      basic_block new_cur_bb = create_empty_bb (cur_bb);
2348 	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2349 
2350 	      gsi2 = gsi_after_labels (e->dest);
2351 	      tree step = fold_convert (itype,
2352 					unshare_expr (fd->loops[i].step));
2353 	      if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2354 		t = fold_build_pointer_plus (vs[i],
2355 					     fold_convert (sizetype, step));
2356 	      else
2357 		t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2358 	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2359 					    true, GSI_SAME_STMT);
2360 	      expand_omp_build_assign (&gsi2, vs[i], t);
2361 
2362 	      ne = split_block (e->dest, last_stmt (e->dest));
2363 	      gsi2 = gsi_after_labels (ne->dest);
2364 
2365 	      expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2366 	      edge e3, e4;
2367 	      if (next_bb == entry_bb)
2368 		{
2369 		  e3 = find_edge (ne->dest, next_bb);
2370 		  e3->flags = EDGE_FALSE_VALUE;
2371 		}
2372 	      else
2373 		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2374 	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2375 	      e4->probability = profile_probability::likely ().guessed ();
2376 	      e3->probability = e4->probability.invert ();
2377 	      basic_block esrc = e->src;
2378 	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2379 	      cur_bb = new_cur_bb;
2380 	      basic_block latch_bb = next_bb;
2381 	      next_bb = e->dest;
2382 	      remove_edge (e);
2383 	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2384 	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2385 	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2386 	    }
2387 	}
2388       t = NULL_TREE;
2389       for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2390 	if (!fd->loops[i].non_rect_referenced
2391 	    && fd->loops[i].m1 == NULL_TREE
2392 	    && fd->loops[i].m2 == NULL_TREE)
2393 	  {
2394 	    if (t == NULL_TREE)
2395 	      t = counts[i];
2396 	    else
2397 	      t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2398 	  }
2399       if (t)
2400 	{
2401 	  t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2402 	  expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2403 	}
2404       if (!rect_count_seen)
2405 	t = counts[fd->last_nonrect];
2406       else
2407 	t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2408 			 counts[fd->last_nonrect]);
2409       expand_omp_build_assign (gsi, fd->loop.n2, t);
2410     }
2411   else if (fd->non_rect)
2412     {
2413       tree t = fd->loop.n2;
2414       gcc_assert (TREE_CODE (t) == INTEGER_CST);
2415       int non_rect_referenced = 0, non_rect = 0;
2416       for (i = 0; i < fd->collapse; i++)
2417 	{
2418 	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
2419 	      && !integer_zerop (counts[i]))
2420 	    t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2421 	  if (fd->loops[i].non_rect_referenced)
2422 	    non_rect_referenced++;
2423 	  if (fd->loops[i].m1 || fd->loops[i].m2)
2424 	    non_rect++;
2425 	}
2426       gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2427       counts[fd->last_nonrect] = t;
2428     }
2429 }
2430 
2431 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
2432 	T = V;
2433 	V3 = N31 + (T % count3) * STEP3;
2434 	T = T / count3;
2435 	V2 = N21 + (T % count2) * STEP2;
2436 	T = T / count2;
2437 	V1 = N11 + T * STEP1;
2438    if this loop doesn't have an inner loop construct combined with it.
2439    If it does have an inner loop construct combined with it and the
2440    iteration count isn't known constant, store values from counts array
2441    into its _looptemp_ temporaries instead.
2442    For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2443    inclusive), use the count of all those loops together, and either
2444    find quadratic etc. equation roots, or as a fallback, do:
2445 	COUNT = 0;
2446 	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2447 	for (tmpj = M21 * tmpi + N21;
2448 	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2449 	  {
2450 	    int tmpk1 = M31 * tmpj + N31;
2451 	    int tmpk2 = M32 * tmpj + N32;
2452 	    if (tmpk1 COND3 tmpk2)
2453 	      {
2454 		if (COND3 is <)
2455 		  adj = STEP3 - 1;
2456 		else
2457 		  adj = STEP3 + 1;
2458 		int temp = (adj + tmpk2 - tmpk1) / STEP3;
2459 		if (COUNT + temp > T)
2460 		  {
2461 		    V1 = tmpi;
2462 		    V2 = tmpj;
2463 		    V3 = tmpk1 + (T - COUNT) * STEP3;
2464 		    goto done;
2465 		  }
2466 		else
2467 		  COUNT += temp;
2468 	      }
2469 	  }
2470 	done:;
2471    but for optional innermost or outermost rectangular loops that aren't
2472    referenced by other loop expressions keep doing the division/modulo.  */
2473 
2474 static void
2475 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2476 			  tree *counts, tree *nonrect_bounds,
2477 			  gimple *inner_stmt, tree startvar)
2478 {
2479   int i;
2480   if (gimple_omp_for_combined_p (fd->for_stmt))
2481     {
2482       /* If fd->loop.n2 is constant, then no propagation of the counts
2483 	 is needed, they are constant.  */
2484       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2485 	return;
2486 
2487       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2488 		     ? gimple_omp_taskreg_clauses (inner_stmt)
2489 		     : gimple_omp_for_clauses (inner_stmt);
2490       /* First two _looptemp_ clauses are for istart/iend, counts[0]
2491 	 isn't supposed to be handled, as the inner loop doesn't
2492 	 use it.  */
2493       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2494       gcc_assert (innerc);
2495       int count = 0;
2496       if (fd->non_rect
2497 	  && fd->last_nonrect == fd->first_nonrect + 1
2498 	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2499 	count = 4;
2500       for (i = 0; i < fd->collapse + count; i++)
2501 	{
2502 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2503 				    OMP_CLAUSE__LOOPTEMP_);
2504 	  gcc_assert (innerc);
2505 	  if (i)
2506 	    {
2507 	      tree tem = OMP_CLAUSE_DECL (innerc);
2508 	      tree t;
2509 	      if (i < fd->collapse)
2510 		t = counts[i];
2511 	      else
2512 		switch (i - fd->collapse)
2513 		  {
2514 		  case 0: t = counts[0]; break;
2515 		  case 1: t = fd->first_inner_iterations; break;
2516 		  case 2: t = fd->factor; break;
2517 		  case 3: t = fd->adjn1; break;
2518 		  default: gcc_unreachable ();
2519 		  }
2520 	      t = fold_convert (TREE_TYPE (tem), t);
2521 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2522 					    false, GSI_CONTINUE_LINKING);
2523 	      gassign *stmt = gimple_build_assign (tem, t);
2524 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2525 	    }
2526 	}
2527       return;
2528     }
2529 
2530   tree type = TREE_TYPE (fd->loop.v);
2531   tree tem = create_tmp_reg (type, ".tem");
2532   gassign *stmt = gimple_build_assign (tem, startvar);
2533   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2534 
2535   for (i = fd->collapse - 1; i >= 0; i--)
2536     {
2537       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2538       itype = vtype;
2539       if (POINTER_TYPE_P (vtype))
2540 	itype = signed_type_for (vtype);
2541       if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2542 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2543       else
2544 	t = tem;
2545       if (i == fd->last_nonrect)
2546 	{
2547 	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2548 					false, GSI_CONTINUE_LINKING);
2549 	  tree stopval = t;
2550 	  tree idx = create_tmp_reg (type, ".count");
2551 	  expand_omp_build_assign (gsi, idx,
2552 				   build_zero_cst (type), true);
2553 	  basic_block bb_triang = NULL, bb_triang_dom = NULL;
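	  /* If the non-rectangular sub-nest is exactly two loops, the
	     triangular count data is available and a hardware sqrt is
	     usable, compute the outer and inner indexes for STOPVAL
	     directly from the triangular count equation; otherwise only
	     the loop-based fallback below is used.  */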
2554 	  if (fd->first_nonrect + 1 == fd->last_nonrect
2555 	      && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2556 		  || fd->first_inner_iterations)
2557 	      && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2558 		  != CODE_FOR_nothing)
2559 	      && !integer_zerop (fd->loop.n2))
2560 	    {
2561 	      tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2562 	      tree itype = TREE_TYPE (fd->loops[i].v);
2563 	      tree first_inner_iterations = fd->first_inner_iterations;
2564 	      tree factor = fd->factor;
2565 	      gcond *cond_stmt
2566 		= expand_omp_build_cond (gsi, NE_EXPR, factor,
2567 					 build_zero_cst (TREE_TYPE (factor)),
2568 					 true);
2569 	      edge e = split_block (gsi_bb (*gsi), cond_stmt);
2570 	      basic_block bb0 = e->src;
2571 	      e->flags = EDGE_TRUE_VALUE;
2572 	      e->probability = profile_probability::likely ();
2573 	      bb_triang_dom = bb0;
2574 	      *gsi = gsi_after_labels (e->dest);
2575 	      tree slltype = long_long_integer_type_node;
2576 	      tree ulltype = long_long_unsigned_type_node;
2577 	      tree stopvalull = fold_convert (ulltype, stopval);
2578 	      stopvalull
2579 		= force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2580 					    false, GSI_CONTINUE_LINKING);
2581 	      first_inner_iterations
2582 		= fold_convert (slltype, first_inner_iterations);
2583 	      first_inner_iterations
2584 		= force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2585 					    NULL_TREE, false,
2586 					    GSI_CONTINUE_LINKING);
2587 	      factor = fold_convert (slltype, factor);
2588 	      factor
2589 		= force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2590 					    false, GSI_CONTINUE_LINKING);
2591 	      tree first_inner_iterationsd
2592 		= fold_build1 (FLOAT_EXPR, double_type_node,
2593 			       first_inner_iterations);
2594 	      first_inner_iterationsd
2595 		= force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2596 					    NULL_TREE, false,
2597 					    GSI_CONTINUE_LINKING);
2598 	      tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2599 					  factor);
2600 	      factord = force_gimple_operand_gsi (gsi, factord, true,
2601 						  NULL_TREE, false,
2602 						  GSI_CONTINUE_LINKING);
2603 	      tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2604 					   stopvalull);
2605 	      stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2606 						   NULL_TREE, false,
2607 						   GSI_CONTINUE_LINKING);
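	      /* The outer index C for the flattened index STOPVAL
		 satisfies D (C) <= STOPVAL < D (C + 1), where
		 D (C) = FACTOR * C * (C - 1) / 2
			 + C * FIRST_INNER_ITERATIONS
		 is the number of iterations executed by the first C
		 outer iterations.  Solve D (C) = STOPVAL via the
		 positive quadratic root
		 C = (sqrt (T3 * T3 + 2 * FACTOR * STOPVAL) - T3)
		     / FACTOR
		 with T3 = FIRST_INNER_ITERATIONS - FACTOR / 2, computed
		 below in double precision; the comparisons against D
		 afterwards catch imprecision and branch to the fallback
		 if needed.  */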
2608 	      /* Temporarily disable flag_rounding_math; the values will be
2609 		 decimal numbers divided by 2, and worst case imprecisions
2610 		 due to too large values ought to be caught later by the
2611 		 checks for fallback.  */
2612 	      int save_flag_rounding_math = flag_rounding_math;
2613 	      flag_rounding_math = 0;
2614 	      t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2615 			       build_real (double_type_node, dconst2));
2616 	      tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2617 				     first_inner_iterationsd, t);
2618 	      t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2619 					     GSI_CONTINUE_LINKING);
2620 	      t = fold_build2 (MULT_EXPR, double_type_node, factord,
2621 			       build_real (double_type_node, dconst2));
2622 	      t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2623 	      t = fold_build2 (PLUS_EXPR, double_type_node, t,
2624 			       fold_build2 (MULT_EXPR, double_type_node,
2625 					    t3, t3));
2626 	      flag_rounding_math = save_flag_rounding_math;
2627 	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2628 					    GSI_CONTINUE_LINKING);
2629 	      if (flag_exceptions
2630 		  && cfun->can_throw_non_call_exceptions
2631 		  && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2632 		{
2633 		  tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2634 					  build_zero_cst (double_type_node));
2635 		  tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2636 						  false, GSI_CONTINUE_LINKING);
2637 		  cond_stmt = gimple_build_cond (NE_EXPR, tem,
2638 						 boolean_false_node,
2639 						 NULL_TREE, NULL_TREE);
2640 		}
2641 	      else
2642 		cond_stmt
2643 		  = gimple_build_cond (LT_EXPR, t,
2644 				       build_zero_cst (double_type_node),
2645 				       NULL_TREE, NULL_TREE);
2646 	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2647 	      e = split_block (gsi_bb (*gsi), cond_stmt);
2648 	      basic_block bb1 = e->src;
2649 	      e->flags = EDGE_FALSE_VALUE;
2650 	      e->probability = profile_probability::very_likely ();
2651 	      *gsi = gsi_after_labels (e->dest);
2652 	      gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2653 	      tree sqrtr = create_tmp_var (double_type_node);
2654 	      gimple_call_set_lhs (call, sqrtr);
2655 	      gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2656 	      t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2657 	      t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2658 	      t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2659 	      tree c = create_tmp_var (ulltype);
2660 	      tree d = create_tmp_var (ulltype);
2661 	      expand_omp_build_assign (gsi, c, t, true);
2662 	      t = fold_build2 (MINUS_EXPR, ulltype, c,
2663 			       build_one_cst (ulltype));
2664 	      t = fold_build2 (MULT_EXPR, ulltype, c, t);
2665 	      t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2666 	      t = fold_build2 (MULT_EXPR, ulltype,
2667 			       fold_convert (ulltype, fd->factor), t);
2668 	      tree t2
2669 		= fold_build2 (MULT_EXPR, ulltype, c,
2670 			       fold_convert (ulltype,
2671 					     fd->first_inner_iterations));
2672 	      t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2673 	      expand_omp_build_assign (gsi, d, t, true);
2674 	      t = fold_build2 (MULT_EXPR, ulltype,
2675 			       fold_convert (ulltype, fd->factor), c);
2676 	      t = fold_build2 (PLUS_EXPR, ulltype,
2677 			       t, fold_convert (ulltype,
2678 						fd->first_inner_iterations));
2679 	      t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2680 					     GSI_CONTINUE_LINKING);
2681 	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2682 					     NULL_TREE, NULL_TREE);
2683 	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2684 	      e = split_block (gsi_bb (*gsi), cond_stmt);
2685 	      basic_block bb2 = e->src;
2686 	      e->flags = EDGE_TRUE_VALUE;
2687 	      e->probability = profile_probability::very_likely ();
2688 	      *gsi = gsi_after_labels (e->dest);
2689 	      t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2690 	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2691 					    GSI_CONTINUE_LINKING);
2692 	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2693 					     NULL_TREE, NULL_TREE);
2694 	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2695 	      e = split_block (gsi_bb (*gsi), cond_stmt);
2696 	      basic_block bb3 = e->src;
2697 	      e->flags = EDGE_FALSE_VALUE;
2698 	      e->probability = profile_probability::very_likely ();
2699 	      *gsi = gsi_after_labels (e->dest);
2700 	      t = fold_convert (itype, c);
2701 	      t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2702 	      t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2703 	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2704 					    GSI_CONTINUE_LINKING);
2705 	      expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2706 	      t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2707 	      t2 = fold_convert (itype, t2);
2708 	      t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2709 	      t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2710 	      if (fd->loops[i].m1)
2711 		{
2712 		  t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2713 		  t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2714 		}
2715 	      expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2716 	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2717 	      bb_triang = e->src;
2718 	      *gsi = gsi_after_labels (e->dest);
2719 	      remove_edge (e);
2720 	      e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2721 	      e->probability = profile_probability::very_unlikely ();
2722 	      e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2723 	      e->probability = profile_probability::very_unlikely ();
2724 	      e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2725 	      e->probability = profile_probability::very_unlikely ();
2726 
2727 	      basic_block bb4 = create_empty_bb (bb0);
2728 	      add_bb_to_loop (bb4, bb0->loop_father);
2729 	      e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2730 	      e->probability = profile_probability::unlikely ();
2731 	      make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2732 	      set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2733 	      set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2734 	      gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
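	      /* BB4 handles FACTOR == 0, where the inner iteration
		 count is the same for every outer iteration, so STOPVAL
		 decomposes by plain modulo and division with the
		 constant inner count counts[i] / counts[i - 1].  */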
2735 	      t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2736 				counts[i], counts[i - 1]);
2737 	      t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2738 					     GSI_CONTINUE_LINKING);
2739 	      t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2740 	      t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2741 	      t = fold_convert (itype, t);
2742 	      t2 = fold_convert (itype, t2);
2743 	      t = fold_build2 (MULT_EXPR, itype, t,
2744 			       fold_convert (itype, fd->loops[i].step));
2745 	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2746 	      t2 = fold_build2 (MULT_EXPR, itype, t2,
2747 				fold_convert (itype, fd->loops[i - 1].step));
2748 	      t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2749 	      t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2750 					     false, GSI_CONTINUE_LINKING);
2751 	      stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2752 	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2753 	      if (fd->loops[i].m1)
2754 		{
2755 		  t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2756 				    fd->loops[i - 1].v);
2757 		  t = fold_build2 (PLUS_EXPR, itype, t, t2);
2758 		}
2759 	      t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2760 					    false, GSI_CONTINUE_LINKING);
2761 	      stmt = gimple_build_assign (fd->loops[i].v, t);
2762 	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2763 	    }
2764 	  /* Fallback implementation.  Evaluate the loops between
2765 	     fd->first_nonrect and fd->last_nonrect (inclusive) at
2766 	     runtime using temporaries instead of the original iteration
2767 	     variables; in the body just bump the counter and compare
2768 	     with the desired value.  */
2769 	  gimple_stmt_iterator gsi2 = *gsi;
2770 	  basic_block entry_bb = gsi_bb (gsi2);
2771 	  edge e = split_block (entry_bb, gsi_stmt (gsi2));
2772 	  e = split_block (e->dest, (gimple *) NULL);
2773 	  basic_block dom_bb = NULL;
2774 	  basic_block cur_bb = e->src;
2775 	  basic_block next_bb = e->dest;
2776 	  entry_bb = e->dest;
2777 	  *gsi = gsi_after_labels (entry_bb);
2778 
2779 	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2780 	  tree n1 = NULL_TREE, n2 = NULL_TREE;
2781 	  memset (vs, 0, fd->last_nonrect * sizeof (tree));
2782 
2783 	  for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2784 	    {
2785 	      tree itype = TREE_TYPE (fd->loops[j].v);
2786 	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
2787 			     && fd->loops[j].m2 == NULL_TREE
2788 			     && !fd->loops[j].non_rect_referenced);
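	      /* Rectangular loops not referenced by any other loop
		 iterate a plain 0-based counter here (N1 = 0,
		 N2 = counts[j]); their real iteration values are only
		 materialized in the final loop below.  */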
2789 	      gsi2 = gsi_after_labels (cur_bb);
2790 	      t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2791 	      if (fd->loops[j].m1 == NULL_TREE)
2792 		n1 = rect_p ? build_zero_cst (type) : t;
2793 	      else if (POINTER_TYPE_P (itype))
2794 		{
2795 		  gcc_assert (integer_onep (fd->loops[j].m1));
2796 		  t = fold_convert (sizetype,
2797 				    unshare_expr (fd->loops[j].n1));
2798 		  n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2799 		}
2800 	      else
2801 		{
2802 		  n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2803 		  n1 = fold_build2 (MULT_EXPR, itype,
2804 				    vs[j - fd->loops[j].outer], n1);
2805 		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2806 		}
2807 	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2808 					     true, GSI_SAME_STMT);
2809 	      if (j < fd->last_nonrect)
2810 		{
2811 		  vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2812 		  expand_omp_build_assign (&gsi2, vs[j], n1);
2813 		}
2814 	      t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2815 	      if (fd->loops[j].m2 == NULL_TREE)
2816 		n2 = rect_p ? counts[j] : t;
2817 	      else if (POINTER_TYPE_P (itype))
2818 		{
2819 		  gcc_assert (integer_onep (fd->loops[j].m2));
2820 		  t = fold_convert (sizetype,
2821 				    unshare_expr (fd->loops[j].n2));
2822 		  n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2823 		}
2824 	      else
2825 		{
2826 		  n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2827 		  n2 = fold_build2 (MULT_EXPR, itype,
2828 				    vs[j - fd->loops[j].outer], n2);
2829 		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2830 		}
2831 	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2832 					     true, GSI_SAME_STMT);
2833 	      if (POINTER_TYPE_P (itype))
2834 		itype = signed_type_for (itype);
2835 	      if (j == fd->last_nonrect)
2836 		{
2837 		  gcond *cond_stmt
2838 		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2839 					     n1, n2);
2840 		  e = split_block (cur_bb, cond_stmt);
2841 		  e->flags = EDGE_TRUE_VALUE;
2842 		  edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2843 		  e->probability = profile_probability::likely ().guessed ();
2844 		  ne->probability = e->probability.invert ();
2845 		  gsi2 = gsi_after_labels (e->dest);
2846 
2847 		  t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2848 					     ? -1 : 1));
2849 		  t = fold_build2 (PLUS_EXPR, itype,
2850 				   fold_convert (itype, fd->loops[j].step), t);
2851 		  t = fold_build2 (PLUS_EXPR, itype, t,
2852 				   fold_convert (itype, n2));
2853 		  t = fold_build2 (MINUS_EXPR, itype, t,
2854 				   fold_convert (itype, n1));
2855 		  tree step = fold_convert (itype, fd->loops[j].step);
2856 		  if (TYPE_UNSIGNED (itype)
2857 		      && fd->loops[j].cond_code == GT_EXPR)
2858 		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2859 				     fold_build1 (NEGATE_EXPR, itype, t),
2860 				     fold_build1 (NEGATE_EXPR, itype, step));
2861 		  else
2862 		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2863 		  t = fold_convert (type, t);
2864 		  t = fold_build2 (PLUS_EXPR, type, idx, t);
2865 		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2866 						true, GSI_SAME_STMT);
2867 		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2868 		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2869 		  cond_stmt
2870 		    = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2871 					 NULL_TREE);
2872 		  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2873 		  e = split_block (gsi_bb (gsi2), cond_stmt);
2874 		  e->flags = EDGE_TRUE_VALUE;
2875 		  e->probability = profile_probability::likely ().guessed ();
2876 		  ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2877 		  ne->probability = e->probability.invert ();
2878 		  gsi2 = gsi_after_labels (e->dest);
2879 		  expand_omp_build_assign (&gsi2, idx, t);
2880 		  set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2881 		  break;
2882 		}
2883 	      e = split_block (cur_bb, last_stmt (cur_bb));
2884 
2885 	      basic_block new_cur_bb = create_empty_bb (cur_bb);
2886 	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2887 
2888 	      gsi2 = gsi_after_labels (e->dest);
2889 	      if (rect_p)
2890 		t = fold_build2 (PLUS_EXPR, type, vs[j],
2891 				 build_one_cst (type));
2892 	      else
2893 		{
2894 		  tree step
2895 		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
2896 		  if (POINTER_TYPE_P (vtype))
2897 		    t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2898 								      step));
2899 		  else
2900 		    t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2901 		}
2902 	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2903 					    true, GSI_SAME_STMT);
2904 	      expand_omp_build_assign (&gsi2, vs[j], t);
2905 
2906 	      edge ne = split_block (e->dest, last_stmt (e->dest));
2907 	      gsi2 = gsi_after_labels (ne->dest);
2908 
2909 	      gcond *cond_stmt;
2910 	      if (next_bb == entry_bb)
2911 		/* No need to actually check the outermost condition.  */
2912 		cond_stmt
2913 		  = gimple_build_cond (EQ_EXPR, boolean_true_node,
2914 				       boolean_true_node,
2915 				       NULL_TREE, NULL_TREE);
2916 	      else
2917 		cond_stmt
2918 		  = gimple_build_cond (rect_p ? LT_EXPR
2919 					      : fd->loops[j].cond_code,
2920 				       vs[j], n2, NULL_TREE, NULL_TREE);
2921 	      gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2922 	      edge e3, e4;
2923 	      if (next_bb == entry_bb)
2924 		{
2925 		  e3 = find_edge (ne->dest, next_bb);
2926 		  e3->flags = EDGE_FALSE_VALUE;
2927 		  dom_bb = ne->dest;
2928 		}
2929 	      else
2930 		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2931 	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2932 	      e4->probability = profile_probability::likely ().guessed ();
2933 	      e3->probability = e4->probability.invert ();
2934 	      basic_block esrc = e->src;
2935 	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2936 	      cur_bb = new_cur_bb;
2937 	      basic_block latch_bb = next_bb;
2938 	      next_bb = e->dest;
2939 	      remove_edge (e);
2940 	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2941 	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2942 	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2943 	    }
2944 	  for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2945 	    {
2946 	      tree vtype = TREE_TYPE (fd->loops[j].v);
2947 	      tree itype = vtype;
2948 	      if (POINTER_TYPE_P (itype))
2949 		itype = signed_type_for (itype);
2950 	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
2951 			     && fd->loops[j].m2 == NULL_TREE
2952 			     && !fd->loops[j].non_rect_referenced);
2953 	      if (j == fd->last_nonrect)
2954 		{
2955 		  t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2956 		  t = fold_convert (itype, t);
2957 		  tree t2
2958 		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
2959 		  t = fold_build2 (MULT_EXPR, itype, t, t2);
2960 		  if (POINTER_TYPE_P (vtype))
2961 		    t = fold_build_pointer_plus (n1,
2962 						 fold_convert (sizetype, t));
2963 		  else
2964 		    t = fold_build2 (PLUS_EXPR, itype, n1, t);
2965 		}
2966 	      else if (rect_p)
2967 		{
2968 		  t = fold_convert (itype, vs[j]);
2969 		  t = fold_build2 (MULT_EXPR, itype, t,
2970 				   fold_convert (itype, fd->loops[j].step));
2971 		  if (POINTER_TYPE_P (vtype))
2972 		    t = fold_build_pointer_plus (fd->loops[j].n1,
2973 						 fold_convert (sizetype, t));
2974 		  else
2975 		    t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2976 		}
2977 	      else
2978 		t = vs[j];
2979 	      t = force_gimple_operand_gsi (gsi, t, false,
2980 					    NULL_TREE, true,
2981 					    GSI_SAME_STMT);
2982 	      stmt = gimple_build_assign (fd->loops[j].v, t);
2983 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2984 	    }
2985 	  if (gsi_end_p (*gsi))
2986 	    *gsi = gsi_last_bb (gsi_bb (*gsi));
2987 	  else
2988 	    gsi_prev (gsi);
2989 	  if (bb_triang)
2990 	    {
2991 	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2992 	      make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2993 	      *gsi = gsi_after_labels (e->dest);
2994 	      if (!gsi_end_p (*gsi))
2995 		gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2996 	      set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2997 	    }
2998 	}
2999       else
3000 	{
3001 	  t = fold_convert (itype, t);
3002 	  t = fold_build2 (MULT_EXPR, itype, t,
3003 			   fold_convert (itype, fd->loops[i].step));
3004 	  if (POINTER_TYPE_P (vtype))
3005 	    t = fold_build_pointer_plus (fd->loops[i].n1, t);
3006 	  else
3007 	    t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3008 	  t = force_gimple_operand_gsi (gsi, t,
3009 					DECL_P (fd->loops[i].v)
3010 					&& TREE_ADDRESSABLE (fd->loops[i].v),
3011 					NULL_TREE, false,
3012 					GSI_CONTINUE_LINKING);
3013 	  stmt = gimple_build_assign (fd->loops[i].v, t);
3014 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3015 	}
3016       if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3017 	{
3018 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3019 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3020 					false, GSI_CONTINUE_LINKING);
3021 	  stmt = gimple_build_assign (tem, t);
3022 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3023 	}
3024       if (i == fd->last_nonrect)
3025 	i = fd->first_nonrect;
3026     }
3027   if (fd->non_rect)
3028     for (i = 0; i <= fd->last_nonrect; i++)
3029       if (fd->loops[i].m2)
3030 	{
3031 	  tree itype = TREE_TYPE (fd->loops[i].v);
3032 
3033 	  tree t;
3034 	  if (POINTER_TYPE_P (itype))
3035 	    {
3036 	      gcc_assert (integer_onep (fd->loops[i].m2));
3037 	      t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3038 	      t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3039 					   t);
3040 	    }
3041 	  else
3042 	    {
3043 	      t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3044 	      t = fold_build2 (MULT_EXPR, itype,
3045 			       fd->loops[i - fd->loops[i].outer].v, t);
3046 	      t = fold_build2 (PLUS_EXPR, itype, t,
3047 			       fold_convert (itype,
3048 					     unshare_expr (fd->loops[i].n2)));
3049 	    }
3050 	  nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3051 	  t = force_gimple_operand_gsi (gsi, t, false,
3052 					NULL_TREE, false,
3053 					GSI_CONTINUE_LINKING);
3054 	  stmt = gimple_build_assign (nonrect_bounds[i], t);
3055 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3056 	}
3057 }
3058 
3059 /* Helper function for expand_omp_for_*.  Generate code like:
3060     L10:
3061 	V3 += STEP3;
3062 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
3063     L11:
3064 	V3 = N31;
3065 	V2 += STEP2;
3066 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
3067     L12:
3068 	V2 = N21;
3069 	V1 += STEP1;
3070 	goto BODY_BB;
3071    For non-rectangular loops, use temporaries stored in nonrect_bounds
3072    for the upper bounds if an M?2 multiplier is present.  Given e.g.
3073    for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3074    for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3075    for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3076    for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3077    do:
3078     L10:
3079 	V4 += STEP4;
3080 	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3081     L11:
3082 	V4 = N41 + M41 * V2; // This can be left out if the loop
3083 			     // refers to the immediate parent loop
3084 	V3 += STEP3;
3085 	if (V3 cond3 N32) goto BODY_BB; else goto L12;
3086     L12:
3087 	V3 = N31;
3088 	V2 += STEP2;
3089 	if (V2 cond2 N22) goto L120; else goto L13;
3090     L120:
3091 	V4 = N41 + M41 * V2;
3092 	NONRECT_BOUND4 = N42 + M42 * V2;
3093 	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3094     L13:
3095 	V2 = N21;
3096 	V1 += STEP1;
3097 	goto L120;  */
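
/* As a concrete (hypothetical) source-level illustration of the
   non-rectangular case above, a nest such as

     #pragma omp for collapse(4)
     for (i = 0; i < a; i++)
       for (j = 0; j < b; j++)
	 for (k = 0; k < c; k++)
	   for (l = 2 * j; l < d + 3 * j; l++)
	     BODY;

   has N41 == 0, M41 == 2, N42 == d and M42 == 3, with V4's bounds
   referring to V2 (j), so stepping V2 requires recomputing both V4
   and NONRECT_BOUND4 as in L120 above.  */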
3098 
3099 static basic_block
3100 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3101 			     basic_block cont_bb, basic_block body_bb)
3102 {
3103   basic_block last_bb, bb, collapse_bb = NULL;
3104   int i;
3105   gimple_stmt_iterator gsi;
3106   edge e;
3107   tree t;
3108   gimple *stmt;
3109 
3110   last_bb = cont_bb;
3111   for (i = fd->collapse - 1; i >= 0; i--)
3112     {
3113       tree vtype = TREE_TYPE (fd->loops[i].v);
3114 
3115       bb = create_empty_bb (last_bb);
3116       add_bb_to_loop (bb, last_bb->loop_father);
3117       gsi = gsi_start_bb (bb);
3118 
3119       if (i < fd->collapse - 1)
3120 	{
3121 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3122 	  e->probability
3123 	    = profile_probability::guessed_always ().apply_scale (1, 8);
3124 
3125 	  struct omp_for_data_loop *l = &fd->loops[i + 1];
3126 	  if (l->m1 == NULL_TREE || l->outer != 1)
3127 	    {
3128 	      t = l->n1;
3129 	      if (l->m1)
3130 		{
3131 		  if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3132 		    t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3133 						 fold_convert (sizetype, t));
3134 		  else
3135 		    {
3136 		      tree t2
3137 			= fold_build2 (MULT_EXPR, TREE_TYPE (t),
3138 				       fd->loops[i + 1 - l->outer].v, l->m1);
3139 		      t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3140 		    }
3141 		}
3142 	      t = force_gimple_operand_gsi (&gsi, t,
3143 					    DECL_P (l->v)
3144 					    && TREE_ADDRESSABLE (l->v),
3145 					    NULL_TREE, false,
3146 					    GSI_CONTINUE_LINKING);
3147 	      stmt = gimple_build_assign (l->v, t);
3148 	      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3149 	    }
3150 	}
3151       else
3152 	collapse_bb = bb;
3153 
3154       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3155 
3156       if (POINTER_TYPE_P (vtype))
3157 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3158       else
3159 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3160       t = force_gimple_operand_gsi (&gsi, t,
3161 				    DECL_P (fd->loops[i].v)
3162 				    && TREE_ADDRESSABLE (fd->loops[i].v),
3163 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
3164       stmt = gimple_build_assign (fd->loops[i].v, t);
3165       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3166 
3167       if (fd->loops[i].non_rect_referenced)
3168 	{
3169 	  basic_block update_bb = NULL, prev_bb = NULL;
3170 	  for (int j = i + 1; j <= fd->last_nonrect; j++)
3171 	    if (j - fd->loops[j].outer == i)
3172 	      {
3173 		tree n1, n2;
3174 		struct omp_for_data_loop *l = &fd->loops[j];
3175 		basic_block this_bb = create_empty_bb (last_bb);
3176 		add_bb_to_loop (this_bb, last_bb->loop_father);
3177 		gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3178 		if (prev_bb)
3179 		  {
3180 		    e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3181 		    e->probability
3182 		      = profile_probability::guessed_always ().apply_scale (7,
3183 									    8);
3184 		    set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3185 		  }
3186 		if (l->m1)
3187 		  {
3188 		    if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3189 		      t = fold_build_pointer_plus (fd->loops[i].v,
3190 						   fold_convert (sizetype,
3191 								 l->n1));
3192 		    else
3193 		      {
3194 			t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3195 					 fd->loops[i].v);
3196 			t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3197 					 t, l->n1);
3198 		      }
3199 		    n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3200 						   false,
3201 						   GSI_CONTINUE_LINKING);
3202 		    stmt = gimple_build_assign (l->v, n1);
3203 		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3204 		    n1 = l->v;
3205 		  }
3206 		else
3207 		  n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3208 						 NULL_TREE, false,
3209 						 GSI_CONTINUE_LINKING);
3210 		if (l->m2)
3211 		  {
3212 		    if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3213 		      t = fold_build_pointer_plus (fd->loops[i].v,
3214 						   fold_convert (sizetype,
3215 								 l->n2));
3216 		    else
3217 		      {
3218 			t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3219 					 fd->loops[i].v);
3220 			t = fold_build2 (PLUS_EXPR,
3221 					 TREE_TYPE (nonrect_bounds[j]),
3222 					 t, unshare_expr (l->n2));
3223 		      }
3224 		    n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3225 						   false,
3226 						   GSI_CONTINUE_LINKING);
3227 		    stmt = gimple_build_assign (nonrect_bounds[j], n2);
3228 		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3229 		    n2 = nonrect_bounds[j];
3230 		  }
3231 		else
3232 		  n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3233 						 true, NULL_TREE, false,
3234 						 GSI_CONTINUE_LINKING);
3235 		gcond *cond_stmt
3236 		  = gimple_build_cond (l->cond_code, n1, n2,
3237 				       NULL_TREE, NULL_TREE);
3238 		gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3239 		if (update_bb == NULL)
3240 		  update_bb = this_bb;
3241 		e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3242 		e->probability
3243 		  = profile_probability::guessed_always ().apply_scale (1, 8);
3244 		if (prev_bb == NULL)
3245 		  set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3246 		prev_bb = this_bb;
3247 	      }
3248 	  e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3249 	  e->probability
3250 	    = profile_probability::guessed_always ().apply_scale (7, 8);
3251 	  body_bb = update_bb;
3252 	}
3253 
3254       if (i > 0)
3255 	{
3256 	  if (fd->loops[i].m2)
3257 	    t = nonrect_bounds[i];
3258 	  else
3259 	    t = unshare_expr (fd->loops[i].n2);
3260 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3261 					false, GSI_CONTINUE_LINKING);
3262 	  tree v = fd->loops[i].v;
3263 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
3264 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3265 					  false, GSI_CONTINUE_LINKING);
3266 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3267 	  stmt = gimple_build_cond_empty (t);
3268 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3269 	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3270 			 expand_omp_regimplify_p, NULL, NULL)
3271 	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3272 			    expand_omp_regimplify_p, NULL, NULL))
3273 	    gimple_regimplify_operands (stmt, &gsi);
3274 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3275 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3276 	}
3277       else
3278 	make_edge (bb, body_bb, EDGE_FALLTHRU);
3279       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3280       last_bb = bb;
3281     }
3282 
3283   return collapse_bb;
3284 }
3285 
3286 /* Expand #pragma omp ordered depend(source).  */
3287 
3288 static void
3289 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3290 			   tree *counts, location_t loc)
3291 {
3292   enum built_in_function source_ix
3293     = fd->iter_type == long_integer_type_node
3294       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3295   gimple *g
3296     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3297 			 build_fold_addr_expr (counts[fd->ordered]));
3298   gimple_set_location (g, loc);
3299   gsi_insert_before (gsi, g, GSI_SAME_STMT);
3300 }
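
/* A minimal sketch of the expansion, assuming fd->iter_type is long
   (the ULL builtin is used otherwise): the depend(source) statement is
   replaced by a single call

     GOMP_doacross_post (&.orditera[0]);

   passing the address of the counts[fd->ordered] array of current
   iteration counters, which is created in expand_omp_ordered_source_sink
   below and kept up to date as the loops iterate.  */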
3301 
3302 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
3303 
3304 static void
3305 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3306 			 tree *counts, tree c, location_t loc)
3307 {
3308   auto_vec<tree, 10> args;
3309   enum built_in_function sink_ix
3310     = fd->iter_type == long_integer_type_node
3311       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3312   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3313   int i;
3314   gimple_stmt_iterator gsi2 = *gsi;
3315   bool warned_step = false;
3316 
3317   for (i = 0; i < fd->ordered; i++)
3318     {
3319       tree step = NULL_TREE;
3320       off = TREE_PURPOSE (deps);
3321       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3322 	{
3323 	  step = TREE_OPERAND (off, 1);
3324 	  off = TREE_OPERAND (off, 0);
3325 	}
3326       if (!integer_zerop (off))
3327 	{
3328 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
3329 		      || fd->loops[i].cond_code == GT_EXPR);
3330 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
3331 	  if (step)
3332 	    {
3333 	      /* Non-simple Fortran DO loops.  If step is variable,
3334 		 we don't know even the direction at compile time, so
3335 		 we can't warn.  */
3336 	      if (TREE_CODE (step) != INTEGER_CST)
3337 		break;
3338 	      forward = tree_int_cst_sgn (step) != -1;
3339 	    }
3340 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3341 	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3342 				"waiting for lexically later iteration");
3343 	  break;
3344 	}
3345       deps = TREE_CHAIN (deps);
3346     }
3347   /* If all offsets corresponding to the collapsed loops are zero,
3348      this depend clause can be ignored.  FIXME: but there is still a
3349      flush needed.  We need to emit one __sync_synchronize () for it
3350      though (perhaps conditionally)?  Solve this together with the
3351      conservative dependence folding optimization.
3352   if (i >= fd->collapse)
3353     return;  */
3354 
3355   deps = OMP_CLAUSE_DECL (c);
3356   gsi_prev (&gsi2);
3357   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3358   edge e2 = split_block_after_labels (e1->dest);
3359 
3360   gsi2 = gsi_after_labels (e1->dest);
3361   *gsi = gsi_last_bb (e1->src);
3362   for (i = 0; i < fd->ordered; i++)
3363     {
3364       tree itype = TREE_TYPE (fd->loops[i].v);
3365       tree step = NULL_TREE;
3366       tree orig_off = NULL_TREE;
3367       if (POINTER_TYPE_P (itype))
3368 	itype = sizetype;
3369       if (i)
3370 	deps = TREE_CHAIN (deps);
3371       off = TREE_PURPOSE (deps);
3372       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3373 	{
3374 	  step = TREE_OPERAND (off, 1);
3375 	  off = TREE_OPERAND (off, 0);
3376 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
3377 		      && integer_onep (fd->loops[i].step)
3378 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3379 	}
3380       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3381       if (step)
3382 	{
3383 	  off = fold_convert_loc (loc, itype, off);
3384 	  orig_off = off;
3385 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3386 	}
3387 
3388       if (integer_zerop (off))
3389 	t = boolean_true_node;
3390       else
3391 	{
3392 	  tree a;
3393 	  tree co = fold_convert_loc (loc, itype, off);
3394 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3395 	    {
3396 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3397 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3398 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3399 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3400 				   co);
3401 	    }
3402 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3403 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3404 				 fd->loops[i].v, co);
3405 	  else
3406 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3407 				 fd->loops[i].v, co);
3408 	  if (step)
3409 	    {
3410 	      tree t1, t2;
3411 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3412 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3413 				      fd->loops[i].n1);
3414 	      else
3415 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3416 				      fd->loops[i].n2);
3417 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3418 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3419 				      fd->loops[i].n2);
3420 	      else
3421 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3422 				      fd->loops[i].n1);
3423 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3424 				   step, build_int_cst (TREE_TYPE (step), 0));
3425 	      if (TREE_CODE (step) != INTEGER_CST)
3426 		{
3427 		  t1 = unshare_expr (t1);
3428 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3429 						 false, GSI_CONTINUE_LINKING);
3430 		  t2 = unshare_expr (t2);
3431 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3432 						 false, GSI_CONTINUE_LINKING);
3433 		}
3434 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3435 				   t, t2, t1);
3436 	    }
3437 	  else if (fd->loops[i].cond_code == LT_EXPR)
3438 	    {
3439 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3440 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3441 				     fd->loops[i].n1);
3442 	      else
3443 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3444 				     fd->loops[i].n2);
3445 	    }
3446 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3447 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3448 				 fd->loops[i].n2);
3449 	  else
3450 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3451 				 fd->loops[i].n1);
3452 	}
3453       if (cond)
3454 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3455       else
3456 	cond = t;
3457 
3458       off = fold_convert_loc (loc, itype, off);
3459 
3460       if (step
3461 	  || (fd->loops[i].cond_code == LT_EXPR
3462 	      ? !integer_onep (fd->loops[i].step)
3463 	      : !integer_minus_onep (fd->loops[i].step)))
3464 	{
3465 	  if (step == NULL_TREE
3466 	      && TYPE_UNSIGNED (itype)
3467 	      && fd->loops[i].cond_code == GT_EXPR)
3468 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3469 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
3470 						  s));
3471 	  else
3472 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3473 				 orig_off ? orig_off : off, s);
3474 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3475 			       build_int_cst (itype, 0));
3476 	  if (integer_zerop (t) && !warned_step)
3477 	    {
3478 	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3479 				  "refers to iteration never in the iteration "
3480 				  "space");
3481 	      warned_step = true;
3482 	    }
3483 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3484 				  cond, t);
3485 	}
3486 
3487       if (i <= fd->collapse - 1 && fd->collapse > 1)
3488 	t = fd->loop.v;
3489       else if (counts[i])
3490 	t = counts[i];
3491       else
3492 	{
3493 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3494 			       fd->loops[i].v, fd->loops[i].n1);
3495 	  t = fold_convert_loc (loc, fd->iter_type, t);
3496 	}
3497       if (step)
3498 	/* We have divided off by step already earlier.  */;
3499       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3500 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3501 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
3502 						s));
3503       else
3504 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3505       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3506 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3507       off = fold_convert_loc (loc, fd->iter_type, off);
3508       if (i <= fd->collapse - 1 && fd->collapse > 1)
3509 	{
3510 	  if (i)
3511 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3512 				   off);
3513 	  if (i < fd->collapse - 1)
3514 	    {
3515 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3516 				      counts[i]);
3517 	      continue;
3518 	    }
3519 	}
3520       off = unshare_expr (off);
3521       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3522       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3523 				    true, GSI_SAME_STMT);
3524       args.safe_push (t);
3525     }
3526   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3527   gimple_set_location (g, loc);
3528   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3529 
3530   cond = unshare_expr (cond);
3531   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3532 				   GSI_CONTINUE_LINKING);
3533   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3534   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3535   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3536   e1->probability = e3->probability.invert ();
3537   e1->flags = EDGE_TRUE_VALUE;
3538   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3539 
3540   *gsi = gsi_after_labels (e2->dest);
3541 }
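
/* An illustrative sketch, for a hypothetical

     #pragma omp for ordered(2)
     for (i = 0; i < m; i++)
       for (j = 0; j < n; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j + 1)
	   ...
	 }

   the clause expands to roughly

     if (i - 1 >= 0 && j + 1 < n)
       GOMP_doacross_wait (i - 1, j + 1);

   i.e. the runtime wait is guarded by the COND built above, which checks
   that the awaited iteration actually exists in the iteration space.  */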
3542 
3543 /* Expand all #pragma omp ordered depend(source) and
3544    #pragma omp ordered depend(sink:...) constructs in the current
3545    #pragma omp for ordered(n) region.  */
3546 
3547 static void
3548 expand_omp_ordered_source_sink (struct omp_region *region,
3549 				struct omp_for_data *fd, tree *counts,
3550 				basic_block cont_bb)
3551 {
3552   struct omp_region *inner;
3553   int i;
3554   for (i = fd->collapse - 1; i < fd->ordered; i++)
3555     if (i == fd->collapse - 1 && fd->collapse > 1)
3556       counts[i] = NULL_TREE;
3557     else if (i >= fd->collapse && !cont_bb)
3558       counts[i] = build_zero_cst (fd->iter_type);
3559     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3560 	     && integer_onep (fd->loops[i].step))
3561       counts[i] = NULL_TREE;
3562     else
3563       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3564   tree atype
3565     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3566   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3567   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3568 
3569   for (inner = region->inner; inner; inner = inner->next)
3570     if (inner->type == GIMPLE_OMP_ORDERED)
3571       {
3572 	gomp_ordered *ord_stmt = inner->ord_stmt;
3573 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3574 	location_t loc = gimple_location (ord_stmt);
3575 	tree c;
3576 	for (c = gimple_omp_ordered_clauses (ord_stmt);
3577 	     c; c = OMP_CLAUSE_CHAIN (c))
3578 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3579 	    break;
3580 	if (c)
3581 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
3582 	for (c = gimple_omp_ordered_clauses (ord_stmt);
3583 	     c; c = OMP_CLAUSE_CHAIN (c))
3584 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3585 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3586 	gsi_remove (&gsi, true);
3587       }
3588 }
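
/* A sketch of the bookkeeping set up above, assuming ordered(3) with
   collapse(1): counts[0..2] hold per-dimension iteration counters
   (.orditer temporaries, or NULL_TREE where V - N1 can be used
   directly), and counts[3] is the addressable .orditera array whose
   address is passed to GOMP_doacross_post for depend(source).  */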
3589 
3590 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3591    collapsed.  */
3592 
3593 static basic_block
3594 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3595 			      basic_block cont_bb, basic_block body_bb,
3596 			      basic_block l0_bb, bool ordered_lastprivate)
3597 {
3598   if (fd->ordered == fd->collapse)
3599     return cont_bb;
3600 
3601   if (!cont_bb)
3602     {
3603       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3604       for (int i = fd->collapse; i < fd->ordered; i++)
3605 	{
3606 	  tree type = TREE_TYPE (fd->loops[i].v);
3607 	  tree n1 = fold_convert (type, fd->loops[i].n1);
3608 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3609 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3610 			      size_int (i - fd->collapse + 1),
3611 			      NULL_TREE, NULL_TREE);
3612 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3613 	}
3614       return NULL;
3615     }
3616 
3617   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3618     {
3619       tree t, type = TREE_TYPE (fd->loops[i].v);
3620       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3621       expand_omp_build_assign (&gsi, fd->loops[i].v,
3622 			       fold_convert (type, fd->loops[i].n1));
3623       if (counts[i])
3624 	expand_omp_build_assign (&gsi, counts[i],
3625 				 build_zero_cst (fd->iter_type));
3626       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3627 			  size_int (i - fd->collapse + 1),
3628 			  NULL_TREE, NULL_TREE);
3629       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3630       if (!gsi_end_p (gsi))
3631 	gsi_prev (&gsi);
3632       else
3633 	gsi = gsi_last_bb (body_bb);
3634       edge e1 = split_block (body_bb, gsi_stmt (gsi));
3635       basic_block new_body = e1->dest;
3636       if (body_bb == cont_bb)
3637 	cont_bb = new_body;
3638       edge e2 = NULL;
3639       basic_block new_header;
3640       if (EDGE_COUNT (cont_bb->preds) > 0)
3641 	{
3642 	  gsi = gsi_last_bb (cont_bb);
3643 	  if (POINTER_TYPE_P (type))
3644 	    t = fold_build_pointer_plus (fd->loops[i].v,
3645 					 fold_convert (sizetype,
3646 						       fd->loops[i].step));
3647 	  else
3648 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3649 			     fold_convert (type, fd->loops[i].step));
3650 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3651 	  if (counts[i])
3652 	    {
3653 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3654 			       build_int_cst (fd->iter_type, 1));
3655 	      expand_omp_build_assign (&gsi, counts[i], t);
3656 	      t = counts[i];
3657 	    }
3658 	  else
3659 	    {
3660 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3661 			       fd->loops[i].v, fd->loops[i].n1);
3662 	      t = fold_convert (fd->iter_type, t);
3663 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3664 					    true, GSI_SAME_STMT);
3665 	    }
3666 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3667 			 size_int (i - fd->collapse + 1),
3668 			 NULL_TREE, NULL_TREE);
3669 	  expand_omp_build_assign (&gsi, aref, t);
3670 	  gsi_prev (&gsi);
3671 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
3672 	  new_header = e2->dest;
3673 	}
3674       else
3675 	new_header = cont_bb;
3676       gsi = gsi_after_labels (new_header);
3677       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3678 					 true, GSI_SAME_STMT);
3679       tree n2
3680 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3681 				    true, NULL_TREE, true, GSI_SAME_STMT);
3682       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3683       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3684       edge e3 = split_block (new_header, gsi_stmt (gsi));
3685       cont_bb = e3->dest;
3686       remove_edge (e1);
3687       make_edge (body_bb, new_header, EDGE_FALLTHRU);
3688       e3->flags = EDGE_FALSE_VALUE;
3689       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3690       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3691       e1->probability = e3->probability.invert ();
3692 
3693       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3694       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3695 
3696       if (e2)
3697 	{
3698 	  class loop *loop = alloc_loop ();
3699 	  loop->header = new_header;
3700 	  loop->latch = e2->src;
3701 	  add_loop (loop, l0_bb->loop_father);
3702 	}
3703     }
3704 
3705   /* If there are any lastprivate clauses and it is possible some loops
3706      might have zero iterations, ensure all the decls are initialized,
3707      otherwise we could crash evaluating C++ class iterators with lastprivate
3708      clauses.  */
3709   bool need_inits = false;
3710   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3711     if (need_inits)
3712       {
3713 	tree type = TREE_TYPE (fd->loops[i].v);
3714 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3715 	expand_omp_build_assign (&gsi, fd->loops[i].v,
3716 				 fold_convert (type, fd->loops[i].n1));
3717       }
3718     else
3719       {
3720 	tree type = TREE_TYPE (fd->loops[i].v);
3721 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
3722 				      boolean_type_node,
3723 				      fold_convert (type, fd->loops[i].n1),
3724 				      fold_convert (type, fd->loops[i].n2));
3725 	if (!integer_onep (this_cond))
3726 	  need_inits = true;
3727       }
3728 
3729   return cont_bb;
3730 }
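
/* E.g. for ordered(2) with fd->collapse == 1, the code above wraps the
   body in one non-collapsed loop, roughly (sketch):

     for (V2 = N21; V2 cond2 N22; V2 += STEP2)
       {
	 .orditera[1] = <current iteration count of V2>;
	 BODY;
       }

   so that depend(source)/depend(sink) can consult the counter of the
   non-collapsed dimension.  */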
3731 
3732 /* A subroutine of expand_omp_for.  Generate code for a parallel
3733    loop with any schedule.  Given parameters:
3734 
3735 	for (V = N1; V cond N2; V += STEP) BODY;
3736 
3737    where COND is "<" or ">", we generate pseudocode
3738 
3739 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3740 	if (more) goto L0; else goto L3;
3741     L0:
3742 	V = istart0;
3743 	iend = iend0;
3744     L1:
3745 	BODY;
3746 	V += STEP;
3747 	if (V cond iend) goto L1; else goto L2;
3748     L2:
3749 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3750     L3:
3751 
3752     If this is a combined omp parallel loop, instead of the call to
3753     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3754     If this is a gimple_omp_for_combined_p loop, then instead of assigning
3755     V and iend in L0 we assign the first two _looptemp_ clause decls of the
3756     inner GIMPLE_OMP_FOR and V += STEP; and
3757     if (V cond iend) goto L1; else goto L2; are removed.
3758 
3759     For collapsed loops, given parameters:
3760       collapse(3)
3761       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3762 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3763 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3764 	    BODY;
3765 
3766     we generate pseudocode
3767 
3768 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3769 	if (cond3 is <)
3770 	  adj = STEP3 - 1;
3771 	else
3772 	  adj = STEP3 + 1;
3773 	count3 = (adj + N32 - N31) / STEP3;
3774 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3775 	if (cond2 is <)
3776 	  adj = STEP2 - 1;
3777 	else
3778 	  adj = STEP2 + 1;
3779 	count2 = (adj + N22 - N21) / STEP2;
3780 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3781 	if (cond1 is <)
3782 	  adj = STEP1 - 1;
3783 	else
3784 	  adj = STEP1 + 1;
3785 	count1 = (adj + N12 - N11) / STEP1;
3786 	count = count1 * count2 * count3;
3787 	goto Z1;
3788     Z0:
3789 	count = 0;
3790     Z1:
3791 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3792 	if (more) goto L0; else goto L3;
3793     L0:
3794 	V = istart0;
3795 	T = V;
3796 	V3 = N31 + (T % count3) * STEP3;
3797 	T = T / count3;
3798 	V2 = N21 + (T % count2) * STEP2;
3799 	T = T / count2;
3800 	V1 = N11 + T * STEP1;
3801 	iend = iend0;
3802     L1:
3803 	BODY;
3804 	V += 1;
3805 	if (V < iend) goto L10; else goto L2;
3806     L10:
3807 	V3 += STEP3;
3808 	if (V3 cond3 N32) goto L1; else goto L11;
3809     L11:
3810 	V3 = N31;
3811 	V2 += STEP2;
3812 	if (V2 cond2 N22) goto L1; else goto L12;
3813     L12:
3814 	V2 = N21;
3815 	V1 += STEP1;
3816 	goto L1;
3817     L2:
3818 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3819     L3:
3820 
3821       */
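
/* Which GOMP_loop_* entrypoint "foo" stands for above is chosen by the
   caller (expand_omp_for) from the schedule kind; e.g. for
   schedule(dynamic, CHUNK) with a long iteration type, START_FN is
   BUILT_IN_GOMP_LOOP_DYNAMIC_START and NEXT_FN is
   BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, so the pseudocode above becomes

	more = GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
	...
	if (GOMP_loop_dynamic_next (&istart0, &iend0)) goto L0; else goto L3;  */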
3822 
3823 static void
3824 expand_omp_for_generic (struct omp_region *region,
3825 			struct omp_for_data *fd,
3826 			enum built_in_function start_fn,
3827 			enum built_in_function next_fn,
3828 			tree sched_arg,
3829 			gimple *inner_stmt)
3830 {
3831   tree type, istart0, iend0, iend;
3832   tree t, vmain, vback, bias = NULL_TREE;
3833   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3834   basic_block l2_bb = NULL, l3_bb = NULL;
3835   gimple_stmt_iterator gsi;
3836   gassign *assign_stmt;
3837   bool in_combined_parallel = is_combined_parallel (region);
3838   bool broken_loop = region->cont == NULL;
3839   edge e, ne;
3840   tree *counts = NULL;
3841   int i;
3842   bool ordered_lastprivate = false;
3843 
3844   gcc_assert (!broken_loop || !in_combined_parallel);
3845   gcc_assert (fd->iter_type == long_integer_type_node
3846 	      || !in_combined_parallel);
3847 
3848   entry_bb = region->entry;
3849   cont_bb = region->cont;
3850   collapse_bb = NULL;
3851   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3852   gcc_assert (broken_loop
3853 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3854   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3855   l1_bb = single_succ (l0_bb);
3856   if (!broken_loop)
3857     {
3858       l2_bb = create_empty_bb (cont_bb);
3859       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3860 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3861 		      == l1_bb));
3862       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3863     }
3864   else
3865     l2_bb = NULL;
3866   l3_bb = BRANCH_EDGE (entry_bb)->dest;
3867   exit_bb = region->exit;
3868 
3869   gsi = gsi_last_nondebug_bb (entry_bb);
3870 
3871   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3872   if (fd->ordered
3873       && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3874 			  OMP_CLAUSE_LASTPRIVATE))
3875     ordered_lastprivate = true;
3876   tree reductions = NULL_TREE;
3877   tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3878   tree memv = NULL_TREE;
3879   if (fd->lastprivate_conditional)
3880     {
3881       tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3882 				OMP_CLAUSE__CONDTEMP_);
3883       if (fd->have_pointer_condtemp)
3884 	condtemp = OMP_CLAUSE_DECL (c);
3885       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3886       cond_var = OMP_CLAUSE_DECL (c);
3887     }
3888   if (sched_arg)
3889     {
3890       if (fd->have_reductemp)
3891 	{
3892 	  tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3893 				    OMP_CLAUSE__REDUCTEMP_);
3894 	  reductions = OMP_CLAUSE_DECL (c);
3895 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3896 	  gimple *g = SSA_NAME_DEF_STMT (reductions);
3897 	  reductions = gimple_assign_rhs1 (g);
3898 	  OMP_CLAUSE_DECL (c) = reductions;
3899 	  entry_bb = gimple_bb (g);
3900 	  edge e = split_block (entry_bb, g);
3901 	  if (region->entry == entry_bb)
3902 	    region->entry = e->dest;
3903 	  gsi = gsi_last_bb (entry_bb);
3904 	}
3905       else
3906 	reductions = null_pointer_node;
3907       if (fd->have_pointer_condtemp)
3908 	{
3909 	  tree type = TREE_TYPE (condtemp);
3910 	  memv = create_tmp_var (type);
3911 	  TREE_ADDRESSABLE (memv) = 1;
3912 	  unsigned HOST_WIDE_INT sz
3913 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3914 	  sz *= fd->lastprivate_conditional;
3915 	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3916 				   false);
3917 	  mem = build_fold_addr_expr (memv);
3918 	}
3919       else
3920 	mem = null_pointer_node;
3921     }
3922   if (fd->collapse > 1 || fd->ordered)
3923     {
3924       int first_zero_iter1 = -1, first_zero_iter2 = -1;
3925       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3926 
3927       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3928       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3929 				  zero_iter1_bb, first_zero_iter1,
3930 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3931 
3932       if (zero_iter1_bb)
3933 	{
3934 	  /* Some counts[i] vars might be uninitialized if
3935 	     some loop has zero iterations.  But the body shouldn't
3936 	     be executed in that case, so just avoid uninit warnings.  */
3937 	  for (i = first_zero_iter1;
3938 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3939 	    if (SSA_VAR_P (counts[i]))
3940 	      suppress_warning (counts[i], OPT_Wuninitialized);
3941 	  gsi_prev (&gsi);
3942 	  e = split_block (entry_bb, gsi_stmt (gsi));
3943 	  entry_bb = e->dest;
3944 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3945 	  gsi = gsi_last_nondebug_bb (entry_bb);
3946 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3947 				   get_immediate_dominator (CDI_DOMINATORS,
3948 							    zero_iter1_bb));
3949 	}
3950       if (zero_iter2_bb)
3951 	{
3952 	  /* Some counts[i] vars might be uninitialized if
3953 	     some loop has zero iterations.  But the body shouldn't
3954 	     be executed in that case, so just avoid uninit warnings.  */
3955 	  for (i = first_zero_iter2; i < fd->ordered; i++)
3956 	    if (SSA_VAR_P (counts[i]))
3957 	      suppress_warning (counts[i], OPT_Wuninitialized);
3958 	  if (zero_iter1_bb)
3959 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3960 	  else
3961 	    {
3962 	      gsi_prev (&gsi);
3963 	      e = split_block (entry_bb, gsi_stmt (gsi));
3964 	      entry_bb = e->dest;
3965 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3966 	      gsi = gsi_last_nondebug_bb (entry_bb);
3967 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3968 				       get_immediate_dominator
3969 					 (CDI_DOMINATORS, zero_iter2_bb));
3970 	    }
3971 	}
3972       if (fd->collapse == 1)
3973 	{
3974 	  counts[0] = fd->loop.n2;
3975 	  fd->loop = fd->loops[0];
3976 	}
3977     }
3978 
3979   type = TREE_TYPE (fd->loop.v);
3980   istart0 = create_tmp_var (fd->iter_type, ".istart0");
3981   iend0 = create_tmp_var (fd->iter_type, ".iend0");
3982   TREE_ADDRESSABLE (istart0) = 1;
3983   TREE_ADDRESSABLE (iend0) = 1;
3984 
3985   /* See if we need to bias by LLONG_MIN.  */
3986   if (fd->iter_type == long_long_unsigned_type_node
3987       && TREE_CODE (type) == INTEGER_TYPE
3988       && !TYPE_UNSIGNED (type)
3989       && fd->ordered == 0)
3990     {
3991       tree n1, n2;
3992 
3993       if (fd->loop.cond_code == LT_EXPR)
3994 	{
3995 	  n1 = fd->loop.n1;
3996 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3997 	}
3998       else
3999 	{
4000 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4001 	  n2 = fd->loop.n1;
4002 	}
4003       if (TREE_CODE (n1) != INTEGER_CST
4004 	  || TREE_CODE (n2) != INTEGER_CST
4005 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4006 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4007     }
4008 
4009   gimple_stmt_iterator gsif = gsi;
4010   gsi_prev (&gsif);
4011 
4012   tree arr = NULL_TREE;
4013   if (in_combined_parallel)
4014     {
4015       gcc_assert (fd->ordered == 0);
4016       /* In a combined parallel loop, emit a call to
4017 	 GOMP_loop_foo_next.  */
4018       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4019 			   build_fold_addr_expr (istart0),
4020 			   build_fold_addr_expr (iend0));
4021     }
4022   else
4023     {
4024       tree t0, t1, t2, t3, t4;
4025       /* If this is not a combined parallel loop, emit a call to
4026 	 GOMP_loop_foo_start in ENTRY_BB.  */
4027       t4 = build_fold_addr_expr (iend0);
4028       t3 = build_fold_addr_expr (istart0);
4029       if (fd->ordered)
4030 	{
4031 	  t0 = build_int_cst (unsigned_type_node,
4032 			      fd->ordered - fd->collapse + 1);
4033 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4034 							fd->ordered
4035 							- fd->collapse + 1),
4036 				".omp_counts");
4037 	  DECL_NAMELESS (arr) = 1;
4038 	  TREE_ADDRESSABLE (arr) = 1;
4039 	  TREE_STATIC (arr) = 1;
4040 	  vec<constructor_elt, va_gc> *v;
4041 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
4042 	  int idx;
4043 
4044 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4045 	    {
4046 	      tree c;
4047 	      if (idx == 0 && fd->collapse > 1)
4048 		c = fd->loop.n2;
4049 	      else
4050 		c = counts[idx + fd->collapse - 1];
4051 	      tree purpose = size_int (idx);
4052 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4053 	      if (TREE_CODE (c) != INTEGER_CST)
4054 		TREE_STATIC (arr) = 0;
4055 	    }
4056 
4057 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4058 	  if (!TREE_STATIC (arr))
4059 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4060 						    void_type_node, arr),
4061 				      true, NULL_TREE, true, GSI_SAME_STMT);
4062 	  t1 = build_fold_addr_expr (arr);
4063 	  t2 = NULL_TREE;
4064 	}
4065       else
4066 	{
4067 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
4068 	  t1 = fd->loop.n2;
4069 	  t0 = fd->loop.n1;
4070 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071 	    {
4072 	      tree innerc
4073 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4074 				   OMP_CLAUSE__LOOPTEMP_);
4075 	      gcc_assert (innerc);
4076 	      t0 = OMP_CLAUSE_DECL (innerc);
4077 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4078 					OMP_CLAUSE__LOOPTEMP_);
4079 	      gcc_assert (innerc);
4080 	      t1 = OMP_CLAUSE_DECL (innerc);
4081 	    }
4082 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
4083 	      && TYPE_PRECISION (TREE_TYPE (t0))
4084 		 != TYPE_PRECISION (fd->iter_type))
4085 	    {
4086 	      /* Avoid casting pointers to an integer of a different size.  */
4087 	      tree itype = signed_type_for (type);
4088 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4089 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4090 	    }
4091 	  else
4092 	    {
4093 	      t1 = fold_convert (fd->iter_type, t1);
4094 	      t0 = fold_convert (fd->iter_type, t0);
4095 	    }
4096 	  if (bias)
4097 	    {
4098 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4099 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4100 	    }
4101 	}
4102       if (fd->iter_type == long_integer_type_node || fd->ordered)
4103 	{
4104 	  if (fd->chunk_size)
4105 	    {
4106 	      t = fold_convert (fd->iter_type, fd->chunk_size);
4107 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4108 	      if (sched_arg)
4109 		{
4110 		  if (fd->ordered)
4111 		    t = build_call_expr (builtin_decl_explicit (start_fn),
4112 					 8, t0, t1, sched_arg, t, t3, t4,
4113 					 reductions, mem);
4114 		  else
4115 		    t = build_call_expr (builtin_decl_explicit (start_fn),
4116 					 9, t0, t1, t2, sched_arg, t, t3, t4,
4117 					 reductions, mem);
4118 		}
4119 	      else if (fd->ordered)
4120 		t = build_call_expr (builtin_decl_explicit (start_fn),
4121 				     5, t0, t1, t, t3, t4);
4122 	      else
4123 		t = build_call_expr (builtin_decl_explicit (start_fn),
4124 				     6, t0, t1, t2, t, t3, t4);
4125 	    }
4126 	  else if (fd->ordered)
4127 	    t = build_call_expr (builtin_decl_explicit (start_fn),
4128 				 4, t0, t1, t3, t4);
4129 	  else
4130 	    t = build_call_expr (builtin_decl_explicit (start_fn),
4131 				 5, t0, t1, t2, t3, t4);
4132 	}
4133       else
4134 	{
4135 	  tree t5;
4136 	  tree c_bool_type;
4137 	  tree bfn_decl;
4138 
4139 	  /* The GOMP_loop_ull_*start functions have an additional boolean
4140 	     argument, true for < loops and false for > loops.
4141 	     In Fortran, the C bool type can be different from
4142 	     boolean_type_node.  */
4143 	  bfn_decl = builtin_decl_explicit (start_fn);
4144 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4145 	  t5 = build_int_cst (c_bool_type,
4146 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
4147 	  if (fd->chunk_size)
4148 	    {
4149 	      tree bfn_decl = builtin_decl_explicit (start_fn);
4150 	      t = fold_convert (fd->iter_type, fd->chunk_size);
4151 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4152 	      if (sched_arg)
4153 		t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4154 				     t, t3, t4, reductions, mem);
4155 	      else
4156 		t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4157 	    }
4158 	  else
4159 	    t = build_call_expr (builtin_decl_explicit (start_fn),
4160 				 6, t5, t0, t1, t2, t3, t4);
4161 	}
4162     }
4163   if (TREE_TYPE (t) != boolean_type_node)
4164     t = fold_build2 (NE_EXPR, boolean_type_node,
4165 		     t, build_int_cst (TREE_TYPE (t), 0));
4166   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4167 				true, GSI_SAME_STMT);
4168   if (arr && !TREE_STATIC (arr))
4169     {
4170       tree clobber = build_clobber (TREE_TYPE (arr));
4171       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4172 			 GSI_SAME_STMT);
4173     }
4174   if (fd->have_pointer_condtemp)
4175     expand_omp_build_assign (&gsi, condtemp, memv, false);
4176   if (fd->have_reductemp)
4177     {
4178       gimple *g = gsi_stmt (gsi);
4179       gsi_remove (&gsi, true);
4180       release_ssa_name (gimple_assign_lhs (g));
4181 
4182       entry_bb = region->entry;
4183       gsi = gsi_last_nondebug_bb (entry_bb);
4184 
4185       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4186     }
4187   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4188 
4189   /* Remove the GIMPLE_OMP_FOR statement.  */
4190   gsi_remove (&gsi, true);
4191 
4192   if (gsi_end_p (gsif))
4193     gsif = gsi_after_labels (gsi_bb (gsif));
4194   gsi_next (&gsif);
4195 
4196   /* Iteration setup for sequential loop goes in L0_BB.  */
4197   tree startvar = fd->loop.v;
4198   tree endvar = NULL_TREE;
4199 
4200   if (gimple_omp_for_combined_p (fd->for_stmt))
4201     {
4202       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4203 		  && gimple_omp_for_kind (inner_stmt)
4204 		     == GF_OMP_FOR_KIND_SIMD);
4205       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4206 				     OMP_CLAUSE__LOOPTEMP_);
4207       gcc_assert (innerc);
4208       startvar = OMP_CLAUSE_DECL (innerc);
4209       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4210 				OMP_CLAUSE__LOOPTEMP_);
4211       gcc_assert (innerc);
4212       endvar = OMP_CLAUSE_DECL (innerc);
4213     }
4214 
4215   gsi = gsi_start_bb (l0_bb);
4216   t = istart0;
4217   if (fd->ordered && fd->collapse == 1)
4218     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4219 		     fold_convert (fd->iter_type, fd->loop.step));
4220   else if (bias)
4221     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4222   if (fd->ordered && fd->collapse == 1)
4223     {
4224       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4225 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4226 			 fd->loop.n1, fold_convert (sizetype, t));
4227       else
4228 	{
4229 	  t = fold_convert (TREE_TYPE (startvar), t);
4230 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4231 			   fd->loop.n1, t);
4232 	}
4233     }
4234   else
4235     {
4236       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4237 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4238       t = fold_convert (TREE_TYPE (startvar), t);
4239     }
4240   t = force_gimple_operand_gsi (&gsi, t,
4241 				DECL_P (startvar)
4242 				&& TREE_ADDRESSABLE (startvar),
4243 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4244   assign_stmt = gimple_build_assign (startvar, t);
4245   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4246   if (cond_var)
4247     {
4248       tree itype = TREE_TYPE (cond_var);
4249       /* For lastprivate(conditional:) itervar, we need some iteration
4250 	 counter that starts at a non-zero unsigned value and increases.
4251 	 Prefer as few IVs as possible, so if we can use startvar
4252 	 itself, use that, or startvar + constant (those would be
4253 	 incremented with step), and as a last resort use istart0 + 1
4254 	 incremented by 1.  */
4255       if ((fd->ordered && fd->collapse == 1)
4256 	  || bias
4257 	  || POINTER_TYPE_P (type)
4258 	  || TREE_CODE (fd->loop.n1) != INTEGER_CST
4259 	  || fd->loop.cond_code != LT_EXPR)
4260 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4261 			 build_int_cst (itype, 1));
4262       else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4263 	t = fold_convert (itype, t);
4264       else
4265 	{
4266 	  tree c = fold_convert (itype, fd->loop.n1);
4267 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4268 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4269 	}
4270       t = force_gimple_operand_gsi (&gsi, t, false,
4271 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
4272       assign_stmt = gimple_build_assign (cond_var, t);
4273       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4274     }
4275 
4276   t = iend0;
4277   if (fd->ordered && fd->collapse == 1)
4278     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4279 		     fold_convert (fd->iter_type, fd->loop.step));
4280   else if (bias)
4281     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4282   if (fd->ordered && fd->collapse == 1)
4283     {
4284       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4285 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4286 			 fd->loop.n1, fold_convert (sizetype, t));
4287       else
4288 	{
4289 	  t = fold_convert (TREE_TYPE (startvar), t);
4290 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4291 			   fd->loop.n1, t);
4292 	}
4293     }
4294   else
4295     {
4296       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4297 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4298       t = fold_convert (TREE_TYPE (startvar), t);
4299     }
4300   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4301 				   false, GSI_CONTINUE_LINKING);
4302   if (endvar)
4303     {
4304       assign_stmt = gimple_build_assign (endvar, iend);
4305       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4306       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4307 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
4308       else
4309 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4310       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4311     }
4312   /* Handle linear clause adjustments.  */
4313   tree itercnt = NULL_TREE;
4314   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4315     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4316 	 c; c = OMP_CLAUSE_CHAIN (c))
4317       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4318 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4319 	{
4320 	  tree d = OMP_CLAUSE_DECL (c);
4321 	  tree t = d, a, dest;
4322 	  if (omp_privatize_by_reference (t))
4323 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4324 	  tree type = TREE_TYPE (t);
4325 	  if (POINTER_TYPE_P (type))
4326 	    type = sizetype;
4327 	  dest = unshare_expr (t);
4328 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4329 	  expand_omp_build_assign (&gsif, v, t);
4330 	  if (itercnt == NULL_TREE)
4331 	    {
4332 	      itercnt = startvar;
4333 	      tree n1 = fd->loop.n1;
4334 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4335 		{
4336 		  itercnt
4337 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4338 				    itercnt);
4339 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
4340 		}
4341 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4342 				     itercnt, n1);
4343 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4344 				     itercnt, fd->loop.step);
4345 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4346 						  NULL_TREE, false,
4347 						  GSI_CONTINUE_LINKING);
4348 	    }
4349 	  a = fold_build2 (MULT_EXPR, type,
4350 			   fold_convert (type, itercnt),
4351 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4352 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4353 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4354 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4355 					false, GSI_CONTINUE_LINKING);
4356 	  expand_omp_build_assign (&gsi, dest, t, true);
4357 	}
4358   if (fd->collapse > 1)
4359     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4360 
4361   if (fd->ordered)
4362     {
4363       /* Until now, the counts array contained the number of iterations
4364 	 (or a variable holding it) for the ith loop.  From now on, we
4365 	 need those counts only for the collapsed loops, and only for
4366 	 the 2nd through the last collapsed one.  Move them one element
4367 	 earlier; we'll use counts[fd->collapse - 1] for the first
4368 	 source/sink iteration counter and so on, and counts[fd->ordered]
4369 	 as the array holding the current counter values for
4370 	 depend(source).  */
4371       if (fd->collapse > 1)
4372 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4373       if (broken_loop)
4374 	{
4375 	  int i;
4376 	  for (i = fd->collapse; i < fd->ordered; i++)
4377 	    {
4378 	      tree type = TREE_TYPE (fd->loops[i].v);
4379 	      tree this_cond
4380 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4381 			       fold_convert (type, fd->loops[i].n1),
4382 			       fold_convert (type, fd->loops[i].n2));
4383 	      if (!integer_onep (this_cond))
4384 		break;
4385 	    }
4386 	  if (i < fd->ordered)
4387 	    {
4388 	      if (entry_bb->loop_father != l0_bb->loop_father)
4389 		{
4390 		  remove_bb_from_loops (l0_bb);
4391 		  add_bb_to_loop (l0_bb, entry_bb->loop_father);
4392 		  gcc_assert (single_succ (l0_bb) == l1_bb);
4393 		}
4394 	      cont_bb
4395 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4396 	      add_bb_to_loop (cont_bb, l0_bb->loop_father);
4397 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4398 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4399 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4400 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4401 	      make_edge (cont_bb, l1_bb, 0);
4402 	      l2_bb = create_empty_bb (cont_bb);
4403 	      broken_loop = false;
4404 	    }
4405 	}
4406       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4407       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4408 					      l0_bb, ordered_lastprivate);
4409       if (counts[fd->collapse - 1])
4410 	{
4411 	  gcc_assert (fd->collapse == 1);
4412 	  gsi = gsi_last_bb (l0_bb);
4413 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4414 				   istart0, true);
4415 	  if (cont_bb)
4416 	    {
4417 	      gsi = gsi_last_bb (cont_bb);
4418 	      t = fold_build2 (PLUS_EXPR, fd->iter_type,
4419 			       counts[fd->collapse - 1],
4420 			       build_int_cst (fd->iter_type, 1));
4421 	      expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4422 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
4423 				  counts[fd->ordered], size_zero_node,
4424 				  NULL_TREE, NULL_TREE);
4425 	      expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4426 	    }
4427 	  t = counts[fd->collapse - 1];
4428 	}
4429       else if (fd->collapse > 1)
4430 	t = fd->loop.v;
4431       else
4432 	{
4433 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4434 			   fd->loops[0].v, fd->loops[0].n1);
4435 	  t = fold_convert (fd->iter_type, t);
4436 	}
4437       gsi = gsi_last_bb (l0_bb);
4438       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4439 			  size_zero_node, NULL_TREE, NULL_TREE);
4440       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4441 				    false, GSI_CONTINUE_LINKING);
4442       expand_omp_build_assign (&gsi, aref, t, true);
4443     }
4444 
4445   if (!broken_loop)
4446     {
4447       /* Code to control the increment and predicate for the sequential
4448 	 loop goes in the CONT_BB.  */
4449       gsi = gsi_last_nondebug_bb (cont_bb);
4450       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4451       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4452       vmain = gimple_omp_continue_control_use (cont_stmt);
4453       vback = gimple_omp_continue_control_def (cont_stmt);
4454 
4455       if (cond_var)
4456 	{
4457 	  tree itype = TREE_TYPE (cond_var);
4458 	  tree t2;
4459 	  if ((fd->ordered && fd->collapse == 1)
4460 	       || bias
4461 	       || POINTER_TYPE_P (type)
4462 	       || TREE_CODE (fd->loop.n1) != INTEGER_CST
4463 	       || fd->loop.cond_code != LT_EXPR)
4464 	    t2 = build_int_cst (itype, 1);
4465 	  else
4466 	    t2 = fold_convert (itype, fd->loop.step);
4467 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4468 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
4469 					 NULL_TREE, true, GSI_SAME_STMT);
4470 	  assign_stmt = gimple_build_assign (cond_var, t2);
4471 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4472 	}
4473 
4474       if (!gimple_omp_for_combined_p (fd->for_stmt))
4475 	{
4476 	  if (POINTER_TYPE_P (type))
4477 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
4478 	  else
4479 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4480 	  t = force_gimple_operand_gsi (&gsi, t,
4481 					DECL_P (vback)
4482 					&& TREE_ADDRESSABLE (vback),
4483 					NULL_TREE, true, GSI_SAME_STMT);
4484 	  assign_stmt = gimple_build_assign (vback, t);
4485 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4486 
4487 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4488 	    {
4489 	      tree tem;
4490 	      if (fd->collapse > 1)
4491 		tem = fd->loop.v;
4492 	      else
4493 		{
4494 		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4495 				     fd->loops[0].v, fd->loops[0].n1);
4496 		  tem = fold_convert (fd->iter_type, tem);
4497 		}
4498 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
4499 				  counts[fd->ordered], size_zero_node,
4500 				  NULL_TREE, NULL_TREE);
4501 	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4502 					      true, GSI_SAME_STMT);
4503 	      expand_omp_build_assign (&gsi, aref, tem);
4504 	    }
4505 
4506 	  t = build2 (fd->loop.cond_code, boolean_type_node,
4507 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4508 		      iend);
4509 	  gcond *cond_stmt = gimple_build_cond_empty (t);
4510 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4511 	}
4512 
4513       /* Remove GIMPLE_OMP_CONTINUE.  */
4514       gsi_remove (&gsi, true);
4515 
4516       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4517 	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4518 
4519       /* Emit code to get the next parallel iteration in L2_BB.  */
4520       gsi = gsi_start_bb (l2_bb);
4521 
4522       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4523 			   build_fold_addr_expr (istart0),
4524 			   build_fold_addr_expr (iend0));
4525       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4526 				    false, GSI_CONTINUE_LINKING);
4527       if (TREE_TYPE (t) != boolean_type_node)
4528 	t = fold_build2 (NE_EXPR, boolean_type_node,
4529 			 t, build_int_cst (TREE_TYPE (t), 0));
4530       gcond *cond_stmt = gimple_build_cond_empty (t);
4531       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4532     }
4533 
4534   /* Add the loop cleanup function.  */
4535   gsi = gsi_last_nondebug_bb (exit_bb);
4536   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4537     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4538   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4539     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4540   else
4541     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4542   gcall *call_stmt = gimple_build_call (t, 0);
4543   if (fd->ordered)
4544     {
4545       tree arr = counts[fd->ordered];
4546       tree clobber = build_clobber (TREE_TYPE (arr));
4547       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4548 			GSI_SAME_STMT);
4549     }
4550   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4551     {
4552       gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4553       if (fd->have_reductemp)
4554 	{
4555 	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4556 					   gimple_call_lhs (call_stmt));
4557 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4558 	}
4559     }
4560   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4561   gsi_remove (&gsi, true);
4562 
4563   /* Connect the new blocks.  */
4564   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4565   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4566 
4567   if (!broken_loop)
4568     {
4569       gimple_seq phis;
4570 
4571       e = find_edge (cont_bb, l3_bb);
4572       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4573 
4574       phis = phi_nodes (l3_bb);
4575       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4576 	{
4577 	  gimple *phi = gsi_stmt (gsi);
4578 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4579 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
4580 	}
4581       remove_edge (e);
4582 
4583       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4584       e = find_edge (cont_bb, l1_bb);
4585       if (e == NULL)
4586 	{
4587 	  e = BRANCH_EDGE (cont_bb);
4588 	  gcc_assert (single_succ (e->dest) == l1_bb);
4589 	}
4590       if (gimple_omp_for_combined_p (fd->for_stmt))
4591 	{
4592 	  remove_edge (e);
4593 	  e = NULL;
4594 	}
4595       else if (fd->collapse > 1)
4596 	{
4597 	  remove_edge (e);
4598 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4599 	}
4600       else
4601 	e->flags = EDGE_TRUE_VALUE;
4602       if (e)
4603 	{
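	  /* Guess that the sequential loop iterates several times per
	     chunk: the edge that continues iterating is predicted taken
	     7 times out of 8, the exit towards the next-chunk code in
	     l2_bb the remaining 1/8.  */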
4604 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4605 	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4606 	}
4607       else
4608 	{
4609 	  e = find_edge (cont_bb, l2_bb);
4610 	  e->flags = EDGE_FALLTHRU;
4611 	}
4612       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4613 
4614       if (gimple_in_ssa_p (cfun))
4615 	{
4616 	  /* Add phis to the outer loop that connect to the phis in the inner,
4617 	     original loop, and move the loop entry value of the inner phi to
4618 	     the loop entry value of the outer phi.  */
4619 	  gphi_iterator psi;
4620 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4621 	    {
4622 	      location_t locus;
4623 	      gphi *nphi;
4624 	      gphi *exit_phi = psi.phi ();
4625 
4626 	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
4627 		continue;
4628 
4629 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4630 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4631 
4632 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4633 	      edge latch_to_l1 = find_edge (latch, l1_bb);
4634 	      gphi *inner_phi
4635 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4636 
4637 	      tree t = gimple_phi_result (exit_phi);
4638 	      tree new_res = copy_ssa_name (t, NULL);
4639 	      nphi = create_phi_node (new_res, l0_bb);
4640 
4641 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4642 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4643 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4644 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4645 	      add_phi_arg (nphi, t, entry_to_l0, locus);
4646 
4647 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4648 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4649 
4650 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4651 	    }
4652 	}
4653 
4654       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4655 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
4656       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4657 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
4658       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4659 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
4660       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4661 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
4662 
4663       /* We enter expand_omp_for_generic with a loop.  This original loop may
4664 	 have its own loop struct, or it may be part of an outer loop struct
4665 	 (which may be the fake loop).  */
4666       class loop *outer_loop = entry_bb->loop_father;
4667       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4668 
4669       add_bb_to_loop (l2_bb, outer_loop);
4670 
4671       /* We've added a new loop around the original loop.  Allocate the
4672 	 corresponding loop struct.  */
4673       class loop *new_loop = alloc_loop ();
4674       new_loop->header = l0_bb;
4675       new_loop->latch = l2_bb;
4676       add_loop (new_loop, outer_loop);
4677 
4678       /* Allocate a loop structure for the original loop unless we already
4679 	 had one.  */
4680       if (!orig_loop_has_loop_struct
4681 	  && !gimple_omp_for_combined_p (fd->for_stmt))
4682 	{
4683 	  class loop *orig_loop = alloc_loop ();
4684 	  orig_loop->header = l1_bb;
4685 	  /* The loop may have multiple latches.  */
4686 	  add_loop (orig_loop, new_loop);
4687 	}
4688     }
4689 }
4690 
4691 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
4692    compute the needed allocation size: for !ALLOC the size of the team
4693    allocations, for ALLOC that of the thread allocations.  SZ is the
4694    initial size needed for other purposes, ALLOC_ALIGN the guaranteed
4695    alignment of the allocation in bytes and CNT the number of elements
4696    of each array (omp_get_num_threads () for !ALLOC, the number of
4697    iterations handled by the current thread for ALLOC).  If PTR is
4698    non-NULL, it is the start of the allocation; assign pointers into
4699    it to OMP_CLAUSE_DECL (c) of those _scantemp_ clauses.  */
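
/* For example (an illustrative sketch, not from any testcase): with two
   non-control _scantemp_ arrays whose element types have sizes 8 and 4
   and CNT = omp_get_num_threads (), the sizing call (PTR == NULL)
   returns SZ + 12 * CNT plus whatever padding is needed to keep each
   array aligned within the allocation.  */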
4700 
4701 static tree
4702 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4703 			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
4704 			   gimple_stmt_iterator *gsi, bool alloc)
4705 {
4706   tree eltsz = NULL_TREE;
4707   unsigned HOST_WIDE_INT preval = 0;
4708   if (ptr && sz)
4709     ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4710 		       ptr, size_int (sz));
4711   for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4712     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4713 	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4714 	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4715       {
4716 	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4717 	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4718 	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4719 	  {
4720 	    unsigned HOST_WIDE_INT szl
4721 	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4722 	    szl = least_bit_hwi (szl);
4723 	    if (szl)
4724 	      al = MIN (al, szl);
4725 	  }
4726 	if (ptr == NULL_TREE)
4727 	  {
4728 	    if (eltsz == NULL_TREE)
4729 	      eltsz = TYPE_SIZE_UNIT (pointee_type);
4730 	    else
4731 	      eltsz = size_binop (PLUS_EXPR, eltsz,
4732 				  TYPE_SIZE_UNIT (pointee_type));
4733 	  }
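	/* The first array can be aligned statically: if the guaranteed
	   allocation alignment covers AL, rounding SZ up to a multiple
	   of AL suffices.  A later array whose required alignment
	   exceeds what the preceding array's placement guarantees
	   (PREVAL) needs its pointer aligned dynamically, or AL - 1
	   bytes of slack when only computing the size.  */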
4734 	if (preval == 0 && al <= alloc_align)
4735 	  {
4736 	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4737 	    sz += diff;
4738 	    if (diff && ptr)
4739 	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4740 				 ptr, size_int (diff));
4741 	  }
4742 	else if (al > preval)
4743 	  {
4744 	    if (ptr)
4745 	      {
4746 		ptr = fold_convert (pointer_sized_int_node, ptr);
4747 		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4748 				   build_int_cst (pointer_sized_int_node,
4749 						  al - 1));
4750 		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4751 				   build_int_cst (pointer_sized_int_node,
4752 						  -(HOST_WIDE_INT) al));
4753 		ptr = fold_convert (ptr_type_node, ptr);
4754 	      }
4755 	    else
4756 	      sz += al - 1;
4757 	  }
4758 	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4759 	  preval = al;
4760 	else
4761 	  preval = 1;
4762 	if (ptr)
4763 	  {
4764 	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4765 	    ptr = OMP_CLAUSE_DECL (c);
4766 	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4767 			       size_binop (MULT_EXPR, cnt,
4768 					   TYPE_SIZE_UNIT (pointee_type)));
4769 	  }
4770       }
4771 
4772   if (ptr == NULL_TREE)
4773     {
4774       eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4775       if (sz)
4776 	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4777       return eltsz;
4778     }
4779   else
4780     return ptr;
4781 }
4782 
4783 /* Return the last _looptemp_ clause if one has been created for
4784    lastprivate on distribute parallel for{, simd} or taskloop.
4785    FD is the loop data and INNERC should be the second _looptemp_
4786    clause (the one holding the end of the range).
4787    This is followed by collapse - 1 _looptemp_ clauses for counts[1]
4788    and up, and for triangular loops by 4 further _looptemp_ clauses
4789    (one for counts[0], one for first_inner_iterations, one for factor
4790    and one for adjn1).  After this there is optionally one
4791    _looptemp_ clause that this function returns.  */
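
/* For example, for a rectangular collapse(3) loop the clause chain is:
   begin, end (INNERC), counts[1], counts[2] and then optionally the
   lastprivate _looptemp_ that this function returns.  */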
4792 
4793 static tree
4794 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4795 {
4796   gcc_assert (innerc);
4797   int count = fd->collapse - 1;
4798   if (fd->non_rect
4799       && fd->last_nonrect == fd->first_nonrect + 1
4800       && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4801     count += 4;
4802   for (int i = 0; i < count; i++)
4803     {
4804       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4805 				OMP_CLAUSE__LOOPTEMP_);
4806       gcc_assert (innerc);
4807     }
4808   return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4809 			  OMP_CLAUSE__LOOPTEMP_);
4810 }
4811 
4812 /* A subroutine of expand_omp_for.  Generate code for a parallel
4813    loop with static schedule and no specified chunk size.  Given
4814    parameters:
4815 
4816 	for (V = N1; V cond N2; V += STEP) BODY;
4817 
4818    where COND is "<" or ">", we generate pseudocode
4819 
4820 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4821 	if (cond is <)
4822 	  adj = STEP - 1;
4823 	else
4824 	  adj = STEP + 1;
4825 	if ((__typeof (V)) -1 > 0 && cond is >)
4826 	  n = -(adj + N2 - N1) / -STEP;
4827 	else
4828 	  n = (adj + N2 - N1) / STEP;
4829 	q = n / nthreads;
4830 	tt = n % nthreads;
4831 	if (threadid < tt) goto L3; else goto L4;
4832     L3:
4833 	tt = 0;
4834 	q = q + 1;
4835     L4:
4836 	s0 = q * threadid + tt;
4837 	e0 = s0 + q;
4838 	V = s0 * STEP + N1;
4839 	if (s0 >= e0) goto L2; else goto L0;
4840     L0:
4841 	e = e0 * STEP + N1;
4842     L1:
4843 	BODY;
4844 	V += STEP;
4845 	if (V cond e) goto L1;
4846     L2:
4847 */
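
/* For instance, with N1 = 0, N2 = 103, STEP = 1 and nthreads = 4
   (illustrative numbers): n = 103, q = 25 and tt = 3, so threads 0-2
   each take q + 1 = 26 iterations ([0,26), [26,52), [52,78)) and
   thread 3 takes the remaining 25 ([78,103)).  */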
4848 
4849 static void
4850 expand_omp_for_static_nochunk (struct omp_region *region,
4851 			       struct omp_for_data *fd,
4852 			       gimple *inner_stmt)
4853 {
4854   tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4855   tree type, itype, vmain, vback;
4856   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4857   basic_block body_bb, cont_bb, collapse_bb = NULL;
4858   basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4859   basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4860   gimple_stmt_iterator gsi, gsip;
4861   edge ep;
4862   bool broken_loop = region->cont == NULL;
4863   tree *counts = NULL;
4864   tree n1, n2, step;
4865   tree reductions = NULL_TREE;
4866   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4867 
4868   itype = type = TREE_TYPE (fd->loop.v);
4869   if (POINTER_TYPE_P (type))
4870     itype = signed_type_for (type);
4871 
4872   entry_bb = region->entry;
4873   cont_bb = region->cont;
4874   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4875   fin_bb = BRANCH_EDGE (entry_bb)->dest;
4876   gcc_assert (broken_loop
4877 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4878   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4879   body_bb = single_succ (seq_start_bb);
4880   if (!broken_loop)
4881     {
4882       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4883 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4884       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4885     }
4886   exit_bb = region->exit;
4887 
4888   /* Iteration space partitioning goes in ENTRY_BB.  */
4889   gsi = gsi_last_nondebug_bb (entry_bb);
4890   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4891   gsip = gsi;
4892   gsi_prev (&gsip);
4893 
4894   if (fd->collapse > 1)
4895     {
4896       int first_zero_iter = -1, dummy = -1;
4897       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4898 
4899       counts = XALLOCAVEC (tree, fd->collapse);
4900       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4901 				  fin_bb, first_zero_iter,
4902 				  dummy_bb, dummy, l2_dom_bb);
4903       t = NULL_TREE;
4904     }
4905   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4906     t = integer_one_node;
4907   else
4908     t = fold_binary (fd->loop.cond_code, boolean_type_node,
4909 		     fold_convert (type, fd->loop.n1),
4910 		     fold_convert (type, fd->loop.n2));
4911   if (fd->collapse == 1
4912       && TYPE_UNSIGNED (type)
4913       && (t == NULL_TREE || !integer_onep (t)))
4914     {
4915       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4916       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4917 				     true, GSI_SAME_STMT);
4918       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4919       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4920 				     true, GSI_SAME_STMT);
4921       gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4922 						n1, n2);
4923       ep = split_block (entry_bb, cond_stmt);
4924       ep->flags = EDGE_TRUE_VALUE;
4925       entry_bb = ep->dest;
4926       ep->probability = profile_probability::very_likely ();
4927       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4928       ep->probability = profile_probability::very_unlikely ();
4929       if (gimple_in_ssa_p (cfun))
4930 	{
4931 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4932 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4933 	       !gsi_end_p (gpi); gsi_next (&gpi))
4934 	    {
4935 	      gphi *phi = gpi.phi ();
4936 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4937 			   ep, UNKNOWN_LOCATION);
4938 	    }
4939 	}
4940       gsi = gsi_last_bb (entry_bb);
4941     }
4942 
4943   if (fd->lastprivate_conditional)
4944     {
4945       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4946       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4947       if (fd->have_pointer_condtemp)
4948 	condtemp = OMP_CLAUSE_DECL (c);
4949       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4950       cond_var = OMP_CLAUSE_DECL (c);
4951     }
4952   if (fd->have_reductemp
4953       /* For scan, we don't want to reinitialize condtemp before the
4954 	 second loop.  */
4955       || (fd->have_pointer_condtemp && !fd->have_scantemp)
4956       || fd->have_nonctrl_scantemp)
4957     {
4958       tree t1 = build_int_cst (long_integer_type_node, 0);
4959       tree t2 = build_int_cst (long_integer_type_node, 1);
4960       tree t3 = build_int_cstu (long_integer_type_node,
4961 				(HOST_WIDE_INT_1U << 31) + 1);
4962       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4963       gimple_stmt_iterator gsi2 = gsi_none ();
4964       gimple *g = NULL;
4965       tree mem = null_pointer_node, memv = NULL_TREE;
4966       unsigned HOST_WIDE_INT condtemp_sz = 0;
4967       unsigned HOST_WIDE_INT alloc_align = 0;
4968       if (fd->have_reductemp)
4969 	{
4970 	  gcc_assert (!fd->have_nonctrl_scantemp);
4971 	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4972 	  reductions = OMP_CLAUSE_DECL (c);
4973 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4974 	  g = SSA_NAME_DEF_STMT (reductions);
4975 	  reductions = gimple_assign_rhs1 (g);
4976 	  OMP_CLAUSE_DECL (c) = reductions;
4977 	  gsi2 = gsi_for_stmt (g);
4978 	}
4979       else
4980 	{
4981 	  if (gsi_end_p (gsip))
4982 	    gsi2 = gsi_after_labels (region->entry);
4983 	  else
4984 	    gsi2 = gsip;
4985 	  reductions = null_pointer_node;
4986 	}
4987       if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4988 	{
4989 	  tree type;
4990 	  if (fd->have_pointer_condtemp)
4991 	    type = TREE_TYPE (condtemp);
4992 	  else
4993 	    type = ptr_type_node;
4994 	  memv = create_tmp_var (type);
4995 	  TREE_ADDRESSABLE (memv) = 1;
4996 	  unsigned HOST_WIDE_INT sz = 0;
4997 	  tree size = NULL_TREE;
4998 	  if (fd->have_pointer_condtemp)
4999 	    {
5000 	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5001 	      sz *= fd->lastprivate_conditional;
5002 	      condtemp_sz = sz;
5003 	    }
5004 	  if (fd->have_nonctrl_scantemp)
5005 	    {
5006 	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5007 	      gimple *g = gimple_build_call (nthreads, 0);
5008 	      nthreads = create_tmp_var (integer_type_node);
5009 	      gimple_call_set_lhs (g, nthreads);
5010 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5011 	      nthreads = fold_convert (sizetype, nthreads);
5012 	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5013 	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5014 						alloc_align, nthreads, NULL,
5015 						false);
5016 	      size = fold_convert (type, size);
5017 	    }
5018 	  else
5019 	    size = build_int_cst (type, sz);
5020 	  expand_omp_build_assign (&gsi2, memv, size, false);
5021 	  mem = build_fold_addr_expr (memv);
5022 	}
5023       tree t
5024 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5025 			   9, t1, t2, t2, t3, t1, null_pointer_node,
5026 			   null_pointer_node, reductions, mem);
5027       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5028 				true, GSI_SAME_STMT);
5029       if (fd->have_pointer_condtemp)
5030 	expand_omp_build_assign (&gsi2, condtemp, memv, false);
5031       if (fd->have_nonctrl_scantemp)
5032 	{
5033 	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5034 	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5035 				     alloc_align, nthreads, &gsi2, false);
5036 	}
5037       if (fd->have_reductemp)
5038 	{
5039 	  gsi_remove (&gsi2, true);
5040 	  release_ssa_name (gimple_assign_lhs (g));
5041 	}
5042     }
5043   switch (gimple_omp_for_kind (fd->for_stmt))
5044     {
5045     case GF_OMP_FOR_KIND_FOR:
5046       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5047       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5048       break;
5049     case GF_OMP_FOR_KIND_DISTRIBUTE:
5050       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5051       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5052       break;
5053     default:
5054       gcc_unreachable ();
5055     }
5056   nthreads = build_call_expr (nthreads, 0);
5057   nthreads = fold_convert (itype, nthreads);
5058   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5059 				       true, GSI_SAME_STMT);
5060   threadid = build_call_expr (threadid, 0);
5061   threadid = fold_convert (itype, threadid);
5062   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5063 				       true, GSI_SAME_STMT);
5064 
5065   n1 = fd->loop.n1;
5066   n2 = fd->loop.n2;
5067   step = fd->loop.step;
5068   if (gimple_omp_for_combined_into_p (fd->for_stmt))
5069     {
5070       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5071 				     OMP_CLAUSE__LOOPTEMP_);
5072       gcc_assert (innerc);
5073       n1 = OMP_CLAUSE_DECL (innerc);
5074       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5075 				OMP_CLAUSE__LOOPTEMP_);
5076       gcc_assert (innerc);
5077       n2 = OMP_CLAUSE_DECL (innerc);
5078     }
5079   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5080 				 true, NULL_TREE, true, GSI_SAME_STMT);
5081   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5082 				 true, NULL_TREE, true, GSI_SAME_STMT);
5083   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5084 				   true, NULL_TREE, true, GSI_SAME_STMT);
5085 
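  /* Compute the iteration count n = (adj + N2 - N1) / STEP from the
     pseudocode above, negating both dividend and divisor for
     decreasing loops with unsigned iteration type.  */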
5086   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5087   t = fold_build2 (PLUS_EXPR, itype, step, t);
5088   t = fold_build2 (PLUS_EXPR, itype, t, n2);
5089   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5090   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5091     t = fold_build2 (TRUNC_DIV_EXPR, itype,
5092 		     fold_build1 (NEGATE_EXPR, itype, t),
5093 		     fold_build1 (NEGATE_EXPR, itype, step));
5094   else
5095     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5096   t = fold_convert (itype, t);
5097   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5098 
5099   q = create_tmp_reg (itype, "q");
5100   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5101   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5102   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5103 
5104   tt = create_tmp_reg (itype, "tt");
5105   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5106   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5107   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5108 
5109   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5110   gcond *cond_stmt = gimple_build_cond_empty (t);
5111   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5112 
5113   second_bb = split_block (entry_bb, cond_stmt)->dest;
5114   gsi = gsi_last_nondebug_bb (second_bb);
5115   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5116 
5117   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5118 		     GSI_SAME_STMT);
5119   gassign *assign_stmt
5120     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5121   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5122 
5123   third_bb = split_block (second_bb, assign_stmt)->dest;
5124   gsi = gsi_last_nondebug_bb (third_bb);
5125   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5126 
5127   if (fd->have_nonctrl_scantemp)
5128     {
5129       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5130       tree controlp = NULL_TREE, controlb = NULL_TREE;
5131       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5132 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5133 	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5134 	  {
5135 	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5136 	      controlb = OMP_CLAUSE_DECL (c);
5137 	    else
5138 	      controlp = OMP_CLAUSE_DECL (c);
5139 	    if (controlb && controlp)
5140 	      break;
5141 	  }
5142       gcc_assert (controlp && controlb);
5143       tree cnt = create_tmp_var (sizetype);
5144       gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5145       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5146       unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5147       tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5148 					   alloc_align, cnt, NULL, true);
5149       tree size = create_tmp_var (sizetype);
5150       expand_omp_build_assign (&gsi, size, sz, false);
5151       tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5152 			      size, size_int (16384));
5153       expand_omp_build_assign (&gsi, controlb, cmp);
5154       g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5155 			     NULL_TREE, NULL_TREE);
5156       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5157       fourth_bb = split_block (third_bb, g)->dest;
5158       gsi = gsi_last_nondebug_bb (fourth_bb);
5159       /* FIXME: Once we have allocators, this should use an allocator.  */
5160       g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5161       gimple_call_set_lhs (g, controlp);
5162       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5163       expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5164 				 &gsi, true);
5165       gsi_prev (&gsi);
5166       g = gsi_stmt (gsi);
5167       fifth_bb = split_block (fourth_bb, g)->dest;
5168       gsi = gsi_last_nondebug_bb (fifth_bb);
5169 
5170       g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5171       gimple_call_set_lhs (g, controlp);
5172       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5173       tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5174       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5175 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5176 	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5177 	  {
5178 	    tree tmp = create_tmp_var (sizetype);
5179 	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5180 	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5181 				     TYPE_SIZE_UNIT (pointee_type));
5182 	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5183 	    g = gimple_build_call (alloca_decl, 2, tmp,
5184 				   size_int (TYPE_ALIGN (pointee_type)));
5185 	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5186 	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5187 	  }
5188 
5189       sixth_bb = split_block (fifth_bb, g)->dest;
5190       gsi = gsi_last_nondebug_bb (sixth_bb);
5191     }
5192 
5193   t = build2 (MULT_EXPR, itype, q, threadid);
5194   t = build2 (PLUS_EXPR, itype, t, tt);
5195   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5196 
5197   t = fold_build2 (PLUS_EXPR, itype, s0, q);
5198   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5199 
5200   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5201   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5202 
5203   /* Remove the GIMPLE_OMP_FOR statement.  */
5204   gsi_remove (&gsi, true);
5205 
5206   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5207   gsi = gsi_start_bb (seq_start_bb);
5208 
5209   tree startvar = fd->loop.v;
5210   tree endvar = NULL_TREE;
5211 
5212   if (gimple_omp_for_combined_p (fd->for_stmt))
5213     {
5214       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5215 		     ? gimple_omp_parallel_clauses (inner_stmt)
5216 		     : gimple_omp_for_clauses (inner_stmt);
5217       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5218       gcc_assert (innerc);
5219       startvar = OMP_CLAUSE_DECL (innerc);
5220       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5221 				OMP_CLAUSE__LOOPTEMP_);
5222       gcc_assert (innerc);
5223       endvar = OMP_CLAUSE_DECL (innerc);
5224       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5225 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5226 	{
5227 	  innerc = find_lastprivate_looptemp (fd, innerc);
5228 	  if (innerc)
5229 	    {
5230 	      /* If needed (distribute parallel for with lastprivate),
5231 		 propagate down the total number of iterations.  */
5232 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5233 				     fd->loop.n2);
5234 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5235 					    GSI_CONTINUE_LINKING);
5236 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5237 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5238 	    }
5239 	}
5240     }
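  /* V = s0 * STEP + N1, as in the pseudocode.  */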
5241   t = fold_convert (itype, s0);
5242   t = fold_build2 (MULT_EXPR, itype, t, step);
5243   if (POINTER_TYPE_P (type))
5244     {
5245       t = fold_build_pointer_plus (n1, t);
5246       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5247 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5248 	t = fold_convert (signed_type_for (type), t);
5249     }
5250   else
5251     t = fold_build2 (PLUS_EXPR, type, t, n1);
5252   t = fold_convert (TREE_TYPE (startvar), t);
5253   t = force_gimple_operand_gsi (&gsi, t,
5254 				DECL_P (startvar)
5255 				&& TREE_ADDRESSABLE (startvar),
5256 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5257   assign_stmt = gimple_build_assign (startvar, t);
5258   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5259   if (cond_var)
5260     {
5261       tree itype = TREE_TYPE (cond_var);
5262       /* For lastprivate(conditional:) itervar, we need some iteration
5263 	 counter that starts at unsigned non-zero and increases.
5264 	 Prefer as few IVs as possible, so if we can use startvar
5265 	 itself, use that, or startvar + constant (those would be
5266 	 incremented with step), and as a last resort fall back to
5267 	 s0 + 1, incremented by 1 each iteration.  */
5268       if (POINTER_TYPE_P (type)
5269 	  || TREE_CODE (n1) != INTEGER_CST
5270 	  || fd->loop.cond_code != LT_EXPR)
5271 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5272 			 build_int_cst (itype, 1));
5273       else if (tree_int_cst_sgn (n1) == 1)
5274 	t = fold_convert (itype, t);
5275       else
5276 	{
5277 	  tree c = fold_convert (itype, n1);
5278 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5279 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5280 	}
5281       t = force_gimple_operand_gsi (&gsi, t, false,
5282 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
5283       assign_stmt = gimple_build_assign (cond_var, t);
5284       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5285     }
5286 
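  /* e = e0 * STEP + N1, the end of this thread's range as in the
     pseudocode.  */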
5287   t = fold_convert (itype, e0);
5288   t = fold_build2 (MULT_EXPR, itype, t, step);
5289   if (POINTER_TYPE_P (type))
5290     {
5291       t = fold_build_pointer_plus (n1, t);
5292       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5293 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5294 	t = fold_convert (signed_type_for (type), t);
5295     }
5296   else
5297     t = fold_build2 (PLUS_EXPR, type, t, n1);
5298   t = fold_convert (TREE_TYPE (startvar), t);
5299   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5300 				false, GSI_CONTINUE_LINKING);
5301   if (endvar)
5302     {
5303       assign_stmt = gimple_build_assign (endvar, e);
5304       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5305       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5306 	assign_stmt = gimple_build_assign (fd->loop.v, e);
5307       else
5308 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5309       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5310     }
5311   /* Handle linear clause adjustments.  */
5312   tree itercnt = NULL_TREE;
5313   tree *nonrect_bounds = NULL;
5314   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5315     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5316 	 c; c = OMP_CLAUSE_CHAIN (c))
5317       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5318 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5319 	{
5320 	  tree d = OMP_CLAUSE_DECL (c);
5321 	  tree t = d, a, dest;
5322 	  if (omp_privatize_by_reference (t))
5323 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5324 	  if (itercnt == NULL_TREE)
5325 	    {
5326 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
5327 		{
5328 		  itercnt = fold_build2 (MINUS_EXPR, itype,
5329 					 fold_convert (itype, n1),
5330 					 fold_convert (itype, fd->loop.n1));
5331 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5332 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5333 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5334 						      NULL_TREE, false,
5335 						      GSI_CONTINUE_LINKING);
5336 		}
5337 	      else
5338 		itercnt = s0;
5339 	    }
5340 	  tree type = TREE_TYPE (t);
5341 	  if (POINTER_TYPE_P (type))
5342 	    type = sizetype;
5343 	  a = fold_build2 (MULT_EXPR, type,
5344 			   fold_convert (type, itercnt),
5345 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5346 	  dest = unshare_expr (t);
5347 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5348 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5349 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5350 					false, GSI_CONTINUE_LINKING);
5351 	  expand_omp_build_assign (&gsi, dest, t, true);
5352 	}
5353   if (fd->collapse > 1)
5354     {
5355       if (fd->non_rect)
5356 	{
5357 	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5358 	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5359 	}
5360       expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5361 				startvar);
5362     }
5363 
5364   if (!broken_loop)
5365     {
5366       /* The code controlling the sequential loop replaces the
5367 	 GIMPLE_OMP_CONTINUE.  */
5368       gsi = gsi_last_nondebug_bb (cont_bb);
5369       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5370       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5371       vmain = gimple_omp_continue_control_use (cont_stmt);
5372       vback = gimple_omp_continue_control_def (cont_stmt);
5373 
5374       if (cond_var)
5375 	{
5376 	  tree itype = TREE_TYPE (cond_var);
5377 	  tree t2;
5378 	  if (POINTER_TYPE_P (type)
5379 	      || TREE_CODE (n1) != INTEGER_CST
5380 	      || fd->loop.cond_code != LT_EXPR)
5381 	    t2 = build_int_cst (itype, 1);
5382 	  else
5383 	    t2 = fold_convert (itype, step);
5384 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5385 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
5386 					 NULL_TREE, true, GSI_SAME_STMT);
5387 	  assign_stmt = gimple_build_assign (cond_var, t2);
5388 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5389 	}
5390 
5391       if (!gimple_omp_for_combined_p (fd->for_stmt))
5392 	{
5393 	  if (POINTER_TYPE_P (type))
5394 	    t = fold_build_pointer_plus (vmain, step);
5395 	  else
5396 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5397 	  t = force_gimple_operand_gsi (&gsi, t,
5398 					DECL_P (vback)
5399 					&& TREE_ADDRESSABLE (vback),
5400 					NULL_TREE, true, GSI_SAME_STMT);
5401 	  assign_stmt = gimple_build_assign (vback, t);
5402 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5403 
5404 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5405 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5406 		      ? t : vback, e);
5407 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5408 	}
5409 
5410       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5411       gsi_remove (&gsi, true);
5412 
5413       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5414 	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5415 						   cont_bb, body_bb);
5416     }
5417 
5418   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
5419   gsi = gsi_last_nondebug_bb (exit_bb);
5420   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5421     {
5422       t = gimple_omp_return_lhs (gsi_stmt (gsi));
5423       if (fd->have_reductemp
5424 	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
5425 	      && !fd->have_nonctrl_scantemp))
5426 	{
5427 	  tree fn;
5428 	  if (t)
5429 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5430 	  else
5431 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5432 	  gcall *g = gimple_build_call (fn, 0);
5433 	  if (t)
5434 	    {
5435 	      gimple_call_set_lhs (g, t);
5436 	      if (fd->have_reductemp)
5437 		gsi_insert_after (&gsi, gimple_build_assign (reductions,
5438 							     NOP_EXPR, t),
5439 				  GSI_SAME_STMT);
5440 	    }
5441 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5442 	}
5443       else
5444 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5445     }
5446   else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5447 	   && !fd->have_nonctrl_scantemp)
5448     {
5449       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5450       gcall *g = gimple_build_call (fn, 0);
5451       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5452     }
5453   if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5454     {
5455       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5456       tree controlp = NULL_TREE, controlb = NULL_TREE;
5457       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5458 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5459 	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5460 	  {
5461 	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5462 	      controlb = OMP_CLAUSE_DECL (c);
5463 	    else
5464 	      controlp = OMP_CLAUSE_DECL (c);
5465 	    if (controlb && controlp)
5466 	      break;
5467 	  }
5468       gcc_assert (controlp && controlb);
5469       gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5470 				     NULL_TREE, NULL_TREE);
5471       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5472       exit1_bb = split_block (exit_bb, g)->dest;
5473       gsi = gsi_after_labels (exit1_bb);
5474       g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5475 			     controlp);
5476       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5477       exit2_bb = split_block (exit1_bb, g)->dest;
5478       gsi = gsi_after_labels (exit2_bb);
5479       g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5480 			     controlp);
5481       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5482       exit3_bb = split_block (exit2_bb, g)->dest;
5483       gsi = gsi_after_labels (exit3_bb);
5484     }
5485   gsi_remove (&gsi, true);
5486 
5487   /* Connect all the blocks.  */
5488   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5489   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5490   ep = find_edge (entry_bb, second_bb);
5491   ep->flags = EDGE_TRUE_VALUE;
5492   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5493   if (fourth_bb)
5494     {
5495       ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5496       ep->probability
5497 	= profile_probability::guessed_always ().apply_scale (1, 2);
5498       ep = find_edge (third_bb, fourth_bb);
5499       ep->flags = EDGE_TRUE_VALUE;
5500       ep->probability
5501 	= profile_probability::guessed_always ().apply_scale (1, 2);
5502       ep = find_edge (fourth_bb, fifth_bb);
5503       redirect_edge_and_branch (ep, sixth_bb);
5504     }
5505   else
5506     sixth_bb = third_bb;
5507   find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5508   find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5509   if (exit1_bb)
5510     {
5511       ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5512       ep->probability
5513 	= profile_probability::guessed_always ().apply_scale (1, 2);
5514       ep = find_edge (exit_bb, exit1_bb);
5515       ep->flags = EDGE_TRUE_VALUE;
5516       ep->probability
5517 	= profile_probability::guessed_always ().apply_scale (1, 2);
5518       ep = find_edge (exit1_bb, exit2_bb);
5519       redirect_edge_and_branch (ep, exit3_bb);
5520     }
5521 
5522   if (!broken_loop)
5523     {
5524       ep = find_edge (cont_bb, body_bb);
5525       if (ep == NULL)
5526 	{
5527 	  ep = BRANCH_EDGE (cont_bb);
5528 	  gcc_assert (single_succ (ep->dest) == body_bb);
5529 	}
5530       if (gimple_omp_for_combined_p (fd->for_stmt))
5531 	{
5532 	  remove_edge (ep);
5533 	  ep = NULL;
5534 	}
5535       else if (fd->collapse > 1)
5536 	{
5537 	  remove_edge (ep);
5538 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5539 	}
5540       else
5541 	ep->flags = EDGE_TRUE_VALUE;
5542       find_edge (cont_bb, fin_bb)->flags
5543 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5544     }
5545 
5546   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5547   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5548   if (fourth_bb)
5549     {
5550       set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5551       set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5552     }
5553   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5554 
5555   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5556 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5557   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5558 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
5559   if (exit1_bb)
5560     {
5561       set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5562       set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5563     }
5564 
5565   class loop *loop = body_bb->loop_father;
5566   if (loop != entry_bb->loop_father)
5567     {
5568       gcc_assert (broken_loop || loop->header == body_bb);
5569       gcc_assert (broken_loop
5570 		  || loop->latch == region->cont
5571 		  || single_pred (loop->latch) == region->cont);
5572       return;
5573     }
5574 
5575   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5576     {
5577       loop = alloc_loop ();
5578       loop->header = body_bb;
5579       if (collapse_bb == NULL)
5580 	loop->latch = cont_bb;
5581       add_loop (loop, body_bb->loop_father);
5582     }
5583 }
5584 
5585 /* Return phi in E->DEST with ARG on edge E.  */
5586 
5587 static gphi *
5588 find_phi_with_arg_on_edge (tree arg, edge e)
5589 {
5590   basic_block bb = e->dest;
5591 
5592   for (gphi_iterator gpi = gsi_start_phis (bb);
5593        !gsi_end_p (gpi);
5594        gsi_next (&gpi))
5595     {
5596       gphi *phi = gpi.phi ();
5597       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5598 	return phi;
5599     }
5600 
5601   return NULL;
5602 }
5603 
5604 /* A subroutine of expand_omp_for.  Generate code for a parallel
5605    loop with static schedule and a specified chunk size.  Given
5606    parameters:
5607 
5608 	for (V = N1; V cond N2; V += STEP) BODY;
5609 
5610    where COND is "<" or ">", we generate pseudocode
5611 
5612 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5613 	if (cond is <)
5614 	  adj = STEP - 1;
5615 	else
5616 	  adj = STEP + 1;
5617 	if ((__typeof (V)) -1 > 0 && cond is >)
5618 	  n = -(adj + N2 - N1) / -STEP;
5619 	else
5620 	  n = (adj + N2 - N1) / STEP;
5621 	trip = 0;
5622 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
5623 					      here so that V is defined
5624 					      if the loop is not entered
5625     L0:
5626 	s0 = (trip * nthreads + threadid) * CHUNK;
5627 	e0 = min (s0 + CHUNK, n);
5628 	if (s0 < n) goto L1; else goto L4;
5629     L1:
5630 	V = s0 * STEP + N1;
5631 	e = e0 * STEP + N1;
5632     L2:
5633 	BODY;
5634 	V += STEP;
5635 	if (V cond e) goto L2; else goto L3;
5636     L3:
5637 	trip += 1;
5638 	goto L0;
5639     L4:
5640 */
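
/* For instance, with nthreads = 4, CHUNK = 8 and threadid = 1
   (illustrative numbers): trip 0 covers s0 = (0 * 4 + 1) * 8 = 8 up to
   e0 = min (16, n) and trip 1 covers s0 = (1 * 4 + 1) * 8 = 40 up to
   min (48, n), i.e. the chunks are handed out round-robin among the
   threads.  */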
5641 
5642 static void
5643 expand_omp_for_static_chunk (struct omp_region *region,
5644 			     struct omp_for_data *fd, gimple *inner_stmt)
5645 {
5646   tree n, s0, e0, e, t;
5647   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5648   tree type, itype, vmain, vback, vextra;
5649   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5650   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5651   gimple_stmt_iterator gsi, gsip;
5652   edge se;
5653   bool broken_loop = region->cont == NULL;
5654   tree *counts = NULL;
5655   tree n1, n2, step;
5656   tree reductions = NULL_TREE;
5657   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5658 
5659   itype = type = TREE_TYPE (fd->loop.v);
5660   if (POINTER_TYPE_P (type))
5661     itype = signed_type_for (type);
5662 
5663   entry_bb = region->entry;
5664   se = split_block (entry_bb, last_stmt (entry_bb));
5665   entry_bb = se->src;
5666   iter_part_bb = se->dest;
5667   cont_bb = region->cont;
5668   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5669   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5670   gcc_assert (broken_loop
5671 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5672   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5673   body_bb = single_succ (seq_start_bb);
5674   if (!broken_loop)
5675     {
5676       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5677 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5678       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5679       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5680     }
5681   exit_bb = region->exit;
5682 
5683   /* Trip and adjustment setup goes in ENTRY_BB.  */
5684   gsi = gsi_last_nondebug_bb (entry_bb);
5685   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5686   gsip = gsi;
5687   gsi_prev (&gsip);
5688 
5689   if (fd->collapse > 1)
5690     {
5691       int first_zero_iter = -1, dummy = -1;
5692       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5693 
5694       counts = XALLOCAVEC (tree, fd->collapse);
5695       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5696 				  fin_bb, first_zero_iter,
5697 				  dummy_bb, dummy, l2_dom_bb);
5698       t = NULL_TREE;
5699     }
5700   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5701     t = integer_one_node;
5702   else
5703     t = fold_binary (fd->loop.cond_code, boolean_type_node,
5704 		     fold_convert (type, fd->loop.n1),
5705 		     fold_convert (type, fd->loop.n2));
5706   if (fd->collapse == 1
5707       && TYPE_UNSIGNED (type)
5708       && (t == NULL_TREE || !integer_onep (t)))
5709     {
5710       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5711       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5712 				     true, GSI_SAME_STMT);
5713       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5714       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5715 				     true, GSI_SAME_STMT);
5716       gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5717 						n1, n2);
5718       se = split_block (entry_bb, cond_stmt);
5719       se->flags = EDGE_TRUE_VALUE;
5720       entry_bb = se->dest;
5721       se->probability = profile_probability::very_likely ();
5722       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5723       se->probability = profile_probability::very_unlikely ();
5724       if (gimple_in_ssa_p (cfun))
5725 	{
5726 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5727 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5728 	       !gsi_end_p (gpi); gsi_next (&gpi))
5729 	    {
5730 	      gphi *phi = gpi.phi ();
5731 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5732 			   se, UNKNOWN_LOCATION);
5733 	    }
5734 	}
5735       gsi = gsi_last_bb (entry_bb);
5736     }
5737 
5738   if (fd->lastprivate_conditional)
5739     {
5740       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5741       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5742       if (fd->have_pointer_condtemp)
5743 	condtemp = OMP_CLAUSE_DECL (c);
5744       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5745       cond_var = OMP_CLAUSE_DECL (c);
5746     }
5747   if (fd->have_reductemp || fd->have_pointer_condtemp)
5748     {
5749       tree t1 = build_int_cst (long_integer_type_node, 0);
5750       tree t2 = build_int_cst (long_integer_type_node, 1);
5751       tree t3 = build_int_cstu (long_integer_type_node,
5752 				(HOST_WIDE_INT_1U << 31) + 1);
5753       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5754       gimple_stmt_iterator gsi2 = gsi_none ();
5755       gimple *g = NULL;
5756       tree mem = null_pointer_node, memv = NULL_TREE;
5757       if (fd->have_reductemp)
5758 	{
5759 	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5760 	  reductions = OMP_CLAUSE_DECL (c);
5761 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5762 	  g = SSA_NAME_DEF_STMT (reductions);
5763 	  reductions = gimple_assign_rhs1 (g);
5764 	  OMP_CLAUSE_DECL (c) = reductions;
5765 	  gsi2 = gsi_for_stmt (g);
5766 	}
5767       else
5768 	{
5769 	  if (gsi_end_p (gsip))
5770 	    gsi2 = gsi_after_labels (region->entry);
5771 	  else
5772 	    gsi2 = gsip;
5773 	  reductions = null_pointer_node;
5774 	}
5775       if (fd->have_pointer_condtemp)
5776 	{
5777 	  tree type = TREE_TYPE (condtemp);
5778 	  memv = create_tmp_var (type);
5779 	  TREE_ADDRESSABLE (memv) = 1;
5780 	  unsigned HOST_WIDE_INT sz
5781 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5782 	  sz *= fd->lastprivate_conditional;
5783 	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5784 				   false);
5785 	  mem = build_fold_addr_expr (memv);
5786 	}
5787       tree t
5788 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5789 			   9, t1, t2, t2, t3, t1, null_pointer_node,
5790 			   null_pointer_node, reductions, mem);
5791       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5792 				true, GSI_SAME_STMT);
5793       if (fd->have_pointer_condtemp)
5794 	expand_omp_build_assign (&gsi2, condtemp, memv, false);
5795       if (fd->have_reductemp)
5796 	{
5797 	  gsi_remove (&gsi2, true);
5798 	  release_ssa_name (gimple_assign_lhs (g));
5799 	}
5800     }
5801   switch (gimple_omp_for_kind (fd->for_stmt))
5802     {
5803     case GF_OMP_FOR_KIND_FOR:
5804       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5805       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5806       break;
5807     case GF_OMP_FOR_KIND_DISTRIBUTE:
5808       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5809       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5810       break;
5811     default:
5812       gcc_unreachable ();
5813     }
5814   nthreads = build_call_expr (nthreads, 0);
5815   nthreads = fold_convert (itype, nthreads);
5816   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5817 				       true, GSI_SAME_STMT);
5818   threadid = build_call_expr (threadid, 0);
5819   threadid = fold_convert (itype, threadid);
5820   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5821 				       true, GSI_SAME_STMT);
5822 
5823   n1 = fd->loop.n1;
5824   n2 = fd->loop.n2;
5825   step = fd->loop.step;
5826   if (gimple_omp_for_combined_into_p (fd->for_stmt))
5827     {
5828       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5829 				     OMP_CLAUSE__LOOPTEMP_);
5830       gcc_assert (innerc);
5831       n1 = OMP_CLAUSE_DECL (innerc);
5832       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5833 				OMP_CLAUSE__LOOPTEMP_);
5834       gcc_assert (innerc);
5835       n2 = OMP_CLAUSE_DECL (innerc);
5836     }
5837   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5838 				 true, NULL_TREE, true, GSI_SAME_STMT);
5839   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5840 				 true, NULL_TREE, true, GSI_SAME_STMT);
5841   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5842 				   true, NULL_TREE, true, GSI_SAME_STMT);
5843   tree chunk_size = fold_convert (itype, fd->chunk_size);
5844   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5845   chunk_size
5846     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5847 				GSI_SAME_STMT);
5848 
5849   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5850   t = fold_build2 (PLUS_EXPR, itype, step, t);
5851   t = fold_build2 (PLUS_EXPR, itype, t, n2);
5852   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5853   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5854     t = fold_build2 (TRUNC_DIV_EXPR, itype,
5855 		     fold_build1 (NEGATE_EXPR, itype, t),
5856 		     fold_build1 (NEGATE_EXPR, itype, step));
5857   else
5858     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5859   t = fold_convert (itype, t);
5860   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5861 				true, GSI_SAME_STMT);
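  /* N computed above is the logical iteration count, i.e. for a LT_EXPR
     loop (n2 - n1 + step - 1) / step; for unsigned GT_EXPR loops both
     the difference and the step are negated first so that the truncating
     division operates on nonnegative values.  */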
5862 
5863   trip_var = create_tmp_reg (itype, ".trip");
5864   if (gimple_in_ssa_p (cfun))
5865     {
5866       trip_init = make_ssa_name (trip_var);
5867       trip_main = make_ssa_name (trip_var);
5868       trip_back = make_ssa_name (trip_var);
5869     }
5870   else
5871     {
5872       trip_init = trip_var;
5873       trip_main = trip_var;
5874       trip_back = trip_var;
5875     }
5876 
5877   gassign *assign_stmt
5878     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5879   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5880 
5881   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5882   t = fold_build2 (MULT_EXPR, itype, t, step);
5883   if (POINTER_TYPE_P (type))
5884     t = fold_build_pointer_plus (n1, t);
5885   else
5886     t = fold_build2 (PLUS_EXPR, type, t, n1);
5887   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5888 				     true, GSI_SAME_STMT);
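  /* VEXTRA is the iteration variable's value at the start of this
     thread's first chunk, n1 + threadid * chunk_size * step; it is used
     below in place of fd->loop.v for the phi argument in ITER_PART_BB,
     where fd->loop.v has not been computed yet.  */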
5889 
5890   /* Remove the GIMPLE_OMP_FOR.  */
5891   gsi_remove (&gsi, true);
5892 
5893   gimple_stmt_iterator gsif = gsi;
5894 
5895   /* Iteration space partitioning goes in ITER_PART_BB.  */
5896   gsi = gsi_last_bb (iter_part_bb);
5897 
5898   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5899   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5900   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5901   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5902 				 false, GSI_CONTINUE_LINKING);
5903 
5904   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5905   t = fold_build2 (MIN_EXPR, itype, t, n);
5906   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5907 				 false, GSI_CONTINUE_LINKING);
5908 
5909   t = build2 (LT_EXPR, boolean_type_node, s0, n);
5910   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
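  /* The chunk owned by this thread on trip TRIP_MAIN is [S0, E0) in
     logical iteration numbers: S0 = (trip * nthreads + threadid)
     * chunk_size and E0 = MIN (S0 + chunk_size, n), guarded by S0 < n.  */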
5911 
5912   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5913   gsi = gsi_start_bb (seq_start_bb);
5914 
5915   tree startvar = fd->loop.v;
5916   tree endvar = NULL_TREE;
5917 
5918   if (gimple_omp_for_combined_p (fd->for_stmt))
5919     {
5920       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5921 		     ? gimple_omp_parallel_clauses (inner_stmt)
5922 		     : gimple_omp_for_clauses (inner_stmt);
5923       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5924       gcc_assert (innerc);
5925       startvar = OMP_CLAUSE_DECL (innerc);
5926       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5927 				OMP_CLAUSE__LOOPTEMP_);
5928       gcc_assert (innerc);
5929       endvar = OMP_CLAUSE_DECL (innerc);
5930       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5931 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5932 	{
5933 	  innerc = find_lastprivate_looptemp (fd, innerc);
5934 	  if (innerc)
5935 	    {
5936 	      /* If needed (distribute parallel for with lastprivate),
5937 		 propagate down the total number of iterations.  */
5938 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5939 				     fd->loop.n2);
5940 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5941 					    GSI_CONTINUE_LINKING);
5942 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5943 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5944 	    }
5945 	}
5946     }
5947 
5948   t = fold_convert (itype, s0);
5949   t = fold_build2 (MULT_EXPR, itype, t, step);
5950   if (POINTER_TYPE_P (type))
5951     {
5952       t = fold_build_pointer_plus (n1, t);
5953       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5954 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5955 	t = fold_convert (signed_type_for (type), t);
5956     }
5957   else
5958     t = fold_build2 (PLUS_EXPR, type, t, n1);
5959   t = fold_convert (TREE_TYPE (startvar), t);
5960   t = force_gimple_operand_gsi (&gsi, t,
5961 				DECL_P (startvar)
5962 				&& TREE_ADDRESSABLE (startvar),
5963 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5964   assign_stmt = gimple_build_assign (startvar, t);
5965   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5966   if (cond_var)
5967     {
5968       tree itype = TREE_TYPE (cond_var);
5969       /* For the lastprivate(conditional:) itervar we need an iteration
5970 	 counter that starts at a non-zero unsigned value and increases
5971 	 monotonically.  Prefer as few IVs as possible: use startvar
5972 	 itself if we can, or startvar + constant (those are incremented
5973 	 by step), and as a last resort use s0 + 1, incremented
5974 	 by 1.  */
5975       if (POINTER_TYPE_P (type)
5976 	  || TREE_CODE (n1) != INTEGER_CST
5977 	  || fd->loop.cond_code != LT_EXPR)
5978 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5979 			 build_int_cst (itype, 1));
5980       else if (tree_int_cst_sgn (n1) == 1)
5981 	t = fold_convert (itype, t);
5982       else
5983 	{
5984 	  tree c = fold_convert (itype, n1);
5985 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5986 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5987 	}
5988       t = force_gimple_operand_gsi (&gsi, t, false,
5989 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
5990       assign_stmt = gimple_build_assign (cond_var, t);
5991       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5992     }
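  /* For example, with a constant n1 <= 0 and a LT_EXPR condition the
     else arm above computes COND_VAR = (n1 + s0 * step) + (1 - n1)
     = s0 * step + 1, which is non-zero and increases with the chunk
     start, as required.  */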
5993 
5994   t = fold_convert (itype, e0);
5995   t = fold_build2 (MULT_EXPR, itype, t, step);
5996   if (POINTER_TYPE_P (type))
5997     {
5998       t = fold_build_pointer_plus (n1, t);
5999       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6000 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6001 	t = fold_convert (signed_type_for (type), t);
6002     }
6003   else
6004     t = fold_build2 (PLUS_EXPR, type, t, n1);
6005   t = fold_convert (TREE_TYPE (startvar), t);
6006   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6007 				false, GSI_CONTINUE_LINKING);
6008   if (endvar)
6009     {
6010       assign_stmt = gimple_build_assign (endvar, e);
6011       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6012       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6013 	assign_stmt = gimple_build_assign (fd->loop.v, e);
6014       else
6015 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6016       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6017     }
6018   /* Handle linear clause adjustments.  */
6019   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6020   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6021     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6022 	 c; c = OMP_CLAUSE_CHAIN (c))
6023       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6024 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6025 	{
6026 	  tree d = OMP_CLAUSE_DECL (c);
6027 	  tree t = d, a, dest;
6028 	  if (omp_privatize_by_reference (t))
6029 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6030 	  tree type = TREE_TYPE (t);
6031 	  if (POINTER_TYPE_P (type))
6032 	    type = sizetype;
6033 	  dest = unshare_expr (t);
6034 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
6035 	  expand_omp_build_assign (&gsif, v, t);
6036 	  if (itercnt == NULL_TREE)
6037 	    {
6038 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
6039 		{
6040 		  itercntbias
6041 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6042 				   fold_convert (itype, fd->loop.n1));
6043 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6044 					     itercntbias, step);
6045 		  itercntbias
6046 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
6047 						NULL_TREE, true,
6048 						GSI_SAME_STMT);
6049 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6050 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6051 						      NULL_TREE, false,
6052 						      GSI_CONTINUE_LINKING);
6053 		}
6054 	      else
6055 		itercnt = s0;
6056 	    }
6057 	  a = fold_build2 (MULT_EXPR, type,
6058 			   fold_convert (type, itercnt),
6059 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6060 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6061 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6062 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6063 					false, GSI_CONTINUE_LINKING);
6064 	  expand_omp_build_assign (&gsi, dest, t, true);
6065 	}
6066   if (fd->collapse > 1)
6067     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6068 
6069   if (!broken_loop)
6070     {
6071       /* The code controlling the sequential loop goes in CONT_BB,
6072 	 replacing the GIMPLE_OMP_CONTINUE.  */
6073       gsi = gsi_last_nondebug_bb (cont_bb);
6074       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6075       vmain = gimple_omp_continue_control_use (cont_stmt);
6076       vback = gimple_omp_continue_control_def (cont_stmt);
6077 
6078       if (cond_var)
6079 	{
6080 	  tree itype = TREE_TYPE (cond_var);
6081 	  tree t2;
6082 	  if (POINTER_TYPE_P (type)
6083 	      || TREE_CODE (n1) != INTEGER_CST
6084 	      || fd->loop.cond_code != LT_EXPR)
6085 	    t2 = build_int_cst (itype, 1);
6086 	  else
6087 	    t2 = fold_convert (itype, step);
6088 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6089 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
6090 					 NULL_TREE, true, GSI_SAME_STMT);
6091 	  assign_stmt = gimple_build_assign (cond_var, t2);
6092 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6093 	}
6094 
6095       if (!gimple_omp_for_combined_p (fd->for_stmt))
6096 	{
6097 	  if (POINTER_TYPE_P (type))
6098 	    t = fold_build_pointer_plus (vmain, step);
6099 	  else
6100 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
6101 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6102 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6103 					  true, GSI_SAME_STMT);
6104 	  assign_stmt = gimple_build_assign (vback, t);
6105 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6106 
6107 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6108 	    t = build2 (EQ_EXPR, boolean_type_node,
6109 			build_int_cst (itype, 0),
6110 			build_int_cst (itype, 1));
6111 	  else
6112 	    t = build2 (fd->loop.cond_code, boolean_type_node,
6113 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
6114 			? t : vback, e);
6115 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
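	  /* Note that for chunk_size 1 every chunk contains exactly one
	     iteration, so the continuation test built above is the
	     constant false 0 == 1 and the sequential loop body runs
	     once per chunk.  */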
6116 	}
6117 
6118       /* Remove GIMPLE_OMP_CONTINUE.  */
6119       gsi_remove (&gsi, true);
6120 
6121       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6122 	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6123 
6124       /* Trip update code goes into TRIP_UPDATE_BB.  */
6125       gsi = gsi_start_bb (trip_update_bb);
6126 
6127       t = build_int_cst (itype, 1);
6128       t = build2 (PLUS_EXPR, itype, trip_main, t);
6129       assign_stmt = gimple_build_assign (trip_back, t);
6130       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6131     }
6132 
6133   /* Replace the GIMPLE_OMP_RETURN with a barrier, a GOMP_loop_end* call, or nothing.  */
6134   gsi = gsi_last_nondebug_bb (exit_bb);
6135   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6136     {
6137       t = gimple_omp_return_lhs (gsi_stmt (gsi));
6138       if (fd->have_reductemp || fd->have_pointer_condtemp)
6139 	{
6140 	  tree fn;
6141 	  if (t)
6142 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6143 	  else
6144 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6145 	  gcall *g = gimple_build_call (fn, 0);
6146 	  if (t)
6147 	    {
6148 	      gimple_call_set_lhs (g, t);
6149 	      if (fd->have_reductemp)
6150 		gsi_insert_after (&gsi, gimple_build_assign (reductions,
6151 							     NOP_EXPR, t),
6152 				  GSI_SAME_STMT);
6153 	    }
6154 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6155 	}
6156       else
6157 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6158     }
6159   else if (fd->have_pointer_condtemp)
6160     {
6161       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6162       gcall *g = gimple_build_call (fn, 0);
6163       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6164     }
6165   gsi_remove (&gsi, true);
6166 
6167   /* Connect the new blocks.  */
6168   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6169   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6170 
6171   if (!broken_loop)
6172     {
6173       se = find_edge (cont_bb, body_bb);
6174       if (se == NULL)
6175 	{
6176 	  se = BRANCH_EDGE (cont_bb);
6177 	  gcc_assert (single_succ (se->dest) == body_bb);
6178 	}
6179       if (gimple_omp_for_combined_p (fd->for_stmt))
6180 	{
6181 	  remove_edge (se);
6182 	  se = NULL;
6183 	}
6184       else if (fd->collapse > 1)
6185 	{
6186 	  remove_edge (se);
6187 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6188 	}
6189       else
6190 	se->flags = EDGE_TRUE_VALUE;
6191       find_edge (cont_bb, trip_update_bb)->flags
6192 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6193 
6194       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6195 				iter_part_bb);
6196     }
6197 
6198   if (gimple_in_ssa_p (cfun))
6199     {
6200       gphi_iterator psi;
6201       gphi *phi;
6202       edge re, ene;
6203       edge_var_map *vm;
6204       size_t i;
6205 
6206       gcc_assert (fd->collapse == 1 && !broken_loop);
6207 
6208       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6209 	 remove arguments of the phi nodes in fin_bb.  We need to create
6210 	 appropriate phi nodes in iter_part_bb instead.  */
6211       se = find_edge (iter_part_bb, fin_bb);
6212       re = single_succ_edge (trip_update_bb);
6213       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6214       ene = single_succ_edge (entry_bb);
6215 
6216       psi = gsi_start_phis (fin_bb);
6217       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6218 	   gsi_next (&psi), ++i)
6219 	{
6220 	  gphi *nphi;
6221 	  location_t locus;
6222 
6223 	  phi = psi.phi ();
6224 	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6225 			       redirect_edge_var_map_def (vm), 0))
6226 	    continue;
6227 
6228 	  t = gimple_phi_result (phi);
6229 	  gcc_assert (t == redirect_edge_var_map_result (vm));
6230 
6231 	  if (!single_pred_p (fin_bb))
6232 	    t = copy_ssa_name (t, phi);
6233 
6234 	  nphi = create_phi_node (t, iter_part_bb);
6235 
6236 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6237 	  locus = gimple_phi_arg_location_from_edge (phi, se);
6238 
6239 	  /* A special case -- fd->loop.v is not yet computed in
6240 	     iter_part_bb, so we need to use vextra instead.  */
6241 	  if (t == fd->loop.v)
6242 	    t = vextra;
6243 	  add_phi_arg (nphi, t, ene, locus);
6244 	  locus = redirect_edge_var_map_location (vm);
6245 	  tree back_arg = redirect_edge_var_map_def (vm);
6246 	  add_phi_arg (nphi, back_arg, re, locus);
6247 	  edge ce = find_edge (cont_bb, body_bb);
6248 	  if (ce == NULL)
6249 	    {
6250 	      ce = BRANCH_EDGE (cont_bb);
6251 	      gcc_assert (single_succ (ce->dest) == body_bb);
6252 	      ce = single_succ_edge (ce->dest);
6253 	    }
6254 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6255 	  gcc_assert (inner_loop_phi != NULL);
6256 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6257 		       find_edge (seq_start_bb, body_bb), locus);
6258 
6259 	  if (!single_pred_p (fin_bb))
6260 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6261 	}
6262       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6263       redirect_edge_var_map_clear (re);
6264       if (single_pred_p (fin_bb))
6265 	while (1)
6266 	  {
6267 	    psi = gsi_start_phis (fin_bb);
6268 	    if (gsi_end_p (psi))
6269 	      break;
6270 	    remove_phi_node (&psi, false);
6271 	  }
6272 
6273       /* Make phi node for trip.  */
6274       phi = create_phi_node (trip_main, iter_part_bb);
6275       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6276 		   UNKNOWN_LOCATION);
6277       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6278 		   UNKNOWN_LOCATION);
6279     }
6280 
6281   if (!broken_loop)
6282     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6283   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6284 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6285   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6286 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
6287   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6288 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6289   set_immediate_dominator (CDI_DOMINATORS, body_bb,
6290 			   recompute_dominator (CDI_DOMINATORS, body_bb));
6291 
6292   if (!broken_loop)
6293     {
6294       class loop *loop = body_bb->loop_father;
6295       class loop *trip_loop = alloc_loop ();
6296       trip_loop->header = iter_part_bb;
6297       trip_loop->latch = trip_update_bb;
6298       add_loop (trip_loop, iter_part_bb->loop_father);
6299 
6300       if (loop != entry_bb->loop_father)
6301 	{
6302 	  gcc_assert (loop->header == body_bb);
6303 	  gcc_assert (loop->latch == region->cont
6304 		      || single_pred (loop->latch) == region->cont);
6305 	  trip_loop->inner = loop;
6306 	  return;
6307 	}
6308 
6309       if (!gimple_omp_for_combined_p (fd->for_stmt))
6310 	{
6311 	  loop = alloc_loop ();
6312 	  loop->header = body_bb;
6313 	  if (collapse_bb == NULL)
6314 	    loop->latch = cont_bb;
6315 	  add_loop (loop, trip_loop);
6316 	}
6317     }
6318 }
6319 
6320 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
6321    loop.  Given parameters:
6322 
6323 	for (V = N1; V cond N2; V += STEP) BODY;
6324 
6325    where COND is "<" or ">", we generate pseudocode
6326 
6327 	V = N1;
6328 	goto L1;
6329     L0:
6330 	BODY;
6331 	V += STEP;
6332     L1:
6333 	if (V cond N2) goto L0; else goto L2;
6334     L2:
6335 
6336     For collapsed loops, emit the outer loops as scalar
6337     and only try to vectorize the innermost loop.  */
6338 
6339 static void
6340 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6341 {
6342   tree type, t;
6343   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6344   gimple_stmt_iterator gsi;
6345   gimple *stmt;
6346   gcond *cond_stmt;
6347   bool broken_loop = region->cont == NULL;
6348   edge e, ne;
6349   tree *counts = NULL;
6350   int i;
6351   int safelen_int = INT_MAX;
6352   bool dont_vectorize = false;
6353   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354 				  OMP_CLAUSE_SAFELEN);
6355   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6356 				  OMP_CLAUSE__SIMDUID_);
6357   tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6358 			      OMP_CLAUSE_IF);
6359   tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6360 				  OMP_CLAUSE_SIMDLEN);
6361   tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6362 				   OMP_CLAUSE__CONDTEMP_);
6363   tree n1, n2;
6364   tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6365 
6366   if (safelen)
6367     {
6368       poly_uint64 val;
6369       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6370       if (!poly_int_tree_p (safelen, &val))
6371 	safelen_int = 0;
6372       else
6373 	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6374       if (safelen_int == 1)
6375 	safelen_int = 0;
6376     }
6377   if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6378       || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6379     {
6380       safelen_int = 0;
6381       dont_vectorize = true;
6382     }
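  /* An IF clause that folds to false or a simdlen (1) clause means the
     loop should stay scalar; a safelen of 1, or one that is not a known
     constant, has likewise been treated as safelen (0) above.  */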
6383   type = TREE_TYPE (fd->loop.v);
6384   entry_bb = region->entry;
6385   cont_bb = region->cont;
6386   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6387   gcc_assert (broken_loop
6388 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6389   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6390   if (!broken_loop)
6391     {
6392       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6393       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6394       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6395       l2_bb = BRANCH_EDGE (entry_bb)->dest;
6396     }
6397   else
6398     {
6399       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6400       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6401       l2_bb = single_succ (l1_bb);
6402     }
6403   exit_bb = region->exit;
6404   l2_dom_bb = NULL;
6405 
6406   gsi = gsi_last_nondebug_bb (entry_bb);
6407 
6408   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6409   /* Not needed in SSA form right now.  */
6410   gcc_assert (!gimple_in_ssa_p (cfun));
6411   if (fd->collapse > 1
6412       && (gimple_omp_for_combined_into_p (fd->for_stmt)
6413 	  || broken_loop))
6414     {
6415       int first_zero_iter = -1, dummy = -1;
6416       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6417 
6418       counts = XALLOCAVEC (tree, fd->collapse);
6419       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6420 				  zero_iter_bb, first_zero_iter,
6421 				  dummy_bb, dummy, l2_dom_bb);
6422     }
6423   if (l2_dom_bb == NULL)
6424     l2_dom_bb = l1_bb;
6425 
6426   n1 = fd->loop.n1;
6427   n2 = fd->loop.n2;
6428   if (gimple_omp_for_combined_into_p (fd->for_stmt))
6429     {
6430       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6431 				     OMP_CLAUSE__LOOPTEMP_);
6432       gcc_assert (innerc);
6433       n1 = OMP_CLAUSE_DECL (innerc);
6434       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6435 				OMP_CLAUSE__LOOPTEMP_);
6436       gcc_assert (innerc);
6437       n2 = OMP_CLAUSE_DECL (innerc);
6438     }
6439   tree step = fd->loop.step;
6440   tree orig_step = step; /* May be different from step if is_simt.  */
6441 
6442   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6443 				  OMP_CLAUSE__SIMT_);
6444   if (is_simt)
6445     {
6446       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6447       is_simt = safelen_int > 1;
6448     }
6449   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6450   if (is_simt)
6451     {
6452       simt_lane = create_tmp_var (unsigned_type_node);
6453       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6454       gimple_call_set_lhs (g, simt_lane);
6455       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6456       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6457 				 fold_convert (TREE_TYPE (step), simt_lane));
6458       n1 = fold_convert (type, n1);
6459       if (POINTER_TYPE_P (type))
6460 	n1 = fold_build_pointer_plus (n1, offset);
6461       else
6462 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6463 
6464       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
6465       if (fd->collapse > 1)
6466 	simt_maxlane = build_one_cst (unsigned_type_node);
6467       else if (safelen_int < omp_max_simt_vf ())
6468 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6469       tree vf
6470 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6471 					unsigned_type_node, 0);
6472       if (simt_maxlane)
6473 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6474       vf = fold_convert (TREE_TYPE (step), vf);
6475       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6476     }
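  /* Each SIMT lane thus starts at N1 + SIMT_LANE * ORIG_STEP and
     advances by ORIG_STEP * VF, so the lanes interleave over the
     iteration space, with VF capped by SIMT_MAXLANE when safelen or
     collapse restricts the number of usable lanes.  */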
6477 
6478   tree n2var = NULL_TREE;
6479   tree n2v = NULL_TREE;
6480   tree *nonrect_bounds = NULL;
6481   tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6482   if (fd->collapse > 1)
6483     {
6484       if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6485 	{
6486 	  if (fd->non_rect)
6487 	    {
6488 	      nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6489 	      memset (nonrect_bounds, 0,
6490 		      sizeof (tree) * (fd->last_nonrect + 1));
6491 	    }
6492 	  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6493 	  gcc_assert (entry_bb == gsi_bb (gsi));
6494 	  gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6495 	  gsi_prev (&gsi);
6496 	  entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6497 	  expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6498 				    NULL, n1);
6499 	  gsi = gsi_for_stmt (fd->for_stmt);
6500 	}
6501       if (broken_loop)
6502 	;
6503       else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6504 	{
6505 	  /* Compute in n2var the limit for the first innermost loop,
6506 	     i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6507 	     where cnt is how many iterations the loop would have if
6508 	     all further iterations were assigned to the current task.  */
6509 	  n2var = create_tmp_var (type);
6510 	  i = fd->collapse - 1;
6511 	  tree itype = TREE_TYPE (fd->loops[i].v);
6512 	  if (POINTER_TYPE_P (itype))
6513 	    itype = signed_type_for (itype);
6514 	  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6515 				     ? -1 : 1));
6516 	  t = fold_build2 (PLUS_EXPR, itype,
6517 			   fold_convert (itype, fd->loops[i].step), t);
6518 	  t = fold_build2 (PLUS_EXPR, itype, t,
6519 			   fold_convert (itype, fd->loops[i].n2));
6520 	  if (fd->loops[i].m2)
6521 	    {
6522 	      tree t2 = fold_convert (itype,
6523 				      fd->loops[i - fd->loops[i].outer].v);
6524 	      tree t3 = fold_convert (itype, fd->loops[i].m2);
6525 	      t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6526 	      t = fold_build2 (PLUS_EXPR, itype, t, t2);
6527 	    }
6528 	  t = fold_build2 (MINUS_EXPR, itype, t,
6529 			   fold_convert (itype, fd->loops[i].v));
6530 	  if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6531 	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
6532 			     fold_build1 (NEGATE_EXPR, itype, t),
6533 			     fold_build1 (NEGATE_EXPR, itype,
6534 					  fold_convert (itype,
6535 							fd->loops[i].step)));
6536 	  else
6537 	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6538 			     fold_convert (itype, fd->loops[i].step));
6539 	  t = fold_convert (type, t);
6540 	  tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6541 	  min_arg1 = create_tmp_var (type);
6542 	  expand_omp_build_assign (&gsi, min_arg1, t2);
6543 	  min_arg2 = create_tmp_var (type);
6544 	  expand_omp_build_assign (&gsi, min_arg2, t);
6545 	}
6546       else
6547 	{
6548 	  if (TREE_CODE (n2) == INTEGER_CST)
6549 	    {
6550 	      /* Indicate for lastprivate handling that at least one iteration
6551 		 has been performed, without wasting runtime.  */
6552 	      if (integer_nonzerop (n2))
6553 		expand_omp_build_assign (&gsi, fd->loop.v,
6554 					 fold_convert (type, n2));
6555 	      else
6556 		/* Indicate that no iteration has been performed.  */
6557 		expand_omp_build_assign (&gsi, fd->loop.v,
6558 					 build_one_cst (type));
6559 	    }
6560 	  else
6561 	    {
6562 	      expand_omp_build_assign (&gsi, fd->loop.v,
6563 				       build_zero_cst (type));
6564 	      expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6565 	    }
6566 	  for (i = 0; i < fd->collapse; i++)
6567 	    {
6568 	      t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6569 	      if (fd->loops[i].m1)
6570 		{
6571 		  tree t2
6572 		    = fold_convert (TREE_TYPE (t),
6573 				    fd->loops[i - fd->loops[i].outer].v);
6574 		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6575 		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6576 		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6577 		}
6578 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6579 	      /* For normal non-combined collapsed loops just initialize
6580 		 the outermost iterator in the entry_bb.  */
6581 	      if (!broken_loop)
6582 		break;
6583 	    }
6584 	}
6585     }
6586   else
6587     expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6588   tree altv = NULL_TREE, altn2 = NULL_TREE;
6589   if (fd->collapse == 1
6590       && !broken_loop
6591       && TREE_CODE (orig_step) != INTEGER_CST)
6592     {
6593       /* The vectorizer currently punts on loops with a non-constant step
6594 	 for the main IV (it cannot compute the number of iterations and
6595 	 gives up because of that).  Since for OpenMP loops the number of
6596 	 iterations can always be computed upfront, use an alternate IV
6597 	 as the loop iterator:
6598 	 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6599 	 for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
6600       altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6601       expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6602       tree itype = TREE_TYPE (fd->loop.v);
6603       if (POINTER_TYPE_P (itype))
6604 	itype = signed_type_for (itype);
6605       t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6606       t = fold_build2 (PLUS_EXPR, itype,
6607 		       fold_convert (itype, step), t);
6608       t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6609       t = fold_build2 (MINUS_EXPR, itype, t,
6610 		       fold_convert (itype, fd->loop.v));
6611       if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6612 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
6613 			 fold_build1 (NEGATE_EXPR, itype, t),
6614 			 fold_build1 (NEGATE_EXPR, itype,
6615 				      fold_convert (itype, step)));
6616       else
6617 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6618 			 fold_convert (itype, step));
6619       t = fold_convert (TREE_TYPE (altv), t);
6620       altn2 = create_tmp_var (TREE_TYPE (altv));
6621       expand_omp_build_assign (&gsi, altn2, t);
6622       tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6623       t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6624 				     true, GSI_SAME_STMT);
6625       t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6626       gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6627 					build_zero_cst (TREE_TYPE (altv)));
6628       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
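      /* The COND_EXPR above clamps ALTN2 to zero when even the first
	 iteration would not execute, so the ALTV < ALTN2 test emitted
	 in L1_BB is exact.  */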
6629     }
6630   else if (fd->collapse > 1
6631 	   && !broken_loop
6632 	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
6633 	   && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6634     {
6635       altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6636       altn2 = create_tmp_var (TREE_TYPE (altv));
6637     }
6638   if (cond_var)
6639     {
6640       if (POINTER_TYPE_P (type)
6641 	  || TREE_CODE (n1) != INTEGER_CST
6642 	  || fd->loop.cond_code != LT_EXPR
6643 	  || tree_int_cst_sgn (n1) != 1)
6644 	expand_omp_build_assign (&gsi, cond_var,
6645 				 build_one_cst (TREE_TYPE (cond_var)));
6646       else
6647 	expand_omp_build_assign (&gsi, cond_var,
6648 				 fold_convert (TREE_TYPE (cond_var), n1));
6649     }
6650 
6651   /* Remove the GIMPLE_OMP_FOR statement.  */
6652   gsi_remove (&gsi, true);
6653 
6654   if (!broken_loop)
6655     {
6656       /* Code to control the increment goes in the CONT_BB.  */
6657       gsi = gsi_last_nondebug_bb (cont_bb);
6658       stmt = gsi_stmt (gsi);
6659       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6660 
6661       if (fd->collapse == 1
6662 	  || gimple_omp_for_combined_into_p (fd->for_stmt))
6663 	{
6664 	  if (POINTER_TYPE_P (type))
6665 	    t = fold_build_pointer_plus (fd->loop.v, step);
6666 	  else
6667 	    t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6668 	  expand_omp_build_assign (&gsi, fd->loop.v, t);
6669 	}
6670       else if (TREE_CODE (n2) != INTEGER_CST)
6671 	expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6672       if (altv)
6673 	{
6674 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6675 			   build_one_cst (TREE_TYPE (altv)));
6676 	  expand_omp_build_assign (&gsi, altv, t);
6677 	}
6678 
6679       if (fd->collapse > 1)
6680 	{
6681 	  i = fd->collapse - 1;
6682 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6683 	    {
6684 	      t = fold_convert (sizetype, fd->loops[i].step);
6685 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
6686 	    }
6687 	  else
6688 	    {
6689 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
6690 				fd->loops[i].step);
6691 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6692 			       fd->loops[i].v, t);
6693 	    }
6694 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6695 	}
6696       if (cond_var)
6697 	{
6698 	  if (POINTER_TYPE_P (type)
6699 	      || TREE_CODE (n1) != INTEGER_CST
6700 	      || fd->loop.cond_code != LT_EXPR
6701 	      || tree_int_cst_sgn (n1) != 1)
6702 	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6703 			     build_one_cst (TREE_TYPE (cond_var)));
6704 	  else
6705 	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6706 			     fold_convert (TREE_TYPE (cond_var), step));
6707 	  expand_omp_build_assign (&gsi, cond_var, t);
6708 	}
6709 
6710       /* Remove GIMPLE_OMP_CONTINUE.  */
6711       gsi_remove (&gsi, true);
6712     }
6713 
6714   /* Emit the condition in L1_BB.  */
6715   gsi = gsi_start_bb (l1_bb);
6716 
6717   if (altv)
6718     t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6719   else if (fd->collapse > 1
6720 	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
6721 	   && !broken_loop)
6722     {
6723       i = fd->collapse - 1;
6724       tree itype = TREE_TYPE (fd->loops[i].v);
6725       if (fd->loops[i].m2)
6726 	t = n2v = create_tmp_var (itype);
6727       else
6728 	t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6729       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6730 				    false, GSI_CONTINUE_LINKING);
6731       tree v = fd->loops[i].v;
6732       if (DECL_P (v) && TREE_ADDRESSABLE (v))
6733 	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6734 				      false, GSI_CONTINUE_LINKING);
6735       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6736     }
6737   else
6738     {
6739       if (fd->collapse > 1 && !broken_loop)
6740 	t = n2var;
6741       else
6742 	t = fold_convert (type, unshare_expr (n2));
6743       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6744 				    false, GSI_CONTINUE_LINKING);
6745       tree v = fd->loop.v;
6746       if (DECL_P (v) && TREE_ADDRESSABLE (v))
6747 	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6748 				      false, GSI_CONTINUE_LINKING);
6749       t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6750     }
6751   cond_stmt = gimple_build_cond_empty (t);
6752   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6753   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6754 		 NULL, NULL)
6755       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6756 		    NULL, NULL))
6757     {
6758       gsi = gsi_for_stmt (cond_stmt);
6759       gimple_regimplify_operands (cond_stmt, &gsi);
6760     }
6761 
6762   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
6763   if (is_simt)
6764     {
6765       gsi = gsi_start_bb (l2_bb);
6766       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6767       if (POINTER_TYPE_P (type))
6768 	t = fold_build_pointer_plus (fd->loop.v, step);
6769       else
6770 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6771       expand_omp_build_assign (&gsi, fd->loop.v, t);
6772     }
6773 
6774   /* Remove GIMPLE_OMP_RETURN.  */
6775   gsi = gsi_last_nondebug_bb (exit_bb);
6776   gsi_remove (&gsi, true);
6777 
6778   /* Connect the new blocks.  */
6779   remove_edge (FALLTHRU_EDGE (entry_bb));
6780 
6781   if (!broken_loop)
6782     {
6783       remove_edge (BRANCH_EDGE (entry_bb));
6784       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6785 
6786       e = BRANCH_EDGE (l1_bb);
6787       ne = FALLTHRU_EDGE (l1_bb);
6788       e->flags = EDGE_TRUE_VALUE;
6789     }
6790   else
6791     {
6792       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6793 
6794       ne = single_succ_edge (l1_bb);
6795       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6796 
6797     }
6798   ne->flags = EDGE_FALSE_VALUE;
6799   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6800   ne->probability = e->probability.invert ();
6801 
6802   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6803   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6804 
6805   if (simt_maxlane)
6806     {
6807       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6808 				     NULL_TREE, NULL_TREE);
6809       gsi = gsi_last_bb (entry_bb);
6810       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6811       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6812       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6813       FALLTHRU_EDGE (entry_bb)->probability
6814 	 = profile_probability::guessed_always ().apply_scale (7, 8);
6815       BRANCH_EDGE (entry_bb)->probability
6816 	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6817       l2_dom_bb = entry_bb;
6818     }
6819   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6820 
6821   if (!broken_loop && fd->collapse > 1)
6822     {
6823       basic_block last_bb = l1_bb;
6824       basic_block init_bb = NULL;
6825       for (i = fd->collapse - 2; i >= 0; i--)
6826 	{
6827 	  tree nextn2v = NULL_TREE;
6828 	  if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6829 	    e = EDGE_SUCC (last_bb, 0);
6830 	  else
6831 	    e = EDGE_SUCC (last_bb, 1);
6832 	  basic_block bb = split_edge (e);
6833 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6834 	    {
6835 	      t = fold_convert (sizetype, fd->loops[i].step);
6836 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
6837 	    }
6838 	  else
6839 	    {
6840 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
6841 				fd->loops[i].step);
6842 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6843 			       fd->loops[i].v, t);
6844 	    }
6845 	  gsi = gsi_after_labels (bb);
6846 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6847 
6848 	  bb = split_block (bb, last_stmt (bb))->dest;
6849 	  gsi = gsi_start_bb (bb);
6850 	  tree itype = TREE_TYPE (fd->loops[i].v);
6851 	  if (fd->loops[i].m2)
6852 	    t = nextn2v = create_tmp_var (itype);
6853 	  else
6854 	    t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6855 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6856 					false, GSI_CONTINUE_LINKING);
6857 	  tree v = fd->loops[i].v;
6858 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
6859 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6860 					  false, GSI_CONTINUE_LINKING);
6861 	  t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6862 	  cond_stmt = gimple_build_cond_empty (t);
6863 	  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6864 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6865 			 expand_omp_regimplify_p, NULL, NULL)
6866 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6867 			    expand_omp_regimplify_p, NULL, NULL))
6868 	    {
6869 	      gsi = gsi_for_stmt (cond_stmt);
6870 	      gimple_regimplify_operands (cond_stmt, &gsi);
6871 	    }
6872 	  ne = single_succ_edge (bb);
6873 	  ne->flags = EDGE_FALSE_VALUE;
6874 
6875 	  init_bb = create_empty_bb (bb);
6876 	  set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6877 	  add_bb_to_loop (init_bb, bb->loop_father);
6878 	  e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6879 	  e->probability
6880 	    = profile_probability::guessed_always ().apply_scale (7, 8);
6881 	  ne->probability = e->probability.invert ();
6882 
6883 	  gsi = gsi_after_labels (init_bb);
6884 	  if (fd->loops[i + 1].m1)
6885 	    {
6886 	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6887 				      fd->loops[i + 1
6888 						- fd->loops[i + 1].outer].v);
6889 	      if (POINTER_TYPE_P (TREE_TYPE (t2)))
6890 		t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6891 	      else
6892 		{
6893 		  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6894 				    fd->loops[i + 1].n1);
6895 		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6896 		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6897 		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6898 		}
6899 	    }
6900 	  else
6901 	    t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6902 			      fd->loops[i + 1].n1);
6903 	  expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6904 	  if (fd->loops[i + 1].m2)
6905 	    {
6906 	      if (i + 2 == fd->collapse && (n2var || altv))
6907 		{
6908 		  gcc_assert (n2v == NULL_TREE);
6909 		  n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6910 		}
6911 	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6912 				      fd->loops[i + 1
6913 						- fd->loops[i + 1].outer].v);
6914 	      if (POINTER_TYPE_P (TREE_TYPE (t2)))
6915 		t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
6916 	      else
6917 		{
6918 		  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6919 				    fd->loops[i + 1].n2);
6920 		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6921 		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6922 		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6923 		}
6924 	      expand_omp_build_assign (&gsi, n2v, t);
6925 	    }
6926 	  if (i + 2 == fd->collapse && n2var)
6927 	    {
6928 	      /* For composite simd, n2 is the first iteration the current
6929 		 task should not handle, so we effectively want to use
6930 		 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6931 		 as the vectorized loop.  Except the vectorizer will not
6932 		 vectorize that, so instead compute N2VAR as
6933 		 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6934 		 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6935 		 as the loop to vectorize.  */
6936 	      tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6937 	      if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6938 		{
6939 		  tree itype = TREE_TYPE (fd->loops[i].v);
6940 		  if (POINTER_TYPE_P (itype))
6941 		    itype = signed_type_for (itype);
6942 		  t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6943 					     == LT_EXPR ? -1 : 1));
6944 		  t = fold_build2 (PLUS_EXPR, itype,
6945 				   fold_convert (itype,
6946 						 fd->loops[i + 1].step), t);
6947 		  if (fd->loops[i + 1].m2 == NULL_TREE)
6948 		    t = fold_build2 (PLUS_EXPR, itype, t,
6949 				     fold_convert (itype,
6950 						   fd->loops[i + 1].n2));
6951 		  else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
6952 		    {
6953 		      t = fold_build_pointer_plus (n2v, t);
6954 		      t = fold_convert (itype, t);
6955 		    }
6956 		  else
6957 		    t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6958 		  t = fold_build2 (MINUS_EXPR, itype, t,
6959 				   fold_convert (itype, fd->loops[i + 1].v));
6960 		  tree step = fold_convert (itype, fd->loops[i + 1].step);
6961 		  if (TYPE_UNSIGNED (itype)
6962 		      && fd->loops[i + 1].cond_code == GT_EXPR)
6963 		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
6964 				     fold_build1 (NEGATE_EXPR, itype, t),
6965 				     fold_build1 (NEGATE_EXPR, itype, step));
6966 		  else
6967 		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6968 		  t = fold_convert (type, t);
6969 		}
6970 	      else
6971 		t = counts[i + 1];
6972 	      expand_omp_build_assign (&gsi, min_arg1, t2);
6973 	      expand_omp_build_assign (&gsi, min_arg2, t);
6974 	      e = split_block (init_bb, last_stmt (init_bb));
6975 	      gsi = gsi_after_labels (e->dest);
6976 	      init_bb = e->dest;
6977 	      remove_edge (FALLTHRU_EDGE (entry_bb));
6978 	      make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6979 	      set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6980 	      set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6981 	      t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6982 	      t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6983 	      expand_omp_build_assign (&gsi, n2var, t);
6984 	    }
6985 	  if (i + 2 == fd->collapse && altv)
6986 	    {
6987 	      /* The vectorizer currently punts on loops with a non-constant
6988 		 step for the main IV (it cannot compute the number of
6989 		 iterations and gives up because of that).  Since for OpenMP
6990 		 loops the number of iterations can always be computed
6991 		 upfront, use an alternate IV as the loop iterator.  */
6992 	      expand_omp_build_assign (&gsi, altv,
6993 				       build_zero_cst (TREE_TYPE (altv)));
6994 	      tree itype = TREE_TYPE (fd->loops[i + 1].v);
6995 	      if (POINTER_TYPE_P (itype))
6996 		itype = signed_type_for (itype);
6997 	      t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6998 					 ? -1 : 1));
6999 	      t = fold_build2 (PLUS_EXPR, itype,
7000 			       fold_convert (itype, fd->loops[i + 1].step), t);
7001 	      t = fold_build2 (PLUS_EXPR, itype, t,
7002 			       fold_convert (itype,
7003 					     fd->loops[i + 1].m2
7004 					     ? n2v : fd->loops[i + 1].n2));
7005 	      t = fold_build2 (MINUS_EXPR, itype, t,
7006 			       fold_convert (itype, fd->loops[i + 1].v));
7007 	      tree step = fold_convert (itype, fd->loops[i + 1].step);
7008 	      if (TYPE_UNSIGNED (itype)
7009 		  && fd->loops[i + 1].cond_code == GT_EXPR)
7010 		t = fold_build2 (TRUNC_DIV_EXPR, itype,
7011 				 fold_build1 (NEGATE_EXPR, itype, t),
7012 				 fold_build1 (NEGATE_EXPR, itype, step));
7013 	      else
7014 		t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7015 	      t = fold_convert (TREE_TYPE (altv), t);
7016 	      expand_omp_build_assign (&gsi, altn2, t);
7017 	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7018 				      fd->loops[i + 1].m2
7019 				      ? n2v : fd->loops[i + 1].n2);
7020 	      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7021 					     true, GSI_SAME_STMT);
7022 	      t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7023 				fd->loops[i + 1].v, t2);
7024 	      gassign *g
7025 		= gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7026 				       build_zero_cst (TREE_TYPE (altv)));
7027 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7028 	    }
7029 	  n2v = nextn2v;
7030 
7031 	  make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7032 	  if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7033 	    {
7034 	      e = find_edge (entry_bb, last_bb);
7035 	      redirect_edge_succ (e, bb);
7036 	      set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7037 	      set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7038 	    }
7039 
7040 	  last_bb = bb;
7041 	}
7042     }
7043   if (!broken_loop)
7044     {
7045       class loop *loop = alloc_loop ();
7046       loop->header = l1_bb;
7047       loop->latch = cont_bb;
7048       add_loop (loop, l1_bb->loop_father);
7049       loop->safelen = safelen_int;
7050       if (simduid)
7051 	{
7052 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7053 	  cfun->has_simduid_loops = true;
7054 	}
7055       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7056 	 the loop.  */
7057       if ((flag_tree_loop_vectorize
7058 	   || !OPTION_SET_P (flag_tree_loop_vectorize))
7059 	  && flag_tree_loop_optimize
7060 	  && loop->safelen > 1)
7061 	{
7062 	  loop->force_vectorize = true;
7063 	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7064 	    {
7065 	      unsigned HOST_WIDE_INT v
7066 		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7067 	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7068 		loop->simdlen = v;
7069 	    }
7070 	  cfun->has_force_vectorize_loops = true;
7071 	}
7072       else if (dont_vectorize)
7073 	loop->dont_vectorize = true;
7074     }
7075   else if (simduid)
7076     cfun->has_simduid_loops = true;
7077 }
7078 
7079 /* A taskloop construct is represented after gimplification as
7080    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7081    between them.  This routine expands the outer GIMPLE_OMP_FOR,
7082    which should just compute all the loop temporaries needed
7083    by the GIMPLE_OMP_TASK.  */
7084 
7085 static void
7086 expand_omp_taskloop_for_outer (struct omp_region *region,
7087 			       struct omp_for_data *fd,
7088 			       gimple *inner_stmt)
7089 {
7090   tree type, bias = NULL_TREE;
7091   basic_block entry_bb, cont_bb, exit_bb;
7092   gimple_stmt_iterator gsi;
7093   gassign *assign_stmt;
7094   tree *counts = NULL;
7095   int i;
7096 
7097   gcc_assert (inner_stmt);
7098   gcc_assert (region->cont);
7099   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7100 	      && gimple_omp_task_taskloop_p (inner_stmt));
7101   type = TREE_TYPE (fd->loop.v);
7102 
7103   /* See if we need to bias by LLONG_MIN.  */
7104   if (fd->iter_type == long_long_unsigned_type_node
7105       && TREE_CODE (type) == INTEGER_TYPE
7106       && !TYPE_UNSIGNED (type))
7107     {
7108       tree n1, n2;
7109 
7110       if (fd->loop.cond_code == LT_EXPR)
7111 	{
7112 	  n1 = fd->loop.n1;
7113 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7114 	}
7115       else
7116 	{
7117 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7118 	  n2 = fd->loop.n1;
7119 	}
7120       if (TREE_CODE (n1) != INTEGER_CST
7121 	  || TREE_CODE (n2) != INTEGER_CST
7122 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7123 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7124     }
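  /* When needed, BIAS is TYPE_MIN_VALUE of the signed iterator type;
     adding it maps the signed iteration range onto the unsigned
     fd->iter_type while preserving the ordering of the bounds.  */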
7125 
7126   entry_bb = region->entry;
7127   cont_bb = region->cont;
7128   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7129   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7130   exit_bb = region->exit;
7131 
7132   gsi = gsi_last_nondebug_bb (entry_bb);
7133   gimple *for_stmt = gsi_stmt (gsi);
7134   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7135   if (fd->collapse > 1)
7136     {
7137       int first_zero_iter = -1, dummy = -1;
7138       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7139 
7140       counts = XALLOCAVEC (tree, fd->collapse);
7141       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7142 				  zero_iter_bb, first_zero_iter,
7143 				  dummy_bb, dummy, l2_dom_bb);
7144 
7145       if (zero_iter_bb)
7146 	{
7147 	  /* Some counts[i] vars might be uninitialized if
7148 	     some loop has zero iterations.  But the body shouldn't
7149 	     be executed in that case, so just avoid uninit warnings.  */
7150 	  for (i = first_zero_iter; i < fd->collapse; i++)
7151 	    if (SSA_VAR_P (counts[i]))
7152 	      suppress_warning (counts[i], OPT_Wuninitialized);
7153 	  gsi_prev (&gsi);
7154 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
7155 	  entry_bb = e->dest;
7156 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7157 	  gsi = gsi_last_bb (entry_bb);
7158 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7159 				   get_immediate_dominator (CDI_DOMINATORS,
7160 							    zero_iter_bb));
7161 	}
7162     }
7163 
7164   tree t0, t1;
7165   t1 = fd->loop.n2;
7166   t0 = fd->loop.n1;
7167   if (POINTER_TYPE_P (TREE_TYPE (t0))
7168       && TYPE_PRECISION (TREE_TYPE (t0))
7169 	 != TYPE_PRECISION (fd->iter_type))
7170     {
7171       /* Avoid casting pointers to an integer of a different size.  */
7172       tree itype = signed_type_for (type);
7173       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7174       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7175     }
7176   else
7177     {
7178       t1 = fold_convert (fd->iter_type, t1);
7179       t0 = fold_convert (fd->iter_type, t0);
7180     }
7181   if (bias)
7182     {
7183       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7184       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7185     }
7186 
7187   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7188 				 OMP_CLAUSE__LOOPTEMP_);
7189   gcc_assert (innerc);
7190   tree startvar = OMP_CLAUSE_DECL (innerc);
7191   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7192   gcc_assert (innerc);
7193   tree endvar = OMP_CLAUSE_DECL (innerc);
7194   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7195     {
7196       innerc = find_lastprivate_looptemp (fd, innerc);
7197       if (innerc)
7198 	{
7199 	  /* If needed (inner taskloop has lastprivate clause), propagate
7200 	     down the total number of iterations.  */
7201 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7202 					     NULL_TREE, false,
7203 					     GSI_CONTINUE_LINKING);
7204 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7205 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7206 	}
7207     }
7208 
7209   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7210 				 GSI_CONTINUE_LINKING);
7211   assign_stmt = gimple_build_assign (startvar, t0);
7212   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7213 
7214   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7215 				 GSI_CONTINUE_LINKING);
7216   assign_stmt = gimple_build_assign (endvar, t1);
7217   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7218   if (fd->collapse > 1)
7219     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7220 
7221   /* Remove the GIMPLE_OMP_FOR statement.  */
7222   gsi = gsi_for_stmt (for_stmt);
7223   gsi_remove (&gsi, true);
7224 
7225   gsi = gsi_last_nondebug_bb (cont_bb);
7226   gsi_remove (&gsi, true);
7227 
7228   gsi = gsi_last_nondebug_bb (exit_bb);
7229   gsi_remove (&gsi, true);
7230 
7231   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7232   remove_edge (BRANCH_EDGE (entry_bb));
7233   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7234   remove_edge (BRANCH_EDGE (cont_bb));
7235   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7236   set_immediate_dominator (CDI_DOMINATORS, region->entry,
7237 			   recompute_dominator (CDI_DOMINATORS, region->entry));
7238 }
7239 
7240 /* A taskloop construct is represented after gimplification as
7241    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7242    between them.  This routine expands the inner GIMPLE_OMP_FOR.
7243    The GOMP_taskloop{,_ull} function arranges for each task to be
7244    given just a single range of iterations.  */
7245 
7246 static void
7247 expand_omp_taskloop_for_inner (struct omp_region *region,
7248 			       struct omp_for_data *fd,
7249 			       gimple *inner_stmt)
7250 {
7251   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7252   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7253   basic_block fin_bb;
7254   gimple_stmt_iterator gsi;
7255   edge ep;
7256   bool broken_loop = region->cont == NULL;
7257   tree *counts = NULL;
7258   tree n1, n2, step;
7259 
7260   itype = type = TREE_TYPE (fd->loop.v);
7261   if (POINTER_TYPE_P (type))
7262     itype = signed_type_for (type);
7263 
7264   /* See if we need to bias by LLONG_MIN.  */
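  /* (Adding the bias flips the sign bit, mapping the signed iteration
     bounds order-preservingly onto the unsigned range used by the
     GOMP_taskloop_ull runtime interface.)  */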
7265   if (fd->iter_type == long_long_unsigned_type_node
7266       && TREE_CODE (type) == INTEGER_TYPE
7267       && !TYPE_UNSIGNED (type))
7268     {
7269       tree n1, n2;
7270 
7271       if (fd->loop.cond_code == LT_EXPR)
7272 	{
7273 	  n1 = fd->loop.n1;
7274 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7275 	}
7276       else
7277 	{
7278 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7279 	  n2 = fd->loop.n1;
7280 	}
7281       if (TREE_CODE (n1) != INTEGER_CST
7282 	  || TREE_CODE (n2) != INTEGER_CST
7283 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7284 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7285     }
7286 
7287   entry_bb = region->entry;
7288   cont_bb = region->cont;
7289   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7290   fin_bb = BRANCH_EDGE (entry_bb)->dest;
7291   gcc_assert (broken_loop
7292 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7293   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7294   if (!broken_loop)
7295     {
7296       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7297       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7298     }
7299   exit_bb = region->exit;
7300 
7301   /* Iteration space partitioning goes in ENTRY_BB.  */
7302   gsi = gsi_last_nondebug_bb (entry_bb);
7303   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7304 
7305   if (fd->collapse > 1)
7306     {
7307       int first_zero_iter = -1, dummy = -1;
7308       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7309 
7310       counts = XALLOCAVEC (tree, fd->collapse);
7311       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7312 				  fin_bb, first_zero_iter,
7313 				  dummy_bb, dummy, l2_dom_bb);
7314       t = NULL_TREE;
7315     }
7316   else
7317     t = integer_one_node;
7318 
7319   step = fd->loop.step;
7320   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7321 				 OMP_CLAUSE__LOOPTEMP_);
7322   gcc_assert (innerc);
7323   n1 = OMP_CLAUSE_DECL (innerc);
7324   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7325   gcc_assert (innerc);
7326   n2 = OMP_CLAUSE_DECL (innerc);
7327   if (bias)
7328     {
7329       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7330       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7331     }
7332   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7333 				 true, NULL_TREE, true, GSI_SAME_STMT);
7334   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7335 				 true, NULL_TREE, true, GSI_SAME_STMT);
7336   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7337 				   true, NULL_TREE, true, GSI_SAME_STMT);
7338 
7339   tree startvar = fd->loop.v;
7340   tree endvar = NULL_TREE;
7341 
7342   if (gimple_omp_for_combined_p (fd->for_stmt))
7343     {
7344       tree clauses = gimple_omp_for_clauses (inner_stmt);
7345       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7346       gcc_assert (innerc);
7347       startvar = OMP_CLAUSE_DECL (innerc);
7348       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7349 				OMP_CLAUSE__LOOPTEMP_);
7350       gcc_assert (innerc);
7351       endvar = OMP_CLAUSE_DECL (innerc);
7352     }
7353   t = fold_convert (TREE_TYPE (startvar), n1);
7354   t = force_gimple_operand_gsi (&gsi, t,
7355 				DECL_P (startvar)
7356 				&& TREE_ADDRESSABLE (startvar),
7357 				NULL_TREE, false, GSI_CONTINUE_LINKING);
7358   gimple *assign_stmt = gimple_build_assign (startvar, t);
7359   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7360 
7361   t = fold_convert (TREE_TYPE (startvar), n2);
7362   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7363 				false, GSI_CONTINUE_LINKING);
7364   if (endvar)
7365     {
7366       assign_stmt = gimple_build_assign (endvar, e);
7367       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7368       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7369 	assign_stmt = gimple_build_assign (fd->loop.v, e);
7370       else
7371 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7372       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7373     }
7374 
7375   tree *nonrect_bounds = NULL;
7376   if (fd->collapse > 1)
7377     {
7378       if (fd->non_rect)
7379 	{
7380 	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7381 	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7382 	}
7383       gcc_assert (gsi_bb (gsi) == entry_bb);
7384       expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7385 				startvar);
7386       entry_bb = gsi_bb (gsi);
7387     }
7388 
7389   if (!broken_loop)
7390     {
7391       /* The code controlling the sequential loop replaces the
7392 	 GIMPLE_OMP_CONTINUE.  */
7393       gsi = gsi_last_nondebug_bb (cont_bb);
7394       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7395       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7396       vmain = gimple_omp_continue_control_use (cont_stmt);
7397       vback = gimple_omp_continue_control_def (cont_stmt);
7398 
7399       if (!gimple_omp_for_combined_p (fd->for_stmt))
7400 	{
7401 	  if (POINTER_TYPE_P (type))
7402 	    t = fold_build_pointer_plus (vmain, step);
7403 	  else
7404 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
7405 	  t = force_gimple_operand_gsi (&gsi, t,
7406 					DECL_P (vback)
7407 					&& TREE_ADDRESSABLE (vback),
7408 					NULL_TREE, true, GSI_SAME_STMT);
7409 	  assign_stmt = gimple_build_assign (vback, t);
7410 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7411 
7412 	  t = build2 (fd->loop.cond_code, boolean_type_node,
7413 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
7414 		      ? t : vback, e);
7415 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7416 	}
7417 
7418       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7419       gsi_remove (&gsi, true);
7420 
7421       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7422 	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7423 						   cont_bb, body_bb);
7424     }
7425 
7426   /* Remove the GIMPLE_OMP_FOR statement.  */
7427   gsi = gsi_for_stmt (fd->for_stmt);
7428   gsi_remove (&gsi, true);
7429 
7430   /* Remove the GIMPLE_OMP_RETURN statement.  */
7431   gsi = gsi_last_nondebug_bb (exit_bb);
7432   gsi_remove (&gsi, true);
7433 
7434   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7435   if (!broken_loop)
7436     remove_edge (BRANCH_EDGE (entry_bb));
7437   else
7438     {
7439       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7440       region->outer->cont = NULL;
7441     }
7442 
7443   /* Connect all the blocks.  */
7444   if (!broken_loop)
7445     {
7446       ep = find_edge (cont_bb, body_bb);
7447       if (gimple_omp_for_combined_p (fd->for_stmt))
7448 	{
7449 	  remove_edge (ep);
7450 	  ep = NULL;
7451 	}
7452       else if (fd->collapse > 1)
7453 	{
7454 	  remove_edge (ep);
7455 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7456 	}
7457       else
7458 	ep->flags = EDGE_TRUE_VALUE;
7459       find_edge (cont_bb, fin_bb)->flags
7460 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7461     }
7462 
7463   set_immediate_dominator (CDI_DOMINATORS, body_bb,
7464 			   recompute_dominator (CDI_DOMINATORS, body_bb));
7465   if (!broken_loop)
7466     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7467 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
7468 
7469   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7470     {
7471       class loop *loop = alloc_loop ();
7472       loop->header = body_bb;
7473       if (collapse_bb == NULL)
7474 	loop->latch = cont_bb;
7475       add_loop (loop, body_bb->loop_father);
7476     }
7477 }
7478 
7479 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
7480    partitioned loop.  The lowering here is abstracted, in that the
7481    loop parameters are passed through internal functions, which are
7482    further lowered by oacc_device_lower, once we get to the target
7483    compiler.  The loop is of the form:
7484 
7485    for (V = B; V LTGT E; V += S) {BODY}
7486 
7487    where LTGT is < or >.  We may have a specified chunk size, CHUNK_SIZE
7488    (constant 0 for no chunking), and we will have a GWV partitioning
7489    mask, specifying dimensions over which the loop is to be
7490    partitioned (see note below).  We generate code that looks like
7491    (this ignores tiling):
7492 
7493    <entry_bb> [incoming FALL->body, BRANCH->exit]
7494      typedef signedintify (typeof (V)) T;  // underlying signed integral type
7495      T range = E - B;
7496      T chunk_no = 0;
7497      T DIR = LTGT == '<' ? +1 : -1;
7498      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7499      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7500 
7501    <head_bb> [created by splitting end of entry_bb]
7502      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7503      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7504      if (!(offset LTGT bound)) goto bottom_bb;
7505 
7506    <body_bb> [incoming]
7507      V = B + offset;
7508      {BODY}
7509 
7510    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7511      offset += step;
7512      if (offset LTGT bound) goto body_bb; [*]
7513 
7514    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7515      chunk_no++;
7516      if (chunk_no < chunk_max) goto head_bb;
7517 
7518    <exit_bb> [incoming]
7519      V = B + ((range -/+ 1) / S +/- 1) * S [*]
7520 
7521    [*] Needed if V live at end of loop.  */
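/* For example (a sketch), '#pragma acc loop gang vector' arrives here
   with a GWV mask having the GOMP_DIM_GANG and GOMP_DIM_VECTOR bits
   set; the IFN_GOACC_LOOP_* calls generated below merely carry range,
   S, CHUNK_SIZE and that mask through to oacc_device_lower, which
   resolves them for the concrete target.  */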
7522 
7523 static void
7524 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7525 {
7526   bool is_oacc_kernels_parallelized
7527     = (lookup_attribute ("oacc kernels parallelized",
7528 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7529   {
7530     bool is_oacc_kernels
7531       = (lookup_attribute ("oacc kernels",
7532 			   DECL_ATTRIBUTES (current_function_decl)) != NULL);
7533     if (is_oacc_kernels_parallelized)
7534       gcc_checking_assert (is_oacc_kernels);
7535   }
7536   gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7537   /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7538      for SSA specifics, and some are for 'parloops' OpenACC
7539      'kernels'-parallelized specifics.  */
7540 
7541   tree v = fd->loop.v;
7542   enum tree_code cond_code = fd->loop.cond_code;
7543   enum tree_code plus_code = PLUS_EXPR;
7544 
7545   tree chunk_size = integer_minus_one_node;
7546   tree gwv = integer_zero_node;
7547   tree iter_type = TREE_TYPE (v);
7548   tree diff_type = iter_type;
7549   tree plus_type = iter_type;
7550   struct oacc_collapse *counts = NULL;
7551 
7552   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7553 		       == GF_OMP_FOR_KIND_OACC_LOOP);
7554   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7555   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7556 
7557   if (POINTER_TYPE_P (iter_type))
7558     {
7559       plus_code = POINTER_PLUS_EXPR;
7560       plus_type = sizetype;
7561     }
7562   for (int ix = fd->collapse; ix--;)
7563     {
7564       tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7565       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7566 	diff_type = diff_type2;
7567     }
7568   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7569     diff_type = signed_type_for (diff_type);
7570   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7571     diff_type = integer_type_node;
7572 
7573   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7574   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7575   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
7576   basic_block bottom_bb = NULL;
7577 
7578   /* entry_bb has two successors; the branch edge is to the exit
7579      block, fallthrough edge to body.  */
7580   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7581 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7582 
7583   /* If cont_bb is non-NULL, it has 2 successors.  The branch successor
7584      goes to body_bb, or to a block whose only successor is body_bb.  Its
7585      fallthrough successor is the final block (same as the branch
7586      successor of the entry_bb).  */
7587   if (cont_bb)
7588     {
7589       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7590       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7591 
7592       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7593       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7594     }
7595   else
7596     gcc_assert (!gimple_in_ssa_p (cfun));
7597 
7598   /* The exit block only has entry_bb and cont_bb as predecessors.  */
7599   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7600 
7601   tree chunk_no;
7602   tree chunk_max = NULL_TREE;
7603   tree bound, offset;
7604   tree step = create_tmp_var (diff_type, ".step");
7605   bool up = cond_code == LT_EXPR;
7606   tree dir = build_int_cst (diff_type, up ? +1 : -1);
7607   bool chunking = !gimple_in_ssa_p (cfun);
7608   bool negating;
7609 
7610   /* Tiling vars.  */
7611   tree tile_size = NULL_TREE;
7612   tree element_s = NULL_TREE;
7613   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7614   basic_block elem_body_bb = NULL;
7615   basic_block elem_cont_bb = NULL;
7616 
7617   /* SSA instances.  */
7618   tree offset_incr = NULL_TREE;
7619   tree offset_init = NULL_TREE;
7620 
7621   gimple_stmt_iterator gsi;
7622   gassign *ass;
7623   gcall *call;
7624   gimple *stmt;
7625   tree expr;
7626   location_t loc;
7627   edge split, be, fte;
7628 
7629   /* Split the end of entry_bb to create head_bb.  */
7630   split = split_block (entry_bb, last_stmt (entry_bb));
7631   basic_block head_bb = split->dest;
7632   entry_bb = split->src;
7633 
7634   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
7635   gsi = gsi_last_nondebug_bb (entry_bb);
7636   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7637   loc = gimple_location (for_stmt);
7638 
7639   if (gimple_in_ssa_p (cfun))
7640     {
7641       offset_init = gimple_omp_for_index (for_stmt, 0);
7642       gcc_assert (integer_zerop (fd->loop.n1));
7643       /* The SSA parallelizer does gang parallelism.  */
7644       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7645     }
7646 
7647   if (fd->collapse > 1 || fd->tiling)
7648     {
7649       gcc_assert (!gimple_in_ssa_p (cfun) && up);
7650       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7651       tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7652 					      TREE_TYPE (fd->loop.n2), loc);
7653 
7654       if (SSA_VAR_P (fd->loop.n2))
7655 	{
7656 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7657 					    true, GSI_SAME_STMT);
7658 	  ass = gimple_build_assign (fd->loop.n2, total);
7659 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7660 	}
7661     }
7662 
7663   tree b = fd->loop.n1;
7664   tree e = fd->loop.n2;
7665   tree s = fd->loop.step;
7666 
7667   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7668   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7669 
7670   /* Convert the step, avoiding possible unsigned->signed overflow.  */
7671   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7672   if (negating)
7673     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7674   s = fold_convert (diff_type, s);
7675   if (negating)
7676     s = fold_build1 (NEGATE_EXPR, diff_type, s);
7677   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7678 
7679   if (!chunking)
7680     chunk_size = integer_zero_node;
7681   expr = fold_convert (diff_type, chunk_size);
7682   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7683 					 NULL_TREE, true, GSI_SAME_STMT);
7684 
7685   if (fd->tiling)
7686     {
7687       /* Determine the tile size and element step,
7688 	 modify the outer loop step size.  */
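      /* A sketch: for tile(8,8) the product below makes tile_size 64,
	 element_s keeps the original step for the element loop, and the
	 outer step is scaled by tile_size so that one outer iteration
	 advances a whole tile.  */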
7689       tile_size = create_tmp_var (diff_type, ".tile_size");
7690       expr = build_int_cst (diff_type, 1);
7691       for (int ix = 0; ix < fd->collapse; ix++)
7692 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7693       expr = force_gimple_operand_gsi (&gsi, expr, true,
7694 				       NULL_TREE, true, GSI_SAME_STMT);
7695       ass = gimple_build_assign (tile_size, expr);
7696       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7697 
7698       element_s = create_tmp_var (diff_type, ".element_s");
7699       ass = gimple_build_assign (element_s, s);
7700       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7701 
7702       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7703       s = force_gimple_operand_gsi (&gsi, expr, true,
7704 				    NULL_TREE, true, GSI_SAME_STMT);
7705     }
7706 
7707   /* Determine the range, avoiding possible unsigned->signed overflow.  */
7708   negating = !up && TYPE_UNSIGNED (iter_type);
7709   expr = fold_build2 (MINUS_EXPR, plus_type,
7710 		      fold_convert (plus_type, negating ? b : e),
7711 		      fold_convert (plus_type, negating ? e : b));
7712   expr = fold_convert (diff_type, expr);
7713   if (negating)
7714     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7715   tree range = force_gimple_operand_gsi (&gsi, expr, true,
7716 					 NULL_TREE, true, GSI_SAME_STMT);
7717 
7718   chunk_no = build_int_cst (diff_type, 0);
7719   if (chunking)
7720     {
7721       gcc_assert (!gimple_in_ssa_p (cfun));
7722 
7723       expr = chunk_no;
7724       chunk_max = create_tmp_var (diff_type, ".chunk_max");
7725       chunk_no = create_tmp_var (diff_type, ".chunk_no");
7726 
7727       ass = gimple_build_assign (chunk_no, expr);
7728       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7729 
7730       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7731 					 build_int_cst (integer_type_node,
7732 							IFN_GOACC_LOOP_CHUNKS),
7733 					 dir, range, s, chunk_size, gwv);
7734       gimple_call_set_lhs (call, chunk_max);
7735       gimple_set_location (call, loc);
7736       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7737     }
7738   else
7739     chunk_size = chunk_no;
7740 
7741   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7742 				     build_int_cst (integer_type_node,
7743 						    IFN_GOACC_LOOP_STEP),
7744 				     dir, range, s, chunk_size, gwv);
7745   gimple_call_set_lhs (call, step);
7746   gimple_set_location (call, loc);
7747   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7748 
7749   /* Remove the GIMPLE_OMP_FOR.  */
7750   gsi_remove (&gsi, true);
7751 
7752   /* Fixup edges from head_bb.  */
7753   be = BRANCH_EDGE (head_bb);
7754   fte = FALLTHRU_EDGE (head_bb);
7755   be->flags |= EDGE_FALSE_VALUE;
7756   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
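  /* The XOR turns the former fallthrough edge into the true edge of
     the conditional inserted into head_bb below, while the branch edge
     was just marked as the false (loop exit) edge.  */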
7757 
7758   basic_block body_bb = fte->dest;
7759 
7760   if (gimple_in_ssa_p (cfun))
7761     {
7762       gsi = gsi_last_nondebug_bb (cont_bb);
7763       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7764 
7765       offset = gimple_omp_continue_control_use (cont_stmt);
7766       offset_incr = gimple_omp_continue_control_def (cont_stmt);
7767     }
7768   else
7769     {
7770       offset = create_tmp_var (diff_type, ".offset");
7771       offset_init = offset_incr = offset;
7772     }
7773   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7774 
7775   /* Loop offset & bound go into head_bb.  */
7776   gsi = gsi_start_bb (head_bb);
7777 
7778   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7779 				     build_int_cst (integer_type_node,
7780 						    IFN_GOACC_LOOP_OFFSET),
7781 				     dir, range, s,
7782 				     chunk_size, gwv, chunk_no);
7783   gimple_call_set_lhs (call, offset_init);
7784   gimple_set_location (call, loc);
7785   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7786 
7787   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7788 				     build_int_cst (integer_type_node,
7789 						    IFN_GOACC_LOOP_BOUND),
7790 				     dir, range, s,
7791 				     chunk_size, gwv, offset_init);
7792   gimple_call_set_lhs (call, bound);
7793   gimple_set_location (call, loc);
7794   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7795 
7796   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7797   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7798 		    GSI_CONTINUE_LINKING);
7799 
7800   /* V assignment goes into body_bb.  */
7801   if (!gimple_in_ssa_p (cfun))
7802     {
7803       gsi = gsi_start_bb (body_bb);
7804 
7805       expr = build2 (plus_code, iter_type, b,
7806 		     fold_convert (plus_type, offset));
7807       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7808 				       true, GSI_SAME_STMT);
7809       ass = gimple_build_assign (v, expr);
7810       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7811 
7812       if (fd->collapse > 1 || fd->tiling)
7813 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7814 
7815       if (fd->tiling)
7816 	{
7817 	  /* Determine the range of the element loop -- usually simply
7818 	     the tile_size, but could be smaller if the final
7819 	     iteration of the outer loop is a partial tile.  */
7820 	  tree e_range = create_tmp_var (diff_type, ".e_range");
7821 
7822 	  expr = build2 (MIN_EXPR, diff_type,
7823 			 build2 (MINUS_EXPR, diff_type, bound, offset),
7824 			 build2 (MULT_EXPR, diff_type, tile_size,
7825 				 element_s));
7826 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7827 					   true, GSI_SAME_STMT);
7828 	  ass = gimple_build_assign (e_range, expr);
7829 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7830 
7831 	  /* Determine bound, offset & step of inner loop.  */
7832 	  e_bound = create_tmp_var (diff_type, ".e_bound");
7833 	  e_offset = create_tmp_var (diff_type, ".e_offset");
7834 	  e_step = create_tmp_var (diff_type, ".e_step");
7835 
7836 	  /* Mark these as element loops.  */
7837 	  tree t, e_gwv = integer_minus_one_node;
7838 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
7839 
7840 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7841 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7842 					     element_s, chunk, e_gwv, chunk);
7843 	  gimple_call_set_lhs (call, e_offset);
7844 	  gimple_set_location (call, loc);
7845 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7846 
7847 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7848 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7849 					     element_s, chunk, e_gwv, e_offset);
7850 	  gimple_call_set_lhs (call, e_bound);
7851 	  gimple_set_location (call, loc);
7852 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7853 
7854 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7855 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7856 					     element_s, chunk, e_gwv);
7857 	  gimple_call_set_lhs (call, e_step);
7858 	  gimple_set_location (call, loc);
7859 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7860 
7861 	  /* Add test and split block.  */
7862 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7863 	  stmt = gimple_build_cond_empty (expr);
7864 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7865 	  split = split_block (body_bb, stmt);
7866 	  elem_body_bb = split->dest;
7867 	  if (cont_bb == body_bb)
7868 	    cont_bb = elem_body_bb;
7869 	  body_bb = split->src;
7870 
7871 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7872 
7873 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
7874 	  if (cont_bb == NULL)
7875 	    {
7876 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7877 	      e->probability = profile_probability::even ();
7878 	      split->probability = profile_probability::even ();
7879 	    }
7880 
7881 	  /* Initialize the user's loop vars.  */
7882 	  gsi = gsi_start_bb (elem_body_bb);
7883 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7884 				     diff_type);
7885 	}
7886     }
7887 
7888   /* Loop increment goes into cont_bb.  If this is not a loop, we
7889      will have spawned threads as if it were, and each one will
7890      execute one iteration.  The specification is not explicit about
7891      whether such constructs are ill-formed or not, and they can
7892      occur, especially when noreturn routines are involved.  */
7893   if (cont_bb)
7894     {
7895       gsi = gsi_last_nondebug_bb (cont_bb);
7896       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7897       loc = gimple_location (cont_stmt);
7898 
7899       if (fd->tiling)
7900 	{
7901 	  /* Insert element loop increment and test.  */
7902 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7903 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7904 					   true, GSI_SAME_STMT);
7905 	  ass = gimple_build_assign (e_offset, expr);
7906 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7907 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7908 
7909 	  stmt = gimple_build_cond_empty (expr);
7910 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7911 	  split = split_block (cont_bb, stmt);
7912 	  elem_cont_bb = split->src;
7913 	  cont_bb = split->dest;
7914 
7915 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7916 	  split->probability = profile_probability::unlikely ().guessed ();
7917 	  edge latch_edge
7918 	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7919 	  latch_edge->probability = profile_probability::likely ().guessed ();
7920 
7921 	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7922 	  skip_edge->probability = profile_probability::unlikely ().guessed ();
7923 	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7924 	  loop_entry_edge->probability
7925 	    = profile_probability::likely ().guessed ();
7926 
7927 	  gsi = gsi_for_stmt (cont_stmt);
7928 	}
7929 
7930       /* Increment offset.  */
7931       if (gimple_in_ssa_p (cfun))
7932 	expr = build2 (plus_code, iter_type, offset,
7933 		       fold_convert (plus_type, step));
7934       else
7935 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
7936       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7937 				       true, GSI_SAME_STMT);
7938       ass = gimple_build_assign (offset_incr, expr);
7939       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7940       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7941       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7942 
7943       /* Remove the GIMPLE_OMP_CONTINUE.  */
7944       gsi_remove (&gsi, true);
7945 
7946       /* Fixup edges from cont_bb.  */
7947       be = BRANCH_EDGE (cont_bb);
7948       fte = FALLTHRU_EDGE (cont_bb);
7949       be->flags |= EDGE_TRUE_VALUE;
7950       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7951 
7952       if (chunking)
7953 	{
7954 	  /* Split the beginning of exit_bb to make bottom_bb.  We
7955 	     need to insert a nop at the start, because splitting is
7956 	     after a stmt, not before.  */
7957 	  gsi = gsi_start_bb (exit_bb);
7958 	  stmt = gimple_build_nop ();
7959 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7960 	  split = split_block (exit_bb, stmt);
7961 	  bottom_bb = split->src;
7962 	  exit_bb = split->dest;
7963 	  gsi = gsi_last_bb (bottom_bb);
7964 
7965 	  /* Chunk increment and test goes into bottom_bb.  */
7966 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7967 			 build_int_cst (diff_type, 1));
7968 	  ass = gimple_build_assign (chunk_no, expr);
7969 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7970 
7971 	  /* Chunk test at end of bottom_bb.  */
7972 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7973 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7974 			    GSI_CONTINUE_LINKING);
7975 
7976 	  /* Fixup edges from bottom_bb.  */
7977 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7978 	  split->probability = profile_probability::unlikely ().guessed ();
7979 	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7980 	  latch_edge->probability = profile_probability::likely ().guessed ();
7981 	}
7982     }
7983 
7984   gsi = gsi_last_nondebug_bb (exit_bb);
7985   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7986   loc = gimple_location (gsi_stmt (gsi));
7987 
7988   if (!gimple_in_ssa_p (cfun))
7989     {
7990       /* Insert the final value of V, in case it is live.  This is the
7991 	 value for the only thread that survives past the join.  */
7992       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7993       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7994       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7995       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7996       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7997       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7998 				       true, GSI_SAME_STMT);
7999       ass = gimple_build_assign (v, expr);
8000       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8001     }
8002 
8003   /* Remove the OMP_RETURN.  */
8004   gsi_remove (&gsi, true);
8005 
8006   if (cont_bb)
8007     {
8008       /* We now have one, two or three nested loops.  Update the loop
8009 	 structures.  */
8010       class loop *parent = entry_bb->loop_father;
8011       class loop *body = body_bb->loop_father;
8012 
8013       if (chunking)
8014 	{
8015 	  class loop *chunk_loop = alloc_loop ();
8016 	  chunk_loop->header = head_bb;
8017 	  chunk_loop->latch = bottom_bb;
8018 	  add_loop (chunk_loop, parent);
8019 	  parent = chunk_loop;
8020 	}
8021       else if (parent != body)
8022 	{
8023 	  gcc_assert (body->header == body_bb);
8024 	  gcc_assert (body->latch == cont_bb
8025 		      || single_pred (body->latch) == cont_bb);
8026 	  parent = NULL;
8027 	}
8028 
8029       if (parent)
8030 	{
8031 	  class loop *body_loop = alloc_loop ();
8032 	  body_loop->header = body_bb;
8033 	  body_loop->latch = cont_bb;
8034 	  add_loop (body_loop, parent);
8035 
8036 	  if (fd->tiling)
8037 	    {
8038 	      /* Insert tiling's element loop.  */
8039 	      class loop *inner_loop = alloc_loop ();
8040 	      inner_loop->header = elem_body_bb;
8041 	      inner_loop->latch = elem_cont_bb;
8042 	      add_loop (inner_loop, body_loop);
8043 	    }
8044 	}
8045     }
8046 }
8047 
8048 /* Expand the OMP loop defined by REGION.  */
8049 
8050 static void
8051 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8052 {
8053   struct omp_for_data fd;
8054   struct omp_for_data_loop *loops;
8055 
8056   loops = XALLOCAVEC (struct omp_for_data_loop,
8057 		      gimple_omp_for_collapse (last_stmt (region->entry)));
8058   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8059 			&fd, loops);
8060   region->sched_kind = fd.sched_kind;
8061   region->sched_modifiers = fd.sched_modifiers;
8062   region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8063   if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8064     {
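      /* Illustrative sketch: for a non-rectangular inner loop such as
	 'for (j = 2*i + 1; j < 3*i + 7; j += 2)' with an outer step
	 of 1, the product (m2 - m1) * outer-step is (3 - 2) * 1 = 1,
	 which is not a multiple of the inner step 2, so the check
	 below diagnoses it.  */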
8065       for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8066 	if ((loops[i].m1 || loops[i].m2)
8067 	    && (loops[i].m1 == NULL_TREE
8068 		|| TREE_CODE (loops[i].m1) == INTEGER_CST)
8069 	    && (loops[i].m2 == NULL_TREE
8070 		|| TREE_CODE (loops[i].m2) == INTEGER_CST)
8071 	    && TREE_CODE (loops[i].step) == INTEGER_CST
8072 	    && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8073 	  {
8074 	    tree t;
8075 	    tree itype = TREE_TYPE (loops[i].v);
8076 	    if (loops[i].m1 && loops[i].m2)
8077 	      t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8078 	    else if (loops[i].m1)
8079 	      t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8080 	    else
8081 	      t = loops[i].m2;
8082 	    t = fold_build2 (MULT_EXPR, itype, t,
8083 			     fold_convert (itype,
8084 					   loops[i - loops[i].outer].step));
8085 	    if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8086 	      t = fold_build2 (TRUNC_MOD_EXPR, itype,
8087 			       fold_build1 (NEGATE_EXPR, itype, t),
8088 			       fold_build1 (NEGATE_EXPR, itype,
8089 					    fold_convert (itype,
8090 							  loops[i].step)));
8091 	    else
8092 	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8093 			       fold_convert (itype, loops[i].step));
8094 	    if (integer_nonzerop (t))
8095 	      error_at (gimple_location (fd.for_stmt),
8096 			"invalid OpenMP non-rectangular loop step; "
8097 			"%<(%E - %E) * %E%> is not a multiple of loop %d "
8098 			"step %qE",
8099 			loops[i].m2 ? loops[i].m2 : integer_zero_node,
8100 			loops[i].m1 ? loops[i].m1 : integer_zero_node,
8101 			loops[i - loops[i].outer].step, i + 1,
8102 			loops[i].step);
8103 	  }
8104     }
8105 
8106   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8107   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8108   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8109   if (region->cont)
8110     {
8111       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8112       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8113       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8114     }
8115   else
8116     /* If there isn't a continue then this is a degenerate case where
8117        the introduction of abnormal edges during lowering will prevent
8118        original loops from being detected.  Fix that up.  */
8119     loops_state_set (LOOPS_NEED_FIXUP);
8120 
8121   if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8122     expand_omp_simd (region, &fd);
8123   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8124     {
8125       gcc_assert (!inner_stmt && !fd.non_rect);
8126       expand_oacc_for (region, &fd);
8127     }
8128   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8129     {
8130       if (gimple_omp_for_combined_into_p (fd.for_stmt))
8131 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8132       else
8133 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8134     }
8135   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8136 	   && !fd.have_ordered)
8137     {
8138       if (fd.chunk_size == NULL)
8139 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8140       else
8141 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
8142     }
8143   else
8144     {
8145       int fn_index, start_ix, next_ix;
8146       unsigned HOST_WIDE_INT sched = 0;
8147       tree sched_arg = NULL_TREE;
8148 
8149       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8150 		  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8151       if (fd.chunk_size == NULL
8152 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8153 	fd.chunk_size = integer_zero_node;
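      /* E.g. schedule(monotonic: dynamic) resolves below to
	 GOMP_loop_dynamic_start/GOMP_loop_dynamic_next, while plain
	 schedule(dynamic), nonmonotonic by default, selects the
	 GOMP_loop_nonmonotonic_dynamic_* entry points; the _ull
	 counterparts are substituted for unsigned long long iteration
	 types.  */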
8154       switch (fd.sched_kind)
8155 	{
8156 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
8157 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8158 	      && fd.lastprivate_conditional == 0)
8159 	    {
8160 	      gcc_assert (!fd.have_ordered);
8161 	      fn_index = 6;
8162 	      sched = 4;
8163 	    }
8164 	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8165 		   && !fd.have_ordered
8166 		   && fd.lastprivate_conditional == 0)
8167 	    fn_index = 7;
8168 	  else
8169 	    {
8170 	      fn_index = 3;
8171 	      sched = (HOST_WIDE_INT_1U << 31);
8172 	    }
8173 	  break;
8174 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8175 	case OMP_CLAUSE_SCHEDULE_GUIDED:
8176 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8177 	      && !fd.have_ordered
8178 	      && fd.lastprivate_conditional == 0)
8179 	    {
8180 	      fn_index = 3 + fd.sched_kind;
8181 	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8182 	      break;
8183 	    }
8184 	  fn_index = fd.sched_kind;
8185 	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8186 	  sched += (HOST_WIDE_INT_1U << 31);
8187 	  break;
8188 	case OMP_CLAUSE_SCHEDULE_STATIC:
8189 	  gcc_assert (fd.have_ordered);
8190 	  fn_index = 0;
8191 	  sched = (HOST_WIDE_INT_1U << 31) + 1;
8192 	  break;
8193 	default:
8194 	  gcc_unreachable ();
8195 	}
8196       if (!fd.ordered)
8197 	fn_index += fd.have_ordered * 8;
8198       if (fd.ordered)
8199 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8200       else
8201 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8202       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8203       if (fd.have_reductemp || fd.have_pointer_condtemp)
8204 	{
8205 	  if (fd.ordered)
8206 	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8207 	  else if (fd.have_ordered)
8208 	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8209 	  else
8210 	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8211 	  sched_arg = build_int_cstu (long_integer_type_node, sched);
8212 	  if (!fd.chunk_size)
8213 	    fd.chunk_size = integer_zero_node;
8214 	}
8215       if (fd.iter_type == long_long_unsigned_type_node)
8216 	{
8217 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8218 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8219 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8220 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8221 	}
8222       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8223 			      (enum built_in_function) next_ix, sched_arg,
8224 			      inner_stmt);
8225     }
8226 
8227   if (gimple_in_ssa_p (cfun))
8228     update_ssa (TODO_update_ssa_only_virtuals);
8229 }
8230 
8231 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
8232 
8233 	v = GOMP_sections_start (n);
8234     L0:
8235 	switch (v)
8236 	  {
8237 	  case 0:
8238 	    goto L2;
8239 	  case 1:
8240 	    section 1;
8241 	    goto L1;
8242 	  case 2:
8243 	    ...
8244 	  case n:
8245 	    ...
8246 	  default:
8247 	    abort ();
8248 	  }
8249     L1:
8250 	v = GOMP_sections_next ();
8251 	goto L0;
8252     L2:
8253 	reduction;
8254 
8255     If this is a combined parallel sections, replace the call to
8256     GOMP_sections_start with call to GOMP_sections_next.  */
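/* GOMP_sections_start (n) returns the number (1..n) of the first
   section this thread should execute, or 0 when none remain, and
   GOMP_sections_next () does the same for subsequent requests; that
   returned value is what the switch on V dispatches on.  */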
8257 
8258 static void
8259 expand_omp_sections (struct omp_region *region)
8260 {
8261   tree t, u, vin = NULL, vmain, vnext, l2;
8262   unsigned len;
8263   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8264   gimple_stmt_iterator si, switch_si;
8265   gomp_sections *sections_stmt;
8266   gimple *stmt;
8267   gomp_continue *cont;
8268   edge_iterator ei;
8269   edge e;
8270   struct omp_region *inner;
8271   unsigned i, casei;
8272   bool exit_reachable = region->cont != NULL;
8273 
8274   gcc_assert (region->exit != NULL);
8275   entry_bb = region->entry;
8276   l0_bb = single_succ (entry_bb);
8277   l1_bb = region->cont;
8278   l2_bb = region->exit;
8279   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8280     l2 = gimple_block_label (l2_bb);
8281   else
8282     {
8283       /* This can happen if there are reductions.  */
8284       len = EDGE_COUNT (l0_bb->succs);
8285       gcc_assert (len > 0);
8286       e = EDGE_SUCC (l0_bb, len - 1);
8287       si = gsi_last_nondebug_bb (e->dest);
8288       l2 = NULL_TREE;
8289       if (gsi_end_p (si)
8290 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8291 	l2 = gimple_block_label (e->dest);
8292       else
8293 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
8294 	  {
8295 	    si = gsi_last_nondebug_bb (e->dest);
8296 	    if (gsi_end_p (si)
8297 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8298 	      {
8299 		l2 = gimple_block_label (e->dest);
8300 		break;
8301 	      }
8302 	  }
8303     }
8304   if (exit_reachable)
8305     default_bb = create_empty_bb (l1_bb->prev_bb);
8306   else
8307     default_bb = create_empty_bb (l0_bb);
8308 
8309   /* We will build a switch() with enough cases for all the
8310      GIMPLE_OMP_SECTION regions, a '0' case taken when no more work
8311      remains, and a default case to abort if something goes wrong.  */
8312   len = EDGE_COUNT (l0_bb->succs);
8313 
8314   /* Use vec::quick_push on label_vec throughout, since we know the size
8315      in advance.  */
8316   auto_vec<tree> label_vec (len);
8317 
8318   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8319      GIMPLE_OMP_SECTIONS statement.  */
8320   si = gsi_last_nondebug_bb (entry_bb);
8321   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8322   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8323   vin = gimple_omp_sections_control (sections_stmt);
8324   tree clauses = gimple_omp_sections_clauses (sections_stmt);
8325   tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8326   tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8327   tree cond_var = NULL_TREE;
8328   if (reductmp || condtmp)
8329     {
8330       tree reductions = null_pointer_node, mem = null_pointer_node;
8331       tree memv = NULL_TREE, condtemp = NULL_TREE;
8332       gimple_stmt_iterator gsi = gsi_none ();
8333       gimple *g = NULL;
8334       if (reductmp)
8335 	{
8336 	  reductions = OMP_CLAUSE_DECL (reductmp);
8337 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8338 	  g = SSA_NAME_DEF_STMT (reductions);
8339 	  reductions = gimple_assign_rhs1 (g);
8340 	  OMP_CLAUSE_DECL (reductmp) = reductions;
8341 	  gsi = gsi_for_stmt (g);
8342 	}
8343       else
8344 	gsi = si;
8345       if (condtmp)
8346 	{
8347 	  condtemp = OMP_CLAUSE_DECL (condtmp);
8348 	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8349 				    OMP_CLAUSE__CONDTEMP_);
8350 	  cond_var = OMP_CLAUSE_DECL (c);
8351 	  tree type = TREE_TYPE (condtemp);
8352 	  memv = create_tmp_var (type);
8353 	  TREE_ADDRESSABLE (memv) = 1;
8354 	  unsigned cnt = 0;
8355 	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8356 	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8357 		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8358 	      ++cnt;
8359 	  unsigned HOST_WIDE_INT sz
8360 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8361 	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8362 				   false);
8363 	  mem = build_fold_addr_expr (memv);
8364 	}
8365       t = build_int_cst (unsigned_type_node, len - 1);
8366       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8367       stmt = gimple_build_call (u, 3, t, reductions, mem);
8368       gimple_call_set_lhs (stmt, vin);
8369       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8370       if (condtmp)
8371 	{
8372 	  expand_omp_build_assign (&gsi, condtemp, memv, false);
8373 	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8374 			   vin, build_one_cst (TREE_TYPE (cond_var)));
8375 	  expand_omp_build_assign (&gsi, cond_var, t, false);
8376 	}
8377       if (reductmp)
8378 	{
8379 	  gsi_remove (&gsi, true);
8380 	  release_ssa_name (gimple_assign_lhs (g));
8381 	}
8382     }
8383   else if (!is_combined_parallel (region))
8384     {
8385       /* If we are not inside a combined parallel+sections region,
8386 	 call GOMP_sections_start.  */
8387       t = build_int_cst (unsigned_type_node, len - 1);
8388       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8389       stmt = gimple_build_call (u, 1, t);
8390     }
8391   else
8392     {
8393       /* Otherwise, call GOMP_sections_next.  */
8394       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8395       stmt = gimple_build_call (u, 0);
8396     }
8397   if (!reductmp && !condtmp)
8398     {
8399       gimple_call_set_lhs (stmt, vin);
8400       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8401     }
8402   gsi_remove (&si, true);
8403 
8404   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8405      L0_BB.  */
8406   switch_si = gsi_last_nondebug_bb (l0_bb);
8407   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8408   if (exit_reachable)
8409     {
8410       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8411       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8412       vmain = gimple_omp_continue_control_use (cont);
8413       vnext = gimple_omp_continue_control_def (cont);
8414     }
8415   else
8416     {
8417       vmain = vin;
8418       vnext = NULL_TREE;
8419     }
8420 
8421   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8422   label_vec.quick_push (t);
8423   i = 1;
8424 
8425   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
8426   for (inner = region->inner, casei = 1;
8427        inner;
8428        inner = inner->next, i++, casei++)
8429     {
8430       basic_block s_entry_bb, s_exit_bb;
8431 
8432       /* Skip optional reduction region.  */
8433       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8434 	{
8435 	  --i;
8436 	  --casei;
8437 	  continue;
8438 	}
8439 
8440       s_entry_bb = inner->entry;
8441       s_exit_bb = inner->exit;
8442 
8443       t = gimple_block_label (s_entry_bb);
8444       u = build_int_cst (unsigned_type_node, casei);
8445       u = build_case_label (u, NULL, t);
8446       label_vec.quick_push (u);
8447 
8448       si = gsi_last_nondebug_bb (s_entry_bb);
8449       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8450       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8451       gsi_remove (&si, true);
8452       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8453 
8454       if (s_exit_bb == NULL)
8455 	continue;
8456 
8457       si = gsi_last_nondebug_bb (s_exit_bb);
8458       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8459       gsi_remove (&si, true);
8460 
8461       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8462     }
8463 
8464   /* Error handling code goes in DEFAULT_BB.  */
8465   t = gimple_block_label (default_bb);
8466   u = build_case_label (NULL, NULL, t);
8467   make_edge (l0_bb, default_bb, 0);
8468   add_bb_to_loop (default_bb, current_loops->tree_root);
8469 
8470   stmt = gimple_build_switch (vmain, u, label_vec);
8471   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8472   gsi_remove (&switch_si, true);
8473 
8474   si = gsi_start_bb (default_bb);
8475   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8476   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8477 
8478   if (exit_reachable)
8479     {
8480       tree bfn_decl;
8481 
8482       /* Code to get the next section goes in L1_BB.  */
8483       si = gsi_last_nondebug_bb (l1_bb);
8484       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8485 
8486       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8487       stmt = gimple_build_call (bfn_decl, 0);
8488       gimple_call_set_lhs (stmt, vnext);
8489       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8490       if (cond_var)
8491 	{
8492 	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8493 			   vnext, build_one_cst (TREE_TYPE (cond_var)));
8494 	  expand_omp_build_assign (&si, cond_var, t, false);
8495 	}
8496       gsi_remove (&si, true);
8497 
8498       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8499     }
8500 
8501   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
8502   si = gsi_last_nondebug_bb (l2_bb);
8503   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8504     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8505   else if (gimple_omp_return_lhs (gsi_stmt (si)))
8506     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8507   else
8508     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8509   stmt = gimple_build_call (t, 0);
8510   if (gimple_omp_return_lhs (gsi_stmt (si)))
8511     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8512   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8513   gsi_remove (&si, true);
8514 
8515   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8516 }
8517 
8518 /* Expand code for an OpenMP single or scope directive.  We've already expanded
8519    much of the code; here we simply place the GOMP_barrier call.  */
8520 
8521 static void
8522 expand_omp_single (struct omp_region *region)
8523 {
8524   basic_block entry_bb, exit_bb;
8525   gimple_stmt_iterator si;
8526 
8527   entry_bb = region->entry;
8528   exit_bb = region->exit;
8529 
8530   si = gsi_last_nondebug_bb (entry_bb);
8531   enum gimple_code code = gimple_code (gsi_stmt (si));
8532   gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8533   gsi_remove (&si, true);
8534   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8535 
8536   if (exit_bb == NULL)
8537     {
8538       gcc_assert (code == GIMPLE_OMP_SCOPE);
8539       return;
8540     }
8541 
8542   si = gsi_last_nondebug_bb (exit_bb);
8543   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8544     {
8545       tree t = gimple_omp_return_lhs (gsi_stmt (si));
8546       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8547     }
8548   gsi_remove (&si, true);
8549   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8550 }
8551 
8552 /* Generic expansion for OpenMP synchronization directives: master,
8553    ordered and critical.  All we need to do here is remove the entry
8554    and exit markers for REGION.  */
8555 
8556 static void
8557 expand_omp_synch (struct omp_region *region)
8558 {
8559   basic_block entry_bb, exit_bb;
8560   gimple_stmt_iterator si;
8561 
8562   entry_bb = region->entry;
8563   exit_bb = region->exit;
8564 
8565   si = gsi_last_nondebug_bb (entry_bb);
8566   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8567 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8568 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8569 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8570 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8571 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8572 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8573   if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8574       && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8575     {
8576       expand_omp_taskreg (region);
8577       return;
8578     }
8579   gsi_remove (&si, true);
8580   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8581 
8582   if (exit_bb)
8583     {
8584       si = gsi_last_nondebug_bb (exit_bb);
8585       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8586       gsi_remove (&si, true);
8587       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8588     }
8589 }
8590 
8591 /* Translate enum omp_memory_order to enum memmodel for the fail
8592    clause embedded in it.  */
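/* E.g. an 'omp atomic compare' with an explicit fail(acquire) clause
   maps to MEMMODEL_ACQUIRE; with the fail order unspecified it is
   derived from the success order, degrading release to relaxed and
   acq_rel to acquire, as a failed compare-and-swap performs no store.  */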
8593 
8594 static enum memmodel
8595 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8596 {
8597   switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8598     {
8599     case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8600       switch (mo & OMP_MEMORY_ORDER_MASK)
8601 	{
8602 	case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8603 	case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8604 	case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8605 	case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8606 	case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8607 	default: break;
8608 	}
8609       gcc_unreachable ();
8610     case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8611     case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8612     case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8613     default: gcc_unreachable ();
8614     }
8615 }
8616 
8617 /* Translate enum omp_memory_order to enum memmodel.  The two enums
8618    use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED is 0;
8619    omp_memory_order also has the fail mode encoded in it.  */
8620 
8621 static enum memmodel
8622 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8623 {
8624   enum memmodel ret, fail_ret;
8625   switch (mo & OMP_MEMORY_ORDER_MASK)
8626     {
8627     case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8628     case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8629     case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8630     case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8631     case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8632     default: gcc_unreachable ();
8633     }
8634   /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8635      we can just return ret here unconditionally.  Otherwise, work around
8636      it here and make sure fail memmodel is not stronger.  */
8637   if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8638     return ret;
8639   fail_ret = omp_memory_order_to_fail_memmodel (mo);
8640   if (fail_ret > ret)
8641     return fail_ret;
8642   return ret;
8643 }
8644 
8645 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8646    operation as a normal volatile load.  */
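/* A sketch of the result: for a 4-byte type the GIMPLE_OMP_ATOMIC_LOAD
   is replaced by
     loaded_val = __atomic_load_4 (addr, memmodel);
   with the builtin selected via INDEX below.  */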
8647 
8648 static bool
8649 expand_omp_atomic_load (basic_block load_bb, tree addr,
8650 			tree loaded_val, int index)
8651 {
8652   enum built_in_function tmpbase;
8653   gimple_stmt_iterator gsi;
8654   basic_block store_bb;
8655   location_t loc;
8656   gimple *stmt;
8657   tree decl, call, type, itype;
8658 
8659   gsi = gsi_last_nondebug_bb (load_bb);
8660   stmt = gsi_stmt (gsi);
8661   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8662   loc = gimple_location (stmt);
8663 
8664   /* ??? If the target does not implement atomic_load_optab[mode], and mode
8665      is smaller than word size, then expand_atomic_load assumes that the load
8666      is atomic.  We could avoid the builtin entirely in this case.  */
8667 
8668   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8669   decl = builtin_decl_explicit (tmpbase);
8670   if (decl == NULL_TREE)
8671     return false;
8672 
8673   type = TREE_TYPE (loaded_val);
8674   itype = TREE_TYPE (TREE_TYPE (decl));
8675 
8676   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8677   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8678   call = build_call_expr_loc (loc, decl, 2, addr, mo);
8679   if (!useless_type_conversion_p (type, itype))
8680     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8681   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8682 
8683   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8684   gsi_remove (&gsi, true);
8685 
8686   store_bb = single_succ (load_bb);
8687   gsi = gsi_last_nondebug_bb (store_bb);
8688   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8689   gsi_remove (&gsi, true);
8690 
8691   if (gimple_in_ssa_p (cfun))
8692     update_ssa (TODO_update_ssa_no_phi);
8693 
8694   return true;
8695 }
8696 
8697 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8698    operation as a normal volatile store.  */
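/* A sketch of the result: a plain store becomes
     __atomic_store_N (addr, stored_val, memmodel);
   while, if the old value is consumed, the exchange form
     loaded_val = __atomic_exchange_N (addr, stored_val, memmodel);
   is emitted instead.  */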
8699 
8700 static bool
8701 expand_omp_atomic_store (basic_block load_bb, tree addr,
8702 			 tree loaded_val, tree stored_val, int index)
8703 {
8704   enum built_in_function tmpbase;
8705   gimple_stmt_iterator gsi;
8706   basic_block store_bb = single_succ (load_bb);
8707   location_t loc;
8708   gimple *stmt;
8709   tree decl, call, type, itype;
8710   machine_mode imode;
8711   bool exchange;
8712 
8713   gsi = gsi_last_nondebug_bb (load_bb);
8714   stmt = gsi_stmt (gsi);
8715   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8716 
8717   /* If the load value is needed, then this isn't a store but an exchange.  */
8718   exchange = gimple_omp_atomic_need_value_p (stmt);
8719 
8720   gsi = gsi_last_nondebug_bb (store_bb);
8721   stmt = gsi_stmt (gsi);
8722   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8723   loc = gimple_location (stmt);
8724 
8725   /* ??? If the target does not implement atomic_store_optab[mode], and mode
8726      is smaller than word size, then expand_atomic_store assumes that the store
8727      is atomic.  We could avoid the builtin entirely in this case.  */
8728 
8729   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8730   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8731   decl = builtin_decl_explicit (tmpbase);
8732   if (decl == NULL_TREE)
8733     return false;
8734 
8735   type = TREE_TYPE (stored_val);
8736 
8737   /* Dig out the type of the function's second argument.  */
8738   itype = TREE_TYPE (decl);
8739   itype = TYPE_ARG_TYPES (itype);
8740   itype = TREE_CHAIN (itype);
8741   itype = TREE_VALUE (itype);
8742   imode = TYPE_MODE (itype);
8743 
8744   if (exchange && !can_atomic_exchange_p (imode, true))
8745     return false;
8746 
8747   if (!useless_type_conversion_p (itype, type))
8748     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8749   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8750   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8751   call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8752   if (exchange)
8753     {
8754       if (!useless_type_conversion_p (type, itype))
8755 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8756       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8757     }
8758 
8759   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8760   gsi_remove (&gsi, true);
8761 
8762   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
8763   gsi = gsi_last_nondebug_bb (load_bb);
8764   gsi_remove (&gsi, true);
8765 
8766   if (gimple_in_ssa_p (cfun))
8767     update_ssa (TODO_update_ssa_no_phi);
8768 
8769   return true;
8770 }
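
/* Editorial sketch, not in the original source: for a 4-byte int, a plain

       #pragma omp atomic write
       *p = x;

   becomes  __atomic_store_4 (p, x, memmodel);  whereas a capture form
   that still needs the old value, e.g.

       #pragma omp atomic capture
       { v = *p; *p = x; }

   takes the EXCHANGE path above and becomes
   v = __atomic_exchange_4 (p, x, memmodel);  */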
8771 
8772 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8773    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
8774    size of the data type, and thus usable to find the index of the builtin
8775    decl.  Returns false if the expression is not of the proper form.  */
8776 
8777 static bool
8778 expand_omp_atomic_fetch_op (basic_block load_bb,
8779 			    tree addr, tree loaded_val,
8780 			    tree stored_val, int index)
8781 {
8782   enum built_in_function oldbase, newbase, tmpbase;
8783   tree decl, itype, call;
8784   tree lhs, rhs;
8785   basic_block store_bb = single_succ (load_bb);
8786   gimple_stmt_iterator gsi;
8787   gimple *stmt;
8788   location_t loc;
8789   enum tree_code code;
8790   bool need_old, need_new;
8791   machine_mode imode;
8792 
8793   /* We expect to find the following sequences:
8794 
8795    load_bb:
8796        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8797 
8798    store_bb:
8799        val = tmp OP something; (or: something OP tmp)
8800        GIMPLE_OMP_ATOMIC_STORE (val)
8801 
8802   ???FIXME: Allow a more flexible sequence.
8803   Perhaps use data flow to pick the statements.
8804 
8805   */
8806 
8807   gsi = gsi_after_labels (store_bb);
8808   stmt = gsi_stmt (gsi);
8809   if (is_gimple_debug (stmt))
8810     {
8811       gsi_next_nondebug (&gsi);
8812       if (gsi_end_p (gsi))
8813 	return false;
8814       stmt = gsi_stmt (gsi);
8815     }
8816   loc = gimple_location (stmt);
8817   if (!is_gimple_assign (stmt))
8818     return false;
8819   gsi_next_nondebug (&gsi);
8820   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8821     return false;
8822   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8823   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8824   enum omp_memory_order omo
8825     = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8826   enum memmodel mo = omp_memory_order_to_memmodel (omo);
8827   gcc_checking_assert (!need_old || !need_new);
8828 
8829   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8830     return false;
8831 
8832   /* Check for one of the supported fetch-op operations.  */
8833   code = gimple_assign_rhs_code (stmt);
8834   switch (code)
8835     {
8836     case PLUS_EXPR:
8837     case POINTER_PLUS_EXPR:
8838       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8839       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8840       break;
8841     case MINUS_EXPR:
8842       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8843       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8844       break;
8845     case BIT_AND_EXPR:
8846       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8847       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8848       break;
8849     case BIT_IOR_EXPR:
8850       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8851       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8852       break;
8853     case BIT_XOR_EXPR:
8854       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8855       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8856       break;
8857     default:
8858       return false;
8859     }
8860 
8861   /* Make sure the expression is of the proper form.  */
8862   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8863     rhs = gimple_assign_rhs2 (stmt);
8864   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8865 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8866     rhs = gimple_assign_rhs1 (stmt);
8867   else
8868     return false;
8869 
8870   tmpbase = ((enum built_in_function)
8871 	     ((need_new ? newbase : oldbase) + index + 1));
8872   decl = builtin_decl_explicit (tmpbase);
8873   if (decl == NULL_TREE)
8874     return false;
8875   itype = TREE_TYPE (TREE_TYPE (decl));
8876   imode = TYPE_MODE (itype);
8877 
8878   /* We could test all of the various optabs involved, but the fact of the
8879      matter is that (with the exception of i486 vs i586 and xadd) all targets
8880      that support any atomic operation optab also implement compare-and-swap.
8881      Let optabs.cc take care of expanding any compare-and-swap loop.  */
8882   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8883     return false;
8884 
8885   gsi = gsi_last_nondebug_bb (load_bb);
8886   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8887 
8888   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8889      It only requires that the operation happen atomically, so the
8890      memory order MO derived from the construct above is passed through.  */
8891   call = build_call_expr_loc (loc, decl, 3, addr,
8892 			      fold_convert_loc (loc, itype, rhs),
8893 			      build_int_cst (NULL, mo));
8894 
8895   if (need_old || need_new)
8896     {
8897       lhs = need_old ? loaded_val : stored_val;
8898       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8899       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8900     }
8901   else
8902     call = fold_convert_loc (loc, void_type_node, call);
8903   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8904   gsi_remove (&gsi, true);
8905 
8906   gsi = gsi_last_nondebug_bb (store_bb);
8907   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8908   gsi_remove (&gsi, true);
8909   gsi = gsi_last_nondebug_bb (store_bb);
8910   stmt = gsi_stmt (gsi);
8911   gsi_remove (&gsi, true);
8912 
8913   if (gimple_in_ssa_p (cfun))
8914     {
8915       release_defs (stmt);
8916       update_ssa (TODO_update_ssa_no_phi);
8917     }
8918 
8919   return true;
8920 }
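
/* Editorial sketch, not in the original source: for a 4-byte int x,

       #pragma omp atomic
       x = x + 3;

   matches the PLUS_EXPR case above and collapses to

       __atomic_fetch_add_4 (&x, 3, memmodel);

   while the capture form 'v = x += 3;' sets need_new and uses the
   BUILT_IN_ATOMIC_ADD_FETCH_N variant so V receives the updated value.  */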
8921 
8922 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8923    compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8924    Returns false if the expression is not of the proper form.  */
8925 
8926 static bool
8927 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8928 		       tree loaded_val, tree stored_val, int index)
8929 {
8930   /* We expect to find the following sequences:
8931 
8932    load_bb:
8933        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8934 
8935    store_bb:
8936        val = tmp == e ? d : tmp;
8937        GIMPLE_OMP_ATOMIC_STORE (val)
8938 
8939      or in store_bb instead:
8940        tmp2 = tmp == e;
8941        val = tmp2 ? d : tmp;
8942        GIMPLE_OMP_ATOMIC_STORE (val)
8943 
8944      or:
8945        tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8946        val = e == tmp3 ? d : tmp;
8947        GIMPLE_OMP_ATOMIC_STORE (val)
8948 
8949      etc.  */
8950 
8951 
8952   basic_block store_bb = single_succ (load_bb);
8953   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8954   gimple *store_stmt = gsi_stmt (gsi);
8955   if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8956     return false;
8957   gsi_prev_nondebug (&gsi);
8958   if (gsi_end_p (gsi))
8959     return false;
8960   gimple *condexpr_stmt = gsi_stmt (gsi);
8961   if (!is_gimple_assign (condexpr_stmt)
8962       || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8963     return false;
8964   if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8965     return false;
8966   gimple *cond_stmt = NULL;
8967   gimple *vce_stmt = NULL;
8968   gsi_prev_nondebug (&gsi);
8969   if (!gsi_end_p (gsi))
8970     {
8971       cond_stmt = gsi_stmt (gsi);
8972       if (!is_gimple_assign (cond_stmt))
8973 	return false;
8974       if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8975 	{
8976 	  gsi_prev_nondebug (&gsi);
8977 	  if (!gsi_end_p (gsi))
8978 	    {
8979 	      vce_stmt = gsi_stmt (gsi);
8980 	      if (!is_gimple_assign (vce_stmt)
8981 		  || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8982 		return false;
8983 	    }
8984 	}
8985       else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8986 	std::swap (vce_stmt, cond_stmt);
8987       else
8988 	return false;
8989       if (vce_stmt)
8990 	{
8991 	  tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8992 	  if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8993 	      || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8994 	    return false;
8995 	  if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8996 	      || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8997 	      || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8998 				      TYPE_SIZE (TREE_TYPE (loaded_val))))
8999 	    return false;
9000 	  gsi_prev_nondebug (&gsi);
9001 	  if (!gsi_end_p (gsi))
9002 	    return false;
9003 	}
9004     }
9005   tree cond = gimple_assign_rhs1 (condexpr_stmt);
9006   tree cond_op1, cond_op2;
9007   if (cond_stmt)
9008     {
9009       if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9010 	return false;
9011       cond_op1 = gimple_assign_rhs1 (cond_stmt);
9012       cond_op2 = gimple_assign_rhs2 (cond_stmt);
9013     }
9014   else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9015     return false;
9016   else
9017     {
9018       cond_op1 = TREE_OPERAND (cond, 0);
9019       cond_op2 = TREE_OPERAND (cond, 1);
9020     }
9021   tree d;
9022   if (TREE_CODE (cond) == NE_EXPR)
9023     {
9024       if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9025 	return false;
9026       d = gimple_assign_rhs3 (condexpr_stmt);
9027     }
9028   else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9029     return false;
9030   else
9031     d = gimple_assign_rhs2 (condexpr_stmt);
9032   tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9033   if (operand_equal_p (e, cond_op1))
9034     e = cond_op2;
9035   else if (operand_equal_p (e, cond_op2))
9036     e = cond_op1;
9037   else
9038     return false;
9039 
9040   location_t loc = gimple_location (store_stmt);
9041   gimple *load_stmt = last_stmt (load_bb);
9042   bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9043   bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9044   bool weak = gimple_omp_atomic_weak_p (load_stmt);
9045   enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9046   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9047   tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9048   gcc_checking_assert (!need_old || !need_new);
9049 
9050   enum built_in_function fncode
9051     = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9052 				+ index + 1);
9053   tree cmpxchg = builtin_decl_explicit (fncode);
9054   if (cmpxchg == NULL_TREE)
9055     return false;
9056   tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9057 
9058   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9059       || !can_atomic_load_p (TYPE_MODE (itype)))
9060     return false;
9061 
9062   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9063   if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9064     return false;
9065 
9066   gsi = gsi_for_stmt (store_stmt);
9067   if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9068     {
9069       tree ne = create_tmp_reg (itype);
9070       gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9071       gimple_set_location (g, loc);
9072       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9073       e = ne;
9074     }
9075   if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9076     {
9077       tree nd = create_tmp_reg (itype);
9078       enum tree_code code;
9079       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9080 	{
9081 	  code = VIEW_CONVERT_EXPR;
9082 	  d = build1 (VIEW_CONVERT_EXPR, itype, d);
9083 	}
9084       else
9085 	code = NOP_EXPR;
9086       gimple *g = gimple_build_assign (nd, code, d);
9087       gimple_set_location (g, loc);
9088       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9089       d = nd;
9090     }
9091 
9092   tree ctype = build_complex_type (itype);
9093   int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9094   gimple *g
9095     = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9096 				  build_int_cst (integer_type_node, flag),
9097 				  mo, fmo);
9098   tree cres = create_tmp_reg (ctype);
9099   gimple_call_set_lhs (g, cres);
9100   gimple_set_location (g, loc);
9101   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9102 
9103   if (cond_stmt || need_old || need_new)
9104     {
9105       tree im = create_tmp_reg (itype);
9106       g = gimple_build_assign (im, IMAGPART_EXPR,
9107 			       build1 (IMAGPART_EXPR, itype, cres));
9108       gimple_set_location (g, loc);
9109       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9110 
9111       tree re = NULL_TREE;
9112       if (need_old || need_new)
9113 	{
9114 	  re = create_tmp_reg (itype);
9115 	  g = gimple_build_assign (re, REALPART_EXPR,
9116 				   build1 (REALPART_EXPR, itype, cres));
9117 	  gimple_set_location (g, loc);
9118 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9119 	}
9120 
9121       if (cond_stmt)
9122 	{
9123 	  g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9124 				   NOP_EXPR, im);
9125 	  gimple_set_location (g, loc);
9126 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9127 	}
9128       else if (need_new)
9129 	{
9130 	  g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9131 				   build2 (NE_EXPR, boolean_type_node,
9132 					   im, build_zero_cst (itype)),
9133 				   d, re);
9134 	  gimple_set_location (g, loc);
9135 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9136 	  re = gimple_assign_lhs (g);
9137 	}
9138 
9139       if (need_old || need_new)
9140 	{
9141 	  tree v = need_old ? loaded_val : stored_val;
9142 	  enum tree_code code;
9143 	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9144 	    {
9145 	      code = VIEW_CONVERT_EXPR;
9146 	      re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9147 	    }
9148 	  else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9149 	    code = NOP_EXPR;
9150 	  else
9151 	    code = TREE_CODE (re);
9152 	  g = gimple_build_assign (v, code, re);
9153 	  gimple_set_location (g, loc);
9154 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9155 	}
9156     }
9157 
9158   gsi_remove (&gsi, true);
9159   gsi = gsi_for_stmt (load_stmt);
9160   gsi_remove (&gsi, true);
9161   gsi = gsi_for_stmt (condexpr_stmt);
9162   gsi_remove (&gsi, true);
9163   if (cond_stmt)
9164     {
9165       gsi = gsi_for_stmt (cond_stmt);
9166       gsi_remove (&gsi, true);
9167     }
9168   if (vce_stmt)
9169     {
9170       gsi = gsi_for_stmt (vce_stmt);
9171       gsi_remove (&gsi, true);
9172     }
9173 
9174   return true;
9175 }
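
/* Editorial sketch, not in the original source: for 4-byte operands,

       #pragma omp atomic compare
       if (x == e) { x = d; }

   matches the first pattern above and is rewritten into

       cres = .ATOMIC_COMPARE_EXCHANGE (&x, e, d, 4 + (weak ? 256 : 0),
                                        mo, fmo);

   where the real part of the complex result CRES is the old value and
   the imaginary part is the success flag.  */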
9176 
9177 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
9178 
9179       oldval = *addr;
9180       repeat:
9181 	newval = rhs;	 // with oldval replacing *addr in rhs
9182 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9183 	if (oldval != newval)
9184 	  goto repeat;
9185 
9186    INDEX is log2 of the size of the data type, and thus usable to find the
9187    index of the builtin decl.  */
9188 
9189 static bool
9190 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9191 			    tree addr, tree loaded_val, tree stored_val,
9192 			    int index)
9193 {
9194   tree loadedi, storedi, initial, new_storedi, old_vali;
9195   tree type, itype, cmpxchg, iaddr, atype;
9196   gimple_stmt_iterator si;
9197   basic_block loop_header = single_succ (load_bb);
9198   gimple *phi, *stmt;
9199   edge e;
9200   enum built_in_function fncode;
9201 
9202   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9203 				    + index + 1);
9204   cmpxchg = builtin_decl_explicit (fncode);
9205   if (cmpxchg == NULL_TREE)
9206     return false;
9207   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9208   atype = type;
9209   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9210 
9211   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9212       || !can_atomic_load_p (TYPE_MODE (itype)))
9213     return false;
9214 
9215   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
9216   si = gsi_last_nondebug_bb (load_bb);
9217   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9218   location_t loc = gimple_location (gsi_stmt (si));
9219   enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9220   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9221   tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9222 
9223   /* For floating-point values, we'll need to view-convert them to integers
9224      so that we can perform the atomic compare and swap.  Simplify the
9225      following code by always setting up the "i"ntegral variables.  */
9226   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9227     {
9228       tree iaddr_val;
9229 
9230       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9231 							   true));
9232       atype = itype;
9233       iaddr_val
9234 	= force_gimple_operand_gsi (&si,
9235 				    fold_convert (TREE_TYPE (iaddr), addr),
9236 				    false, NULL_TREE, true, GSI_SAME_STMT);
9237       stmt = gimple_build_assign (iaddr, iaddr_val);
9238       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9239       loadedi = create_tmp_var (itype);
9240       if (gimple_in_ssa_p (cfun))
9241 	loadedi = make_ssa_name (loadedi);
9242     }
9243   else
9244     {
9245       iaddr = addr;
9246       loadedi = loaded_val;
9247     }
9248 
9249   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9250   tree loaddecl = builtin_decl_explicit (fncode);
9251   if (loaddecl)
9252     initial
9253       = fold_convert (atype,
9254 		      build_call_expr (loaddecl, 2, iaddr,
9255 				       build_int_cst (NULL_TREE,
9256 						      MEMMODEL_RELAXED)));
9257   else
9258     {
9259       tree off
9260 	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9261 						      true), 0);
9262       initial = build2 (MEM_REF, atype, iaddr, off);
9263     }
9264 
9265   initial
9266     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9267 				GSI_SAME_STMT);
9268 
9269   /* Move the value to the LOADEDI temporary.  */
9270   if (gimple_in_ssa_p (cfun))
9271     {
9272       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9273       phi = create_phi_node (loadedi, loop_header);
9274       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9275 	       initial);
9276     }
9277   else
9278     gsi_insert_before (&si,
9279 		       gimple_build_assign (loadedi, initial),
9280 		       GSI_SAME_STMT);
9281   if (loadedi != loaded_val)
9282     {
9283       gimple_stmt_iterator gsi2;
9284       tree x;
9285 
9286       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9287       gsi2 = gsi_start_bb (loop_header);
9288       if (gimple_in_ssa_p (cfun))
9289 	{
9290 	  gassign *stmt;
9291 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9292 					true, GSI_SAME_STMT);
9293 	  stmt = gimple_build_assign (loaded_val, x);
9294 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9295 	}
9296       else
9297 	{
9298 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9299 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9300 				    true, GSI_SAME_STMT);
9301 	}
9302     }
9303   gsi_remove (&si, true);
9304 
9305   si = gsi_last_nondebug_bb (store_bb);
9306   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9307 
9308   if (iaddr == addr)
9309     storedi = stored_val;
9310   else
9311     storedi
9312       = force_gimple_operand_gsi (&si,
9313 				  build1 (VIEW_CONVERT_EXPR, itype,
9314 					  stored_val), true, NULL_TREE, true,
9315 				  GSI_SAME_STMT);
9316 
9317   /* Build the compare&swap statement.  */
9318   tree ctype = build_complex_type (itype);
9319   int flag = int_size_in_bytes (itype);
9320   new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9321 					      ctype, 6, iaddr, loadedi,
9322 					      storedi,
9323 					      build_int_cst (integer_type_node,
9324 							     flag),
9325 					      mo, fmo);
9326   new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9327   new_storedi = force_gimple_operand_gsi (&si,
9328 					  fold_convert (TREE_TYPE (loadedi),
9329 							new_storedi),
9330 					  true, NULL_TREE,
9331 					  true, GSI_SAME_STMT);
9332 
9333   if (gimple_in_ssa_p (cfun))
9334     old_vali = loadedi;
9335   else
9336     {
9337       old_vali = create_tmp_var (TREE_TYPE (loadedi));
9338       stmt = gimple_build_assign (old_vali, loadedi);
9339       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9340 
9341       stmt = gimple_build_assign (loadedi, new_storedi);
9342       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9343     }
9344 
9345   /* Note that we always perform the comparison as an integer, even for
9346      floating point.  This allows the atomic operation to properly
9347      succeed even with NaNs and -0.0.  */
9348   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9349   stmt = gimple_build_cond_empty (ne);
9350   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9351 
9352   /* Update cfg.  */
9353   e = single_succ_edge (store_bb);
9354   e->flags &= ~EDGE_FALLTHRU;
9355   e->flags |= EDGE_FALSE_VALUE;
9356   /* Expect no looping.  */
9357   e->probability = profile_probability::guessed_always ();
9358 
9359   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9360   e->probability = profile_probability::guessed_never ();
9361 
9362   /* Copy the new value to loadedi (we already did that before the condition
9363      if we are not in SSA).  */
9364   if (gimple_in_ssa_p (cfun))
9365     {
9366       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9367       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9368     }
9369 
9370   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
9371   gsi_remove (&si, true);
9372 
9373   class loop *loop = alloc_loop ();
9374   loop->header = loop_header;
9375   loop->latch = store_bb;
9376   add_loop (loop, loop_header->loop_father);
9377 
9378   if (gimple_in_ssa_p (cfun))
9379     update_ssa (TODO_update_ssa_no_phi);
9380 
9381   return true;
9382 }
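
/* Editorial sketch, not in the original source: for a float F updated by
   '#pragma omp atomic  f = f + 1.0f;' no fetch-op builtin applies, so the
   function above emits a compare-and-swap loop over the same-sized
   integer type, roughly

       iaddr = (int *) &f;
       loadedi = __atomic_load_4 (iaddr, MEMMODEL_RELAXED);
     repeat:
       storedi = VIEW_CONVERT_EXPR<int>(VIEW_CONVERT_EXPR<float>(loadedi)
                                        + 1.0f);
       cres = .ATOMIC_COMPARE_EXCHANGE (iaddr, loadedi, storedi, 4, mo, fmo);
       new_storedi = REALPART_EXPR <cres>;
       if (new_storedi != loadedi)
         { loadedi = new_storedi; goto repeat; }

   with the comparison done on the integer view so NaNs and -0.0 cannot
   cause spurious looping.  */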
9383 
9384 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
9385 
9386 				  GOMP_atomic_start ();
9387 				  *addr = rhs;
9388 				  GOMP_atomic_end ();
9389 
9390    The result is not globally atomic, but works so long as all parallel
9391    references are within #pragma omp atomic directives.  According to
9392    responses received from omp@openmp.org, this appears to be within
9393    spec, which makes sense given that several other compilers handle
9394    this situation the same way.
9395    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9396    expanding.  STORED_VAL is the operand of the matching
9397    GIMPLE_OMP_ATOMIC_STORE.
9398 
9399    We replace
9400    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9401    loaded_val = *addr;
9402 
9403    and replace
9404    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
9405    *addr = stored_val;
9406 */
9407 
9408 static bool
9409 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9410 			 tree addr, tree loaded_val, tree stored_val)
9411 {
9412   gimple_stmt_iterator si;
9413   gassign *stmt;
9414   tree t;
9415 
9416   si = gsi_last_nondebug_bb (load_bb);
9417   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9418 
9419   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9420   t = build_call_expr (t, 0);
9421   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9422 
9423   tree mem = build_simple_mem_ref (addr);
9424   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9425   TREE_OPERAND (mem, 1)
9426     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9427 						 true),
9428 		    TREE_OPERAND (mem, 1));
9429   stmt = gimple_build_assign (loaded_val, mem);
9430   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9431   gsi_remove (&si, true);
9432 
9433   si = gsi_last_nondebug_bb (store_bb);
9434   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9435 
9436   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9437   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9438 
9439   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9440   t = build_call_expr (t, 0);
9441   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9442   gsi_remove (&si, true);
9443 
9444   if (gimple_in_ssa_p (cfun))
9445     update_ssa (TODO_update_ssa_no_phi);
9446   return true;
9447 }
9448 
9449 /* Expand a GIMPLE_OMP_ATOMIC statement.  We try, in order, a plain
9450    atomic load or store, expand_omp_atomic_fetch_op,
9451    expand_omp_atomic_cas, and the compare-and-swap loop of
9452    expand_omp_atomic_pipeline; the ultimate fallback is wrapping the
9453    operation in a mutex (expand_omp_atomic_mutex).  REGION is the
9454    atomic region built by build_omp_regions_1().  */
9455 
9456 static void
9457 expand_omp_atomic (struct omp_region *region)
9458 {
9459   basic_block load_bb = region->entry, store_bb = region->exit;
9460   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9461   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9462   tree loaded_val = gimple_omp_atomic_load_lhs (load);
9463   tree addr = gimple_omp_atomic_load_rhs (load);
9464   tree stored_val = gimple_omp_atomic_store_val (store);
9465   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9466   HOST_WIDE_INT index;
9467 
9468   /* Make sure the type is one of the supported sizes.  */
9469   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9470   index = exact_log2 (index);
9471   if (index >= 0 && index <= 4)
9472     {
9473       unsigned int align = TYPE_ALIGN_UNIT (type);
9474 
9475       /* __sync builtins require strict data alignment.  */
9476       if (exact_log2 (align) >= index)
9477 	{
9478 	  /* Atomic load.  */
9479 	  scalar_mode smode;
9480 	  if (loaded_val == stored_val
9481 	      && (is_int_mode (TYPE_MODE (type), &smode)
9482 		  || is_float_mode (TYPE_MODE (type), &smode))
9483 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9484 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9485 	    return;
9486 
9487 	  /* Atomic store.  */
9488 	  if ((is_int_mode (TYPE_MODE (type), &smode)
9489 	       || is_float_mode (TYPE_MODE (type), &smode))
9490 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9491 	      && store_bb == single_succ (load_bb)
9492 	      && first_stmt (store_bb) == store
9493 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
9494 					  stored_val, index))
9495 	    return;
9496 
9497 	  /* When possible, use specialized atomic update functions.  */
9498 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9499 	      && store_bb == single_succ (load_bb)
9500 	      && expand_omp_atomic_fetch_op (load_bb, addr,
9501 					     loaded_val, stored_val, index))
9502 	    return;
9503 
9504 	  /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
9505 	  if (store_bb == single_succ (load_bb)
9506 	      && !gimple_in_ssa_p (cfun)
9507 	      && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9508 					index))
9509 	    return;
9510 
9511 	  /* If we don't have specialized __sync builtins, try to implement
9512 	     it as a compare-and-swap loop.  */
9513 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9514 					  loaded_val, stored_val, index))
9515 	    return;
9516 	}
9517     }
9518 
9519   /* The ultimate fallback is wrapping the operation in a mutex.  */
9520   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9521 }
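
/* Editorial worked example, not in the original source: for a 4-byte
   type, TYPE_SIZE_UNIT is 4 and index = exact_log2 (4) = 2, selecting
   the __atomic_*_4 builtins in the helpers above.  A 3-byte type gives
   exact_log2 (3) == -1 and any type larger than 16 bytes gives
   index > 4; both skip the fast paths and fall through to the mutex
   fallback.  */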
9522 
9523 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9524    at REGION_EXIT.  */
9525 
9526 static void
9527 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9528 				   basic_block region_exit)
9529 {
9530   class loop *outer = region_entry->loop_father;
9531   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9532 
9533   /* Don't parallelize the kernels region if it contains more than one outer
9534      loop.  */
9535   unsigned int nr_outer_loops = 0;
9536   class loop *single_outer = NULL;
9537   for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9538     {
9539       gcc_assert (loop_outer (loop) == outer);
9540 
9541       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9542 	continue;
9543 
9544       if (region_exit != NULL
9545 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9546 	continue;
9547 
9548       nr_outer_loops++;
9549       single_outer = loop;
9550     }
9551   if (nr_outer_loops != 1)
9552     return;
9553 
9554   for (class loop *loop = single_outer->inner;
9555        loop != NULL;
9556        loop = loop->inner)
9557     if (loop->next)
9558       return;
9559 
9560   /* Mark the loops in the region.  */
9561   for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9562     loop->in_oacc_kernels_region = true;
9563 }
9564 
9565 /* Build target argument identifier from the DEVICE identifier, value
9566    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
9567 
9568 static tree
9569 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9570 {
9571   tree t = build_int_cst (integer_type_node, device);
9572   if (subseqent_param)
9573     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9574 		     build_int_cst (integer_type_node,
9575 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9576   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9577 		   build_int_cst (integer_type_node, id));
9578   return t;
9579 }
9580 
9581 /* Like above but return it in type that can be directly stored as an element
9582 /* Like above but return it in a type that can be directly stored as an element
9583 
9584 static tree
9585 get_target_argument_identifier (int device, bool subseqent_param, int id)
9586 {
9587   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9588   return fold_convert (ptr_type_node, t);
9589 }
9590 
9591 /* Return a target argument consisting of DEVICE identifier, value identifier
9592    ID, and the actual VALUE.  */
9593 
9594 static tree
9595 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9596 			   tree value)
9597 {
9598   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9599 			fold_convert (integer_type_node, value),
9600 			build_int_cst (unsigned_type_node,
9601 				       GOMP_TARGET_ARG_VALUE_SHIFT));
9602   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9603 		   get_target_argument_identifier_1 (device, false, id));
9604   t = fold_convert (ptr_type_node, t);
9605   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9606 }
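
/* Editorial worked example, not in the original source: assuming
   GOMP_TARGET_ARG_VALUE_SHIFT is 16, a num_teams value of 8 for all
   devices is packed as the single element

       (void *) ((8 << 16) | GOMP_TARGET_ARG_DEVICE_ALL
                 | GOMP_TARGET_ARG_NUM_TEAMS)

   with the value in the high bits and the device/id identifier in the
   low bits.  */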
9607 
9608 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9609    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in
9610    it; otherwise push an identifier (with DEVICE and ID) and the VALUE as
9611    two separate arguments.  */
9612 
9613 static void
9614 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9615 					 int id, tree value, vec <tree> *args)
9616 {
9617   if (tree_fits_shwi_p (value)
9618       && tree_to_shwi (value) > -(1 << 15)
9619       && tree_to_shwi (value) < (1 << 15))
9620     args->quick_push (get_target_argument_value (gsi, device, id, value));
9621   else
9622     {
9623       args->quick_push (get_target_argument_identifier (device, true, id));
9624       value = fold_convert (ptr_type_node, value);
9625       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9626 					GSI_SAME_STMT);
9627       args->quick_push (value);
9628     }
9629 }
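
/* Editorial note, an assumption rather than documented rationale: the
   (-2^15, 2^15) bound presumably keeps VALUE << GOMP_TARGET_ARG_VALUE_SHIFT
   representable in a signed 32-bit integer; a value such as 100000 fails
   the test and is therefore pushed as the two-element form
   { identifier | GOMP_TARGET_ARG_SUBSEQUENT_PARAM, (void *) 100000 }.  */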
9630 
9631 /* Create an array of arguments that is then passed to GOMP_target.  */
9632 
9633 static tree
9634 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9635 {
9636   auto_vec <tree, 6> args;
9637   tree clauses = gimple_omp_target_clauses (tgt_stmt);
9638   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9639   if (c)
9640     t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9641   else
9642     t = integer_minus_one_node;
9643   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9644 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9645 
9646   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9647   if (c)
9648     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9649   else
9650     t = integer_minus_one_node;
9651   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9652 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
9653 					   &args);
9654 
9655   /* Produce more, perhaps device specific, arguments here.  */
9656 
9657   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9658 							  args.length () + 1),
9659 				  ".omp_target_args");
9660   for (unsigned i = 0; i < args.length (); i++)
9661     {
9662       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9663 			 build_int_cst (integer_type_node, i),
9664 			 NULL_TREE, NULL_TREE);
9665       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9666 			 GSI_SAME_STMT);
9667     }
9668   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9669 		     build_int_cst (integer_type_node, args.length ()),
9670 		     NULL_TREE, NULL_TREE);
9671   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9672 		     GSI_SAME_STMT);
9673   TREE_ADDRESSABLE (argarray) = 1;
9674   return build_fold_addr_expr (argarray);
9675 }
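
/* Editorial sketch, not in the original source: for
   'num_teams(4) thread_limit(64)' the function above builds

       .omp_target_args = { <packed ALL/NUM_TEAMS/4>,
                            <packed ALL/THREAD_LIMIT/64>,
                            NULL };

   and returns its address, which ends up as the trailing ARGS operand of
   the GOMP_target launch call built in expand_omp_target below.  */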
9676 
9677 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
9678 
9679 static void
9680 expand_omp_target (struct omp_region *region)
9681 {
9682   basic_block entry_bb, exit_bb, new_bb;
9683   struct function *child_cfun;
9684   tree child_fn, block, t;
9685   gimple_stmt_iterator gsi;
9686   gomp_target *entry_stmt;
9687   gimple *stmt;
9688   edge e;
9689   bool offloaded;
9690   int target_kind;
9691 
9692   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9693   target_kind = gimple_omp_target_kind (entry_stmt);
9694   new_bb = region->entry;
9695 
9696   offloaded = is_gimple_omp_offloaded (entry_stmt);
9697   switch (target_kind)
9698     {
9699     case GF_OMP_TARGET_KIND_REGION:
9700     case GF_OMP_TARGET_KIND_UPDATE:
9701     case GF_OMP_TARGET_KIND_ENTER_DATA:
9702     case GF_OMP_TARGET_KIND_EXIT_DATA:
9703     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9704     case GF_OMP_TARGET_KIND_OACC_KERNELS:
9705     case GF_OMP_TARGET_KIND_OACC_SERIAL:
9706     case GF_OMP_TARGET_KIND_OACC_UPDATE:
9707     case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9708     case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9709     case GF_OMP_TARGET_KIND_OACC_DECLARE:
9710     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9711     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9712     case GF_OMP_TARGET_KIND_DATA:
9713     case GF_OMP_TARGET_KIND_OACC_DATA:
9714     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9715     case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9716       break;
9717     default:
9718       gcc_unreachable ();
9719     }
9720 
9721   child_fn = NULL_TREE;
9722   child_cfun = NULL;
9723   if (offloaded)
9724     {
9725       child_fn = gimple_omp_target_child_fn (entry_stmt);
9726       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9727     }
9728 
9729   /* Supported by expand_omp_taskreg, but not here.  */
9730   if (child_cfun != NULL)
9731     gcc_checking_assert (!child_cfun->cfg);
9732   gcc_checking_assert (!gimple_in_ssa_p (cfun));
9733 
9734   entry_bb = region->entry;
9735   exit_bb = region->exit;
9736 
9737   if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9738     mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9739 
9740   /* From here on, all OpenACC compute constructs are mapped to
9741      'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9742      To distinguish between them, we attach attributes.  */
9743   switch (target_kind)
9744     {
9745     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9746       DECL_ATTRIBUTES (child_fn)
9747 	= tree_cons (get_identifier ("oacc parallel"),
9748 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9749       break;
9750     case GF_OMP_TARGET_KIND_OACC_KERNELS:
9751       DECL_ATTRIBUTES (child_fn)
9752 	= tree_cons (get_identifier ("oacc kernels"),
9753 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9754       break;
9755     case GF_OMP_TARGET_KIND_OACC_SERIAL:
9756       DECL_ATTRIBUTES (child_fn)
9757 	= tree_cons (get_identifier ("oacc serial"),
9758 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9759       break;
9760     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9761       DECL_ATTRIBUTES (child_fn)
9762 	= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9763 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9764       break;
9765     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9766       DECL_ATTRIBUTES (child_fn)
9767 	= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9768 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9769       break;
9770     default:
9771       /* Make sure we don't miss any.  */
9772       gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9773 			     && is_gimple_omp_offloaded (entry_stmt)));
9774       break;
9775     }
9776 
9777   if (offloaded)
9778     {
9779       unsigned srcidx, dstidx, num;
9780 
9781       /* If the offloading region needs data sent from the parent
9782 	 function, then the very first statement (except possible
9783 	 tree profile counter updates) of the offloading body
9784 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
9785 	 &.OMP_DATA_O is passed as an argument to the child function,
9786 	 we need to replace it with the argument as seen by the child
9787 	 function.
9788 
9789 	 In most cases, this will end up being the identity assignment
9790 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
9791 	 a function call that has been inlined, the original PARM_DECL
9792 	 .OMP_DATA_I may have been converted into a different local
9793 	 variable.  In which case, we need to keep the assignment.  */
9794       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9795       if (data_arg)
9796 	{
9797 	  basic_block entry_succ_bb = single_succ (entry_bb);
9798 	  gimple_stmt_iterator gsi;
9799 	  tree arg;
9800 	  gimple *tgtcopy_stmt = NULL;
9801 	  tree sender = TREE_VEC_ELT (data_arg, 0);
9802 
9803 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9804 	    {
9805 	      gcc_assert (!gsi_end_p (gsi));
9806 	      stmt = gsi_stmt (gsi);
9807 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
9808 		continue;
9809 
9810 	      if (gimple_num_ops (stmt) == 2)
9811 		{
9812 		  tree arg = gimple_assign_rhs1 (stmt);
9813 
9814 		  /* We're ignoring the subcode because we're
9815 		     effectively doing a STRIP_NOPS.  */
9816 
9817 		  if (TREE_CODE (arg) == ADDR_EXPR
9818 		      && TREE_OPERAND (arg, 0) == sender)
9819 		    {
9820 		      tgtcopy_stmt = stmt;
9821 		      break;
9822 		    }
9823 		}
9824 	    }
9825 
9826 	  gcc_assert (tgtcopy_stmt != NULL);
9827 	  arg = DECL_ARGUMENTS (child_fn);
9828 
9829 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9830 	  gsi_remove (&gsi, true);
9831 	}
9832 
9833       /* Declare local variables needed in CHILD_CFUN.  */
9834       block = DECL_INITIAL (child_fn);
9835       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9836       /* The gimplifier could record temporaries in the offloading block
9837 	 rather than in the containing function's local_decls chain,
9838 	 which would mean cgraph missed finalizing them.  Do it now.  */
9839       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9840 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9841 	  varpool_node::finalize_decl (t);
9842       DECL_SAVED_TREE (child_fn) = NULL;
9843       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
9844       gimple_set_body (child_fn, NULL);
9845       TREE_USED (block) = 1;
9846 
9847       /* Reset DECL_CONTEXT on function arguments.  */
9848       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9849 	DECL_CONTEXT (t) = child_fn;
9850 
9851       /* Split ENTRY_BB at GIMPLE_*,
9852 	 so that it can be moved to the child function.  */
9853       gsi = gsi_last_nondebug_bb (entry_bb);
9854       stmt = gsi_stmt (gsi);
9855       gcc_assert (stmt
9856 		  && gimple_code (stmt) == gimple_code (entry_stmt));
9857       e = split_block (entry_bb, stmt);
9858       gsi_remove (&gsi, true);
9859       entry_bb = e->dest;
9860       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9861 
9862       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
9863       if (exit_bb)
9864 	{
9865 	  gsi = gsi_last_nondebug_bb (exit_bb);
9866 	  gcc_assert (!gsi_end_p (gsi)
9867 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9868 	  stmt = gimple_build_return (NULL);
9869 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9870 	  gsi_remove (&gsi, true);
9871 	}
9872 
9873       /* Move the offloading region into CHILD_CFUN.  */
9874 
9875       block = gimple_block (entry_stmt);
9876 
9877       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9878       if (exit_bb)
9879 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9880       /* When the OMP expansion process cannot guarantee an up-to-date
9881 	 loop tree, arrange for the child function to fix up loops.  */
9882       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9883 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9884 
9885       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
9886       num = vec_safe_length (child_cfun->local_decls);
9887       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9888 	{
9889 	  t = (*child_cfun->local_decls)[srcidx];
9890 	  if (DECL_CONTEXT (t) == cfun->decl)
9891 	    continue;
9892 	  if (srcidx != dstidx)
9893 	    (*child_cfun->local_decls)[dstidx] = t;
9894 	  dstidx++;
9895 	}
9896       if (dstidx != num)
9897 	vec_safe_truncate (child_cfun->local_decls, dstidx);
9898 
9899       /* Inform the callgraph about the new function.  */
9900       child_cfun->curr_properties = cfun->curr_properties;
9901       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9902       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9903       cgraph_node *node = cgraph_node::get_create (child_fn);
9904       node->parallelized_function = 1;
9905       cgraph_node::add_new_function (child_fn, true);
9906 
9907       /* Add the new function to the offload table.  */
9908       if (ENABLE_OFFLOADING)
9909 	{
9910 	  if (in_lto_p)
9911 	    DECL_PRESERVE_P (child_fn) = 1;
9912 	  vec_safe_push (offload_funcs, child_fn);
9913 	}
9914 
9915       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9916 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9917 
9918       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
9919 	 fixed in a following pass.  */
9920       push_cfun (child_cfun);
9921       if (need_asm)
9922 	assign_assembler_name_if_needed (child_fn);
9923       cgraph_edge::rebuild_edges ();
9924 
9925       /* Some EH regions might become dead, see PR34608.  If
9926 	 pass_cleanup_cfg isn't the first pass to happen with the
9927 	 new child, these dead EH edges might cause problems.
9928 	 Clean them up now.  */
9929       if (flag_exceptions)
9930 	{
9931 	  basic_block bb;
9932 	  bool changed = false;
9933 
9934 	  FOR_EACH_BB_FN (bb, cfun)
9935 	    changed |= gimple_purge_dead_eh_edges (bb);
9936 	  if (changed)
9937 	    cleanup_tree_cfg ();
9938 	}
9939       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9940 	verify_loop_structure ();
9941       pop_cfun ();
9942 
9943       if (dump_file && !gimple_in_ssa_p (cfun))
9944 	{
9945 	  omp_any_child_fn_dumped = true;
9946 	  dump_function_header (dump_file, child_fn, dump_flags);
9947 	  dump_function_to_file (child_fn, dump_file, dump_flags);
9948 	}
9949 
9950       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9951     }
9952 
9953   /* Emit a library call to launch the offloading region, or do data
9954      transfers.  */
9955   tree t1, t2, t3, t4, depend, c, clauses;
9956   enum built_in_function start_ix;
9957   unsigned int flags_i = 0;
9958 
9959   switch (gimple_omp_target_kind (entry_stmt))
9960     {
9961     case GF_OMP_TARGET_KIND_REGION:
9962       start_ix = BUILT_IN_GOMP_TARGET;
9963       break;
9964     case GF_OMP_TARGET_KIND_DATA:
9965       start_ix = BUILT_IN_GOMP_TARGET_DATA;
9966       break;
9967     case GF_OMP_TARGET_KIND_UPDATE:
9968       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9969       break;
9970     case GF_OMP_TARGET_KIND_ENTER_DATA:
9971       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9972       break;
9973     case GF_OMP_TARGET_KIND_EXIT_DATA:
9974       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9975       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9976       break;
9977     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9978     case GF_OMP_TARGET_KIND_OACC_KERNELS:
9979     case GF_OMP_TARGET_KIND_OACC_SERIAL:
9980     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9981     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9982       start_ix = BUILT_IN_GOACC_PARALLEL;
9983       break;
9984     case GF_OMP_TARGET_KIND_OACC_DATA:
9985     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9986     case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9987       start_ix = BUILT_IN_GOACC_DATA_START;
9988       break;
9989     case GF_OMP_TARGET_KIND_OACC_UPDATE:
9990       start_ix = BUILT_IN_GOACC_UPDATE;
9991       break;
9992     case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9993       start_ix = BUILT_IN_GOACC_ENTER_DATA;
9994       break;
9995     case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9996       start_ix = BUILT_IN_GOACC_EXIT_DATA;
9997       break;
9998     case GF_OMP_TARGET_KIND_OACC_DECLARE:
9999       start_ix = BUILT_IN_GOACC_DECLARE;
10000       break;
10001     default:
10002       gcc_unreachable ();
10003     }
10004 
10005   clauses = gimple_omp_target_clauses (entry_stmt);
10006 
10007   tree device = NULL_TREE;
10008   location_t device_loc = UNKNOWN_LOCATION;
10009   tree goacc_flags = NULL_TREE;
10010   if (is_gimple_omp_oacc (entry_stmt))
10011     {
10012       /* By default, no GOACC_FLAGs are set.  */
10013       goacc_flags = integer_zero_node;
10014     }
10015   else
10016     {
10017       c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10018       if (c)
10019 	{
10020 	  device = OMP_CLAUSE_DEVICE_ID (c);
10021 	  device_loc = OMP_CLAUSE_LOCATION (c);
10022 	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10023 	    sorry_at (device_loc, "%<ancestor%> not yet supported");
10024 	}
10025       else
10026 	{
10027 	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
10028 	     library choose).  */
10029 	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10030 	  device_loc = gimple_location (entry_stmt);
10031 	}
10032 
10033       c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10034       /* FIXME: in_reduction(...) with nowait is not implemented yet;
10035 	 pretend nowait doesn't appear.  */
10036       if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10037 	c = NULL;
10038       if (c)
10039 	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10040     }
10041 
10042   /* By default, there is no conditional.  */
10043   tree cond = NULL_TREE;
10044   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10045   if (c)
10046     cond = OMP_CLAUSE_IF_EXPR (c);
10047   /* If we found the clause 'if (cond)', build:
10048      OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10049      OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10050   if (cond)
10051     {
10052       tree *tp;
10053       if (is_gimple_omp_oacc (entry_stmt))
10054 	tp = &goacc_flags;
10055       else
10056 	{
10057 	  /* Ensure 'device' is of the correct type.  */
10058 	  device = fold_convert_loc (device_loc, integer_type_node, device);
10059 
10060 	  tp = &device;
10061 	}
10062 
10063       cond = gimple_boolify (cond);
10064 
10065       basic_block cond_bb, then_bb, else_bb;
10066       edge e;
10067       tree tmp_var;
10068 
10069       tmp_var = create_tmp_var (TREE_TYPE (*tp));
10070       if (offloaded)
10071 	e = split_block_after_labels (new_bb);
10072       else
10073 	{
10074 	  gsi = gsi_last_nondebug_bb (new_bb);
10075 	  gsi_prev (&gsi);
10076 	  e = split_block (new_bb, gsi_stmt (gsi));
10077 	}
10078       cond_bb = e->src;
10079       new_bb = e->dest;
10080       remove_edge (e);
10081 
10082       then_bb = create_empty_bb (cond_bb);
10083       else_bb = create_empty_bb (then_bb);
10084       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10085       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10086 
10087       stmt = gimple_build_cond_empty (cond);
10088       gsi = gsi_last_bb (cond_bb);
10089       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10090 
10091       gsi = gsi_start_bb (then_bb);
10092       stmt = gimple_build_assign (tmp_var, *tp);
10093       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10094 
10095       gsi = gsi_start_bb (else_bb);
10096       if (is_gimple_omp_oacc (entry_stmt))
10097 	stmt = gimple_build_assign (tmp_var,
10098 				    BIT_IOR_EXPR,
10099 				    *tp,
10100 				    build_int_cst (integer_type_node,
10101 						   GOACC_FLAG_HOST_FALLBACK));
10102       else
10103 	stmt = gimple_build_assign (tmp_var,
10104 				    build_int_cst (integer_type_node,
10105 						   GOMP_DEVICE_HOST_FALLBACK));
10106       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10107 
10108       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10109       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10110       add_bb_to_loop (then_bb, cond_bb->loop_father);
10111       add_bb_to_loop (else_bb, cond_bb->loop_father);
10112       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10113       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10114 
10115       *tp = tmp_var;
10116 
10117       gsi = gsi_last_nondebug_bb (new_bb);
10118     }
10119   else
10120     {
10121       gsi = gsi_last_nondebug_bb (new_bb);
10122 
10123       if (device != NULL_TREE)
10124 	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10125 					   true, GSI_SAME_STMT);
10126     }
10127 
10128   t = gimple_omp_target_data_arg (entry_stmt);
10129   if (t == NULL)
10130     {
10131       t1 = size_zero_node;
10132       t2 = build_zero_cst (ptr_type_node);
10133       t3 = t2;
10134       t4 = t2;
10135     }
10136   else
10137     {
10138       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10139       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10140       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10141       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10142       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10143     }
10144 
10145   gimple *g;
10146   bool tagging = false;
10147   /* The maximum number used by any start_ix, without varargs.  */
10148   auto_vec<tree, 11> args;
10149   if (is_gimple_omp_oacc (entry_stmt))
10150     {
10151       tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10152 					TREE_TYPE (goacc_flags), goacc_flags);
10153       goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10154 						NULL_TREE, true,
10155 						GSI_SAME_STMT);
10156       args.quick_push (goacc_flags_m);
10157     }
10158   else
10159     args.quick_push (device);
10160   if (offloaded)
10161     args.quick_push (build_fold_addr_expr (child_fn));
10162   args.quick_push (t1);
10163   args.quick_push (t2);
10164   args.quick_push (t3);
10165   args.quick_push (t4);
10166   switch (start_ix)
10167     {
10168     case BUILT_IN_GOACC_DATA_START:
10169     case BUILT_IN_GOACC_DECLARE:
10170     case BUILT_IN_GOMP_TARGET_DATA:
10171       break;
10172     case BUILT_IN_GOMP_TARGET:
10173     case BUILT_IN_GOMP_TARGET_UPDATE:
10174     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10175       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10176       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10177       if (c)
10178 	depend = OMP_CLAUSE_DECL (c);
10179       else
10180 	depend = build_int_cst (ptr_type_node, 0);
10181       args.quick_push (depend);
10182       if (start_ix == BUILT_IN_GOMP_TARGET)
10183 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
10184       break;
10185     case BUILT_IN_GOACC_PARALLEL:
10186       if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10187 	{
10188 	  tree dims = NULL_TREE;
10189 	  unsigned int ix;
10190 
10191 	  /* For serial constructs we set all dimensions to 1.  */
10192 	  for (ix = GOMP_DIM_MAX; ix--;)
10193 	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
10194 	  oacc_replace_fn_attrib (child_fn, dims);
10195 	}
10196       else
10197 	oacc_set_fn_attrib (child_fn, clauses, &args);
10198       tagging = true;
10199       /* FALLTHRU */
10200     case BUILT_IN_GOACC_ENTER_DATA:
10201     case BUILT_IN_GOACC_EXIT_DATA:
10202     case BUILT_IN_GOACC_UPDATE:
10203       {
10204 	tree t_async = NULL_TREE;
10205 
10206 	/* If present, use the value specified by the respective
10207 	   clause, making sure that it is of the correct type.  */
10208 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10209 	if (c)
10210 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10211 				      integer_type_node,
10212 				      OMP_CLAUSE_ASYNC_EXPR (c));
10213 	else if (!tagging)
10214 	  /* Default values for t_async.  */
10215 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
10216 				      integer_type_node,
10217 				      build_int_cst (integer_type_node,
10218 						     GOMP_ASYNC_SYNC));
10219 	if (tagging && t_async)
10220 	  {
10221 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10222 
10223 	    if (TREE_CODE (t_async) == INTEGER_CST)
10224 	      {
10225 		/* See if we can pack the async arg in to the tag's
10226 		   operand.  */
10227 		i_async = TREE_INT_CST_LOW (t_async);
10228 		if (i_async < GOMP_LAUNCH_OP_MAX)
10229 		  t_async = NULL_TREE;
10230 		else
10231 		  i_async = GOMP_LAUNCH_OP_MAX;
10232 	      }
10233 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10234 					      i_async));
10235 	  }
10236 	if (t_async)
10237 	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10238 						    NULL_TREE, true,
10239 						    GSI_SAME_STMT));
10240 
10241 	/* Save the argument index, and ... */
10242 	unsigned t_wait_idx = args.length ();
10243 	unsigned num_waits = 0;
10244 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10245 	if (!tagging || c)
10246 	  /* ... push a placeholder.  */
10247 	  args.safe_push (integer_zero_node);
10248 
10249 	for (; c; c = OMP_CLAUSE_CHAIN (c))
10250 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10251 	    {
10252 	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10253 					   integer_type_node,
10254 					   OMP_CLAUSE_WAIT_EXPR (c));
10255 	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10256 					      GSI_SAME_STMT);
10257 	      args.safe_push (arg);
10258 	      num_waits++;
10259 	    }
10260 
10261 	if (!tagging || num_waits)
10262 	  {
10263 	    tree len;
10264 
10265 	    /* Now that we know the number, update the placeholder.  */
10266 	    if (tagging)
10267 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10268 	    else
10269 	      len = build_int_cst (integer_type_node, num_waits);
10270 	    len = fold_convert_loc (gimple_location (entry_stmt),
10271 				    unsigned_type_node, len);
10272 	    args[t_wait_idx] = len;
10273 	  }
10274       }
10275       break;
10276     default:
10277       gcc_unreachable ();
10278     }
10279   if (tagging)
10280     /* Push terminal marker - zero.  */
10281     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
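  /* Each tag pushed above is a packed launch operand: a GOMP_LAUNCH_*
     code in the high bits and a 16-bit operand in the low bits, with a
     zero code terminating the sequence (see GOMP_LAUNCH_PACK in
     gomp-constants.h).  */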
10282 
10283   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10284   gimple_set_location (g, gimple_location (entry_stmt));
10285   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10286   if (!offloaded)
10287     {
10288       g = gsi_stmt (gsi);
10289       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10290       gsi_remove (&gsi, true);
10291     }
10292 }
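
/* For illustration (a sketch of the shape of the generated code, not a
   verbatim dump): after the above runs for an offloaded region such as

     #pragma omp target map(tofrom: a)
     a++;

   the host side ends up with a call along the lines of

     GOMP_target_ext (device, foo._omp_fn.0, mapnum, hostaddrs, sizes,
		      kinds, flags, depend, args);

   where foo._omp_fn.0 stands for the outlined CHILD_FN and the map
   vectors are the T2/T3/T4 operands built above.  */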
10293 
10294 /* Expand the parallel region tree rooted at REGION.  Expansion
10295    proceeds in depth-first order.  Innermost regions are expanded
10296    first.  This way, parallel regions that require a new function to
10297    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10298    internal dependencies in their body.  */
10299 
10300 static void
10301 expand_omp (struct omp_region *region)
10302 {
10303   omp_any_child_fn_dumped = false;
10304   while (region)
10305     {
10306       location_t saved_location;
10307       gimple *inner_stmt = NULL;
10308 
10309       /* First, determine whether this is a combined parallel+workshare
10310 	 region.  */
10311       if (region->type == GIMPLE_OMP_PARALLEL)
10312 	determine_parallel_type (region);
10313 
10314       if (region->type == GIMPLE_OMP_FOR
10315 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
10316 	inner_stmt = last_stmt (region->inner->entry);
10317 
10318       if (region->inner)
10319 	expand_omp (region->inner);
10320 
10321       saved_location = input_location;
10322       if (gimple_has_location (last_stmt (region->entry)))
10323 	input_location = gimple_location (last_stmt (region->entry));
10324 
10325       switch (region->type)
10326 	{
10327 	case GIMPLE_OMP_PARALLEL:
10328 	case GIMPLE_OMP_TASK:
10329 	  expand_omp_taskreg (region);
10330 	  break;
10331 
10332 	case GIMPLE_OMP_FOR:
10333 	  expand_omp_for (region, inner_stmt);
10334 	  break;
10335 
10336 	case GIMPLE_OMP_SECTIONS:
10337 	  expand_omp_sections (region);
10338 	  break;
10339 
10340 	case GIMPLE_OMP_SECTION:
10341 	  /* Individual omp sections are handled together with their
10342 	     parent GIMPLE_OMP_SECTIONS region.  */
10343 	  break;
10344 
10345 	case GIMPLE_OMP_SINGLE:
10346 	case GIMPLE_OMP_SCOPE:
10347 	  expand_omp_single (region);
10348 	  break;
10349 
10350 	case GIMPLE_OMP_ORDERED:
10351 	  {
10352 	    gomp_ordered *ord_stmt
10353 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
10354 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10355 				 OMP_CLAUSE_DEPEND))
10356 	      {
10357 		/* We'll expand these when expanding corresponding
10358 		   worksharing region with ordered(n) clause.  */
10359 		gcc_assert (region->outer
10360 			    && region->outer->type == GIMPLE_OMP_FOR);
10361 		region->ord_stmt = ord_stmt;
10362 		break;
10363 	      }
10364 	  }
10365 	  /* FALLTHRU */
10366 	case GIMPLE_OMP_MASTER:
10367 	case GIMPLE_OMP_MASKED:
10368 	case GIMPLE_OMP_TASKGROUP:
10369 	case GIMPLE_OMP_CRITICAL:
10370 	case GIMPLE_OMP_TEAMS:
10371 	  expand_omp_synch (region);
10372 	  break;
10373 
10374 	case GIMPLE_OMP_ATOMIC_LOAD:
10375 	  expand_omp_atomic (region);
10376 	  break;
10377 
10378 	case GIMPLE_OMP_TARGET:
10379 	  expand_omp_target (region);
10380 	  break;
10381 
10382 	default:
10383 	  gcc_unreachable ();
10384 	}
10385 
10386       input_location = saved_location;
10387       region = region->next;
10388     }
10389   if (omp_any_child_fn_dumped)
10390     {
10391       if (dump_file)
10392 	dump_function_header (dump_file, current_function_decl, dump_flags);
10393       omp_any_child_fn_dumped = false;
10394     }
10395 }
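
/* For example (a sketch): given

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       body (i);

   the GIMPLE_OMP_FOR region is the inner child of the GIMPLE_OMP_PARALLEL
   region, so the loop is expanded first and only then is the enclosing
   parallel outlined into its child function.  */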
10396 
10397 /* Helper for build_omp_regions.  Scan the dominator tree starting at
10398    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
10399    true, the function ends once a single tree is built (otherwise, a whole
10400    forest of OMP constructs may be built).  */
10401 
10402 static void
10403 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10404 		     bool single_tree)
10405 {
10406   gimple_stmt_iterator gsi;
10407   gimple *stmt;
10408   basic_block son;
10409 
10410   gsi = gsi_last_nondebug_bb (bb);
10411   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10412     {
10413       struct omp_region *region;
10414       enum gimple_code code;
10415 
10416       stmt = gsi_stmt (gsi);
10417       code = gimple_code (stmt);
10418       if (code == GIMPLE_OMP_RETURN)
10419 	{
10420 	  /* STMT is the return point out of region PARENT.  Mark it
10421 	     as the exit point and make PARENT the immediately
10422 	     enclosing region.  */
10423 	  gcc_assert (parent);
10424 	  region = parent;
10425 	  region->exit = bb;
10426 	  parent = parent->outer;
10427 	}
10428       else if (code == GIMPLE_OMP_ATOMIC_STORE)
10429 	{
10430 	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10431 	     GIMPLE_OMP_RETURN, but matches with
10432 	     GIMPLE_OMP_ATOMIC_LOAD.  */
10433 	  gcc_assert (parent);
10434 	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10435 	  region = parent;
10436 	  region->exit = bb;
10437 	  parent = parent->outer;
10438 	}
10439       else if (code == GIMPLE_OMP_CONTINUE)
10440 	{
10441 	  gcc_assert (parent);
10442 	  parent->cont = bb;
10443 	}
10444       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10445 	{
10446 	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10447 	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
10448 	}
10449       else
10450 	{
10451 	  region = new_omp_region (bb, code, parent);
10452 	  /* Otherwise...  */
10453 	  if (code == GIMPLE_OMP_TARGET)
10454 	    {
10455 	      switch (gimple_omp_target_kind (stmt))
10456 		{
10457 		case GF_OMP_TARGET_KIND_REGION:
10458 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10459 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
10460 		case GF_OMP_TARGET_KIND_OACC_SERIAL:
10461 		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10462 		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10463 		  break;
10464 		case GF_OMP_TARGET_KIND_UPDATE:
10465 		case GF_OMP_TARGET_KIND_ENTER_DATA:
10466 		case GF_OMP_TARGET_KIND_EXIT_DATA:
10467 		case GF_OMP_TARGET_KIND_DATA:
10468 		case GF_OMP_TARGET_KIND_OACC_DATA:
10469 		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10470 		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10471 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
10472 		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10473 		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10474 		case GF_OMP_TARGET_KIND_OACC_DECLARE:
10475 		  /* ..., other than for those stand-alone directives...
10476 		     To be precise, target data isn't stand-alone, but
10477 		     gimplifier put the end API call into try finally block
10478 		     the gimplifier put the end API call into a try/finally block
10479 		  region = NULL;
10480 		  break;
10481 		default:
10482 		  gcc_unreachable ();
10483 		}
10484 	    }
10485 	  else if (code == GIMPLE_OMP_ORDERED
10486 		   && omp_find_clause (gimple_omp_ordered_clauses
10487 					 (as_a <gomp_ordered *> (stmt)),
10488 				       OMP_CLAUSE_DEPEND))
10489 	    /* #pragma omp ordered depend is also just a stand-alone
10490 	       directive.  */
10491 	    region = NULL;
10492 	  else if (code == GIMPLE_OMP_TASK
10493 		   && gimple_omp_task_taskwait_p (stmt))
10494 	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
10495 	    region = NULL;
10496 	  else if (code == GIMPLE_OMP_TASKGROUP)
10497 	    /* #pragma omp taskgroup isn't a stand-alone directive, but
10498 	       the gimplifier put the end API call into a try/finally block
10499 	       for it, so omp expansion can treat it as such.  */
10500 	    region = NULL;
10501 	  /* ..., this directive becomes the parent for a new region.  */
10502 	  if (region)
10503 	    parent = region;
10504 	}
10505     }
10506 
10507   if (single_tree && !parent)
10508     return;
10509 
10510   for (son = first_dom_son (CDI_DOMINATORS, bb);
10511        son;
10512        son = next_dom_son (CDI_DOMINATORS, son))
10513     build_omp_regions_1 (son, parent, single_tree);
10514 }
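
/* A sketch of the region tree built here for, e.g.,

     #pragma omp parallel            <-- region P
     {
       #pragma omp single            <-- P->inner
	 ...
       #pragma omp critical          <-- P->inner->next
	 ...
     }

   where each new_omp_region call links the directive's block in as the
   region's entry and the later GIMPLE_OMP_RETURN block as its exit.  */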
10515 
10516 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10517    root_omp_region.  */
10518 
10519 static void
10520 build_omp_regions_root (basic_block root)
10521 {
10522   gcc_assert (root_omp_region == NULL);
10523   build_omp_regions_1 (root, NULL, true);
10524   gcc_assert (root_omp_region != NULL);
10525 }
10526 
10527 /* Expands the OMP construct (and its subconstructs) starting in HEAD.  */
10528 
10529 void
10530 omp_expand_local (basic_block head)
10531 {
10532   build_omp_regions_root (head);
10533   if (dump_file && (dump_flags & TDF_DETAILS))
10534     {
10535       fprintf (dump_file, "\nOMP region tree\n\n");
10536       dump_omp_region (dump_file, root_omp_region, 0);
10537       fprintf (dump_file, "\n");
10538     }
10539 
10540   remove_exit_barriers (root_omp_region);
10541   expand_omp (root_omp_region);
10542 
10543   omp_free_regions ();
10544 }
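
/* (This entry point exists so that OMP constructs created after the main
   ompexp pass below has already run, for example by auto-parallelization,
   can still be expanded in place.)  */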
10545 
10546 /* Scan the CFG and build a tree of OMP regions, storing the root in
10547    root_omp_region.  */
10548 
10549 static void
10550 build_omp_regions (void)
10551 {
10552   gcc_assert (root_omp_region == NULL);
10553   calculate_dominance_info (CDI_DOMINATORS);
10554   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10555 }
10556 
10557 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
10558 
10559 static unsigned int
10560 execute_expand_omp (void)
10561 {
10562   build_omp_regions ();
10563 
10564   if (!root_omp_region)
10565     return 0;
10566 
10567   if (dump_file)
10568     {
10569       fprintf (dump_file, "\nOMP region tree\n\n");
10570       dump_omp_region (dump_file, root_omp_region, 0);
10571       fprintf (dump_file, "\n");
10572     }
10573 
10574   remove_exit_barriers (root_omp_region);
10575 
10576   expand_omp (root_omp_region);
10577 
10578   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10579     verify_loop_structure ();
10580   cleanup_tree_cfg ();
10581 
10582   omp_free_regions ();
10583 
10584   return 0;
10585 }
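
/* To inspect the region tree printed above, enable the pass dump, e.g.
   (a typical invocation):

     gcc -fopenmp -fdump-tree-ompexp test.c

   and look for the "OMP region tree" header in the resulting .ompexp
   dump file.  */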
10586 
10587 /* OMP expansion -- the default pass, run before creation of SSA form.  */
10588 
10589 namespace {
10590 
10591 const pass_data pass_data_expand_omp =
10592 {
10593   GIMPLE_PASS, /* type */
10594   "ompexp", /* name */
10595   OPTGROUP_OMP, /* optinfo_flags */
10596   TV_NONE, /* tv_id */
10597   PROP_gimple_any, /* properties_required */
10598   PROP_gimple_eomp, /* properties_provided */
10599   0, /* properties_destroyed */
10600   0, /* todo_flags_start */
10601   0, /* todo_flags_finish */
10602 };
10603 
10604 class pass_expand_omp : public gimple_opt_pass
10605 {
10606 public:
10607   pass_expand_omp (gcc::context *ctxt)
10608     : gimple_opt_pass (pass_data_expand_omp, ctxt)
10609   {}
10610 
10611   /* opt_pass methods: */
10612   virtual unsigned int execute (function *)
10613     {
10614       bool gate = ((flag_openacc != 0 || flag_openmp != 0
10615 		    || flag_openmp_simd != 0)
10616 		   && !seen_error ());
10617 
10618       /* This pass always runs, to provide PROP_gimple_eomp.
10619 	 But often, there is nothing to do.  */
10620       if (!gate)
10621 	return 0;
10622 
10623       return execute_expand_omp ();
10624     }
10625 
10626 }; // class pass_expand_omp
10627 
10628 } // anon namespace
10629 
10630 gimple_opt_pass *
10631 make_pass_expand_omp (gcc::context *ctxt)
10632 {
10633   return new pass_expand_omp (ctxt);
10634 }
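
/* A sketch of how this factory is referenced (assuming the conventional
   registration in gcc/passes.def):

     NEXT_PASS (pass_expand_omp);

   Because the pass provides PROP_gimple_eomp, the SSA variant below then
   gates itself off for functions already expanded here.  */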
10635 
10636 namespace {
10637 
10638 const pass_data pass_data_expand_omp_ssa =
10639 {
10640   GIMPLE_PASS, /* type */
10641   "ompexpssa", /* name */
10642   OPTGROUP_OMP, /* optinfo_flags */
10643   TV_NONE, /* tv_id */
10644   PROP_cfg | PROP_ssa, /* properties_required */
10645   PROP_gimple_eomp, /* properties_provided */
10646   0, /* properties_destroyed */
10647   0, /* todo_flags_start */
10648   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10649 };
10650 
10651 class pass_expand_omp_ssa : public gimple_opt_pass
10652 {
10653 public:
10654   pass_expand_omp_ssa (gcc::context *ctxt)
10655     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10656   {}
10657 
10658   /* opt_pass methods: */
10659   virtual bool gate (function *fun)
10660     {
10661       return !(fun->curr_properties & PROP_gimple_eomp);
10662     }
10663   virtual unsigned int execute (function *) { return execute_expand_omp (); }
10664   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10665 
10666 }; // class pass_expand_omp_ssa
10667 
10668 } // anon namespace
10669 
10670 gimple_opt_pass *
10671 make_pass_expand_omp_ssa (gcc::context *ctxt)
10672 {
10673   return new pass_expand_omp_ssa (ctxt);
10674 }
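
/* The clone hook above exists because this variant may be instantiated
   more than once; e.g. (assuming the conventional gcc/passes.def setup)
   a copy is scheduled inside the loop auto-parallelization sub-pipeline,
   so that OMP constructs created in SSA form get expanded:

     NEXT_PASS (pass_parallelize_loops, false);
     PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops)
	 NEXT_PASS (pass_expand_omp_ssa);
     POP_INSERT_PASSES ()
*/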
10675 
10676 /* Called from tree-cfg.cc::make_edges to create CFG edges for all relevant
10677    GIMPLE_* codes.  */
10678 
10679 bool
10680 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10681 		       int *region_idx)
10682 {
10683   gimple *last = last_stmt (bb);
10684   enum gimple_code code = gimple_code (last);
10685   struct omp_region *cur_region = *region;
10686   bool fallthru = false;
10687 
10688   switch (code)
10689     {
10690     case GIMPLE_OMP_PARALLEL:
10691     case GIMPLE_OMP_FOR:
10692     case GIMPLE_OMP_SINGLE:
10693     case GIMPLE_OMP_TEAMS:
10694     case GIMPLE_OMP_MASTER:
10695     case GIMPLE_OMP_MASKED:
10696     case GIMPLE_OMP_SCOPE:
10697     case GIMPLE_OMP_CRITICAL:
10698     case GIMPLE_OMP_SECTION:
10699       cur_region = new_omp_region (bb, code, cur_region);
10700       fallthru = true;
10701       break;
10702 
10703     case GIMPLE_OMP_TASKGROUP:
10704       cur_region = new_omp_region (bb, code, cur_region);
10705       fallthru = true;
10706       cur_region = cur_region->outer;
10707       break;
10708 
10709     case GIMPLE_OMP_TASK:
10710       cur_region = new_omp_region (bb, code, cur_region);
10711       fallthru = true;
10712       if (gimple_omp_task_taskwait_p (last))
10713 	cur_region = cur_region->outer;
10714       break;
10715 
10716     case GIMPLE_OMP_ORDERED:
10717       cur_region = new_omp_region (bb, code, cur_region);
10718       fallthru = true;
10719       if (omp_find_clause (gimple_omp_ordered_clauses
10720 			     (as_a <gomp_ordered *> (last)),
10721 			   OMP_CLAUSE_DEPEND))
10722 	cur_region = cur_region->outer;
10723       break;
10724 
10725     case GIMPLE_OMP_TARGET:
10726       cur_region = new_omp_region (bb, code, cur_region);
10727       fallthru = true;
10728       switch (gimple_omp_target_kind (last))
10729 	{
10730 	case GF_OMP_TARGET_KIND_REGION:
10731 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10732 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
10733 	case GF_OMP_TARGET_KIND_OACC_SERIAL:
10734 	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10735 	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10736 	  break;
10737 	case GF_OMP_TARGET_KIND_UPDATE:
10738 	case GF_OMP_TARGET_KIND_ENTER_DATA:
10739 	case GF_OMP_TARGET_KIND_EXIT_DATA:
10740 	case GF_OMP_TARGET_KIND_DATA:
10741 	case GF_OMP_TARGET_KIND_OACC_DATA:
10742 	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10743 	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10744 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
10745 	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10746 	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10747 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
10748 	  cur_region = cur_region->outer;
10749 	  break;
10750 	default:
10751 	  gcc_unreachable ();
10752 	}
10753       break;
10754 
10755     case GIMPLE_OMP_SECTIONS:
10756       cur_region = new_omp_region (bb, code, cur_region);
10757       fallthru = true;
10758       break;
10759 
10760     case GIMPLE_OMP_SECTIONS_SWITCH:
10761       fallthru = false;
10762       break;
10763 
10764     case GIMPLE_OMP_ATOMIC_LOAD:
10765     case GIMPLE_OMP_ATOMIC_STORE:
10766       fallthru = true;
10767       break;
10768 
10769     case GIMPLE_OMP_RETURN:
10770       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10771 	 somewhere other than the next block.  This will be
10772 	 created later.  */
10773       cur_region->exit = bb;
10774       if (cur_region->type == GIMPLE_OMP_TASK)
10775 	/* Add an edge corresponding to not scheduling the task
10776 	   immediately.  */
10777 	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10778       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10779       cur_region = cur_region->outer;
10780       break;
10781 
10782     case GIMPLE_OMP_CONTINUE:
10783       cur_region->cont = bb;
10784       switch (cur_region->type)
10785 	{
10786 	case GIMPLE_OMP_FOR:
10787 	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10788 	     successor edges as abnormal to prevent splitting
10789 	     them.  */
10790 	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10791 	  /* Make the loopback edge.  */
10792 	  make_edge (bb, single_succ (cur_region->entry),
10793 		     EDGE_ABNORMAL);
10794 
10795 	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
10796 	     corresponds to the case that the body of the loop
10797 	     is not executed at all.  */
10798 	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10799 	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10800 	  fallthru = false;
10801 	  break;
10802 
10803 	case GIMPLE_OMP_SECTIONS:
10804 	  /* Wire up the edges into and out of the nested sections.  */
10805 	  {
10806 	    basic_block switch_bb = single_succ (cur_region->entry);
10807 
10808 	    struct omp_region *i;
10809 	    for (i = cur_region->inner; i ; i = i->next)
10810 	      {
10811 		gcc_assert (i->type == GIMPLE_OMP_SECTION);
10812 		make_edge (switch_bb, i->entry, 0);
10813 		make_edge (i->exit, bb, EDGE_FALLTHRU);
10814 	      }
10815 
10816 	    /* Make the loopback edge to the block with
10817 	       GIMPLE_OMP_SECTIONS_SWITCH.  */
10818 	    make_edge (bb, switch_bb, 0);
10819 
10820 	    /* Make the edge from the switch to exit.  */
10821 	    make_edge (switch_bb, bb->next_bb, 0);
10822 	    fallthru = false;
10823 	  }
10824 	  break;
10825 
10826 	case GIMPLE_OMP_TASK:
10827 	  fallthru = true;
10828 	  break;
10829 
10830 	default:
10831 	  gcc_unreachable ();
10832 	}
10833       break;
10834 
10835     default:
10836       gcc_unreachable ();
10837     }
10838 
10839   if (*region != cur_region)
10840     {
10841       *region = cur_region;
10842       if (cur_region)
10843 	*region_idx = cur_region->entry->index;
10844       else
10845 	*region_idx = 0;
10846     }
10847 
10848   return fallthru;
10849 }
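
/* For the GIMPLE_OMP_CONTINUE case above in a GIMPLE_OMP_FOR region, the
   net effect is three extra edges (a summary, not additional code): an
   abnormal loopback edge from the continue block to the loop body, an
   abnormal edge from the directive straight to the block following the
   continue (the body may execute zero times), and an abnormal fallthru
   edge out of the continue block; the EDGE_ABNORMAL flags keep these
   edges from being split before the regions are expanded.  */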
10850