1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2022 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
63
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
67
68 struct omp_region
69 {
70 /* The enclosing region. */
71 struct omp_region *outer;
72
73 /* First child region. */
74 struct omp_region *inner;
75
76 /* Next peer region. */
77 struct omp_region *next;
78
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
81
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
84
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
87
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
92
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
95
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
98
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
101
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
104
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
107
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
111 };
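
/* For instance, for

     #pragma omp parallel
       #pragma omp for
       for (i = 0; i < n; i++)
	 ...

   the region tree has a GIMPLE_OMP_PARALLEL region whose 'inner' field
   points to a GIMPLE_OMP_FOR region; directives at the same nesting level
   would be chained through 'next'. */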
112
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
115
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
120
121 /* Return true if REGION is a combined parallel+workshare region. */
122
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
125 {
126 return region->is_combined_parallel;
127 }
128
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130 is the immediate dominator of PAR_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
139
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
142
143 Is lowered into:
144
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
148
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
154
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
159
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
165
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
170
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
173 {
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
176
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
179
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
183
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
185
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
190
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
201
202 return true;
203 }
204
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
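
/* For instance, with a simd modifier present and omp_max_vf () == 8,
   a chunk size of 13 becomes (13 + 8 - 1) & -8 == 16, i.e. the chunk is
   rounded up to a whole multiple of the vectorization factor. */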
207
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210 {
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
213
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
217
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
223 }
224
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. WS_STMT is the workshare directive being
227 expanded. */
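
/* E.g. for the parallel+for example above with schedule (guided, D.1598),
   the collected arguments are roughly { (long) n1, (long) n2, (long) step,
   (long) D.1598 }, matching the trailing arguments of the
   GOMP_parallel_loop_* entry points; for a sections region it is just the
   number of sections. */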
228
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
231 {
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
235
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
237 {
238 struct omp_for_data fd;
239 tree n1, n2;
240
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
244
245 if (gimple_omp_for_combined_into_p (for_stmt))
246 {
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
256 }
257
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
259
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
262
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
265
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
268
269 if (fd.chunk_size)
270 {
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
274 }
275
276 return ws_args;
277 }
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
279 {
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
288 }
289
290 gcc_unreachable ();
291 }
292
293 /* Discover whether REGION is a combined parallel+workshare region. */
294
295 static void
296 determine_parallel_type (struct omp_region *region)
297 {
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
300
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
305
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
311
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
318
319 /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
325
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
332 {
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
335
336 if (region->inner->type == GIMPLE_OMP_FOR)
337 {
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
357 }
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
364
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
368 }
369 }
370
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
375
376 /* Dump the parallel region tree rooted at REGION. */
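
/* For a parallel region containing a single workshare loop the dump looks
   roughly like

     bb 2: GIMPLE_OMP_PARALLEL
	 bb 3: GIMPLE_OMP_FOR
	 bb 5: GIMPLE_OMP_CONTINUE
	 bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN

   where the basic block numbers are specific to the function being
   dumped. */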
377
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
380 {
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
383
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
386
387 if (region->cont)
388 {
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
391 }
392
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
398
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
401 }
402
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
405 {
406 dump_omp_region (stderr, region, 0);
407 }
408
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
411 {
412 dump_omp_region (stderr, root_omp_region, 0);
413 }
414
415 /* Create a new parallel region starting at STMT inside region PARENT. */
416
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
420 {
421 struct omp_region *region = XCNEW (struct omp_region);
422
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
426
427 if (parent)
428 {
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
433 }
434 else
435 {
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
440 }
441
442 return region;
443 }
444
445 /* Release the memory associated with the region tree rooted at REGION. */
446
447 static void
448 free_omp_region_1 (struct omp_region *region)
449 {
450 struct omp_region *i, *n;
451
452 for (i = region->inner; i ; i = n)
453 {
454 n = i->next;
455 free_omp_region_1 (i);
456 }
457
458 free (region);
459 }
460
461 /* Release the memory for the entire omp region tree. */
462
463 void
464 omp_free_regions (void)
465 {
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
468 {
469 n = r->next;
470 free_omp_region_1 (r);
471 }
472 root_omp_region = NULL;
473 }
474
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
477
478 static gcond *
479 gimple_build_cond_empty (tree cond)
480 {
481 enum tree_code pred_code;
482 tree lhs, rhs;
483
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
486 }
487
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
492
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
495
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
499 {
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502 /* OMP expansion expands inner regions before outer ones, so if
503 we e.g. have an explicit task region nested in a parallel region, when
504 expanding the task region current_function_decl will be the original
505 source function, but we actually want to use as context the child
506 function of the parallel. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
510 {
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
524 }
525
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
529
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
531 {
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
534 {
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
537 }
538 }
539 }
540
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543 being expanded. BB is the block where to insert the code. WS_ARGS
544 will be set if this is a call to a combined parallel+workshare
545 construct, it contains the list of additional arguments needed by
546 the workshare construct. */
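
/* E.g. a combined parallel+for with schedule (guided, 4) is expanded into
   roughly

     GOMP_parallel_loop_guided (bar.omp_fn.0, &.omp_data_o, 0,
				n1, n2, step, 4, 0);

   where the first two arguments are the outlined child function and its
   data block, 0 for the number of threads means "let the runtime decide",
   the middle arguments are the WS_ARGS collected by get_ws_args_for and
   the final 0 is the flags (proc_bind) argument. */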
547
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
552 {
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
560
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
562
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
570 {
571 switch (region->inner->type)
572 {
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
576 {
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
596 {
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
599 }
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
604 }
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
613 }
614 }
615
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
621
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
625
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
628 {
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
631 }
632 else
633 clause_loc = gimple_location (entry_stmt);
634
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
638
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
641
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
645 {
646 cond = gimple_boolify (cond);
647
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
653 {
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
657
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
660 {
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
664 }
665 else
666 {
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
670 }
671
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
676
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
681
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
685
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
688
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
693
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
700
701 if (gimple_in_ssa_p (cfun))
702 {
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
706 }
707
708 val = tmp_join;
709 }
710
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
714 }
715
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
724
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
732
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
735
736 if (rtmp)
737 {
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
742 }
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
745 }
746
747 /* Build the function call to GOMP_task to actually
748 generate the task operation. BB is the block where to insert the code. */
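
/* The emitted call has roughly the shape

     GOMP_task (fn, data, copy_fn, arg_size, arg_align, if_cond, flags,
		depend, priority, detach);

   or, for a taskloop, GOMP_taskloop{,_ull} with num_tasks and the start,
   end and step of the loop instead of the if/depend/detach arguments;
   most of the arguments are derived from the clauses on the construct. */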
749
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
753 {
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
757
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
759
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
767
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
772
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
778 {
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
795 {
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
799 }
800 else
801 {
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
804 {
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
809 }
810 else
811 num_tasks = integer_zero_node;
812 }
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
821 }
822 else
823 {
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
828 }
829
830 tree flags = build_int_cst (unsigned_type_node, iflags);
831
832 tree cond = boolean_true_node;
833 if (ifc)
834 {
835 if (taskloop_p)
836 {
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
844 }
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847 }
848
849 if (finalc)
850 {
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
857 }
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
867
868 gsi = gsi_last_nondebug_bb (bb);
869
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
873
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
885
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
900
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
903 }
904
905 /* Build the function call to GOMP_taskwait_depend to actually
906 generate the taskwait operation. BB is the block where to insert the
907 code. */
908
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
911 {
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
916
917 depend = OMP_CLAUSE_DECL (depend);
918
919 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920 tree t
921 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922 1, depend);
923
924 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925 false, GSI_CONTINUE_LINKING);
926 }
927
928 /* Build the function call to GOMP_teams_reg to actually
929 generate the host teams operation. ENTRY_STMT is the teams directive
930 being expanded. BB is the block where to insert the code. */
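
/* The generated call is roughly

     GOMP_teams_reg (fn, data, num_teams, thread_limit, 0);

   where 0 for num_teams or thread_limit means the runtime chooses and the
   trailing 0 is reserved for future flags. */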
931
932 static void
933 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
934 {
935 tree clauses = gimple_omp_teams_clauses (entry_stmt);
936 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937 if (num_teams == NULL_TREE)
938 num_teams = build_int_cst (unsigned_type_node, 0);
939 else
940 {
941 num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
942 num_teams = fold_convert (unsigned_type_node, num_teams);
943 }
944 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945 if (thread_limit == NULL_TREE)
946 thread_limit = build_int_cst (unsigned_type_node, 0);
947 else
948 {
949 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950 thread_limit = fold_convert (unsigned_type_node, thread_limit);
951 }
952
953 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955 if (t == NULL)
956 t1 = null_pointer_node;
957 else
958 t1 = build_fold_addr_expr (t);
959 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960 tree t2 = build_fold_addr_expr (child_fndecl);
961
962 vec<tree, va_gc> *args;
963 vec_alloc (args, 5);
964 args->quick_push (t2);
965 args->quick_push (t1);
966 args->quick_push (num_teams);
967 args->quick_push (thread_limit);
968 /* For future extensibility. */
969 args->quick_push (build_zero_cst (unsigned_type_node));
970
971 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973 args);
974
975 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976 false, GSI_CONTINUE_LINKING);
977 }
978
979 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
980
981 static tree
982 vec2chain (vec<tree, va_gc> *v)
983 {
984 tree chain = NULL_TREE, t;
985 unsigned ix;
986
987 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
988 {
989 DECL_CHAIN (t) = chain;
990 chain = t;
991 }
992
993 return chain;
994 }
995
996 /* Remove barriers in REGION->EXIT's block. Note that this is only
997 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
998 is an implicit barrier, any barrier that a workshare inside the
999 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region can
1000 now be removed. */
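
/* E.g. in

     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...;	/+ implicit barrier here +/
     }			/+ implicit barrier at the end of the parallel +/

   the workshare's trailing barrier is redundant and its GIMPLE_OMP_RETURN
   can be marked nowait, unless queued tasks might still need addressable
   locals of the parallel to stay in scope. */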
1001
1002 static void
1003 remove_exit_barrier (struct omp_region *region)
1004 {
1005 gimple_stmt_iterator gsi;
1006 basic_block exit_bb;
1007 edge_iterator ei;
1008 edge e;
1009 gimple *stmt;
1010 int any_addressable_vars = -1;
1011
1012 exit_bb = region->exit;
1013
1014 /* If the parallel region doesn't return, we don't have REGION->EXIT
1015 block at all. */
1016 if (! exit_bb)
1017 return;
1018
1019 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1020 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1021 statements that can appear in between are extremely limited -- no
1022 memory operations at all. Here, we allow nothing at all, so the
1023 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1024 gsi = gsi_last_nondebug_bb (exit_bb);
1025 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026 gsi_prev_nondebug (&gsi);
1027 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028 return;
1029
1030 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1031 {
1032 gsi = gsi_last_nondebug_bb (e->src);
1033 if (gsi_end_p (gsi))
1034 continue;
1035 stmt = gsi_stmt (gsi);
1036 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037 && !gimple_omp_return_nowait_p (stmt))
1038 {
1039 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040 in many cases. If there could be tasks queued, the barrier
1041 might be needed to let the tasks run before some local
1042 variable of the parallel that the task uses as shared
1043 runs out of scope. The task can be spawned either
1044 from within current function (this would be easy to check)
1045 or from some function it calls and gets passed an address
1046 of such a variable. */
1047 if (any_addressable_vars < 0)
1048 {
1049 gomp_parallel *parallel_stmt
1050 = as_a <gomp_parallel *> (last_stmt (region->entry));
1051 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052 tree local_decls, block, decl;
1053 unsigned ix;
1054
1055 any_addressable_vars = 0;
1056 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057 if (TREE_ADDRESSABLE (decl))
1058 {
1059 any_addressable_vars = 1;
1060 break;
1061 }
1062 for (block = gimple_block (stmt);
1063 !any_addressable_vars
1064 && block
1065 && TREE_CODE (block) == BLOCK;
1066 block = BLOCK_SUPERCONTEXT (block))
1067 {
1068 for (local_decls = BLOCK_VARS (block);
1069 local_decls;
1070 local_decls = DECL_CHAIN (local_decls))
1071 if (TREE_ADDRESSABLE (local_decls))
1072 {
1073 any_addressable_vars = 1;
1074 break;
1075 }
1076 if (block == gimple_block (parallel_stmt))
1077 break;
1078 }
1079 }
1080 if (!any_addressable_vars)
1081 gimple_omp_return_set_nowait (stmt);
1082 }
1083 }
1084 }
1085
1086 static void
1087 remove_exit_barriers (struct omp_region *region)
1088 {
1089 if (region->type == GIMPLE_OMP_PARALLEL)
1090 remove_exit_barrier (region);
1091
1092 if (region->inner)
1093 {
1094 region = region->inner;
1095 remove_exit_barriers (region);
1096 while (region->next)
1097 {
1098 region = region->next;
1099 remove_exit_barriers (region);
1100 }
1101 }
1102 }
1103
1104 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105 calls. These can't be declared as const functions, but
1106 within one parallel body they are constant, so they can be
1107 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108 which are declared const. Similarly for a task body, except
1109 that in an untied task omp_get_thread_num () can change at any task
1110 scheduling point. */
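
/* E.g. two calls to omp_get_num_threads () in the same parallel body can
   both be redirected to __builtin_omp_get_num_threads (); as that builtin
   is declared const, later passes can CSE them into a single call. */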
1111
1112 static void
1113 optimize_omp_library_calls (gimple *entry_stmt)
1114 {
1115 basic_block bb;
1116 gimple_stmt_iterator gsi;
1117 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123 OMP_CLAUSE_UNTIED) != NULL);
1124
1125 FOR_EACH_BB_FN (bb, cfun)
1126 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1127 {
1128 gimple *call = gsi_stmt (gsi);
1129 tree decl;
1130
1131 if (is_gimple_call (call)
1132 && (decl = gimple_call_fndecl (call))
1133 && DECL_EXTERNAL (decl)
1134 && TREE_PUBLIC (decl)
1135 && DECL_INITIAL (decl) == NULL)
1136 {
1137 tree built_in;
1138
1139 if (DECL_NAME (decl) == thr_num_id)
1140 {
1141 /* In #pragma omp task untied omp_get_thread_num () can change
1142 during the execution of the task region. */
1143 if (untied_task)
1144 continue;
1145 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1146 }
1147 else if (DECL_NAME (decl) == num_thr_id)
1148 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149 else
1150 continue;
1151
1152 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153 || gimple_call_num_args (call) != 0)
1154 continue;
1155
1156 if (flag_exceptions && !TREE_NOTHROW (decl))
1157 continue;
1158
1159 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161 TREE_TYPE (TREE_TYPE (built_in))))
1162 continue;
1163
1164 gimple_call_set_fndecl (call, built_in);
1165 }
1166 }
1167 }
1168
1169 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1170 regimplified. */
1171
1172 static tree
1173 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1174 {
1175 tree t = *tp;
1176
1177 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1178 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179 return t;
1180
1181 if (TREE_CODE (t) == ADDR_EXPR)
1182 recompute_tree_invariant_for_addr_expr (t);
1183
1184 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185 return NULL_TREE;
1186 }
1187
1188 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1189
1190 static void
1191 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192 bool after)
1193 {
1194 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196 !after, after ? GSI_CONTINUE_LINKING
1197 : GSI_SAME_STMT);
1198 gimple *stmt = gimple_build_assign (to, from);
1199 if (after)
1200 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201 else
1202 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1205 {
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gimple_regimplify_operands (stmt, &gsi);
1208 }
1209 }
1210
1211 /* Prepend or append LHS CODE RHS condition before or after *GSI_P. */
1212
1213 static gcond *
1214 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1215 tree lhs, tree rhs, bool after = false)
1216 {
1217 gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1218 if (after)
1219 gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1220 else
1221 gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1222 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1223 NULL, NULL)
1224 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225 NULL, NULL))
1226 {
1227 gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1228 gimple_regimplify_operands (cond_stmt, &gsi);
1229 }
1230 return cond_stmt;
1231 }
1232
1233 /* Expand the OpenMP parallel or task directive starting at REGION. */
1234
1235 static void
1236 expand_omp_taskreg (struct omp_region *region)
1237 {
1238 basic_block entry_bb, exit_bb, new_bb;
1239 struct function *child_cfun;
1240 tree child_fn, block, t;
1241 gimple_stmt_iterator gsi;
1242 gimple *entry_stmt, *stmt;
1243 edge e;
1244 vec<tree, va_gc> *ws_args;
1245
1246 entry_stmt = last_stmt (region->entry);
1247 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1248 && gimple_omp_task_taskwait_p (entry_stmt))
1249 {
1250 new_bb = region->entry;
1251 gsi = gsi_last_nondebug_bb (region->entry);
1252 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1253 gsi_remove (&gsi, true);
1254 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1255 return;
1256 }
1257
1258 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1259 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1260
1261 entry_bb = region->entry;
1262 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1263 exit_bb = region->cont;
1264 else
1265 exit_bb = region->exit;
1266
1267 if (is_combined_parallel (region))
1268 ws_args = region->ws_args;
1269 else
1270 ws_args = NULL;
1271
1272 if (child_cfun->cfg)
1273 {
1274 /* Due to inlining, it may happen that we have already outlined
1275 the region, in which case all we need to do is make the
1276 sub-graph unreachable and emit the parallel call. */
1277 edge entry_succ_e, exit_succ_e;
1278
1279 entry_succ_e = single_succ_edge (entry_bb);
1280
1281 gsi = gsi_last_nondebug_bb (entry_bb);
1282 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1283 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1284 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1285 gsi_remove (&gsi, true);
1286
1287 new_bb = entry_bb;
1288 if (exit_bb)
1289 {
1290 exit_succ_e = single_succ_edge (exit_bb);
1291 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1292 }
1293 remove_edge_and_dominated_blocks (entry_succ_e);
1294 }
1295 else
1296 {
1297 unsigned srcidx, dstidx, num;
1298
1299 /* If the parallel region needs data sent from the parent
1300 function, then the very first statement (except possible
1301 tree profile counter updates) of the parallel body
1302 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1303 &.OMP_DATA_O is passed as an argument to the child function,
1304 we need to replace it with the argument as seen by the child
1305 function.
1306
1307 In most cases, this will end up being the identity assignment
1308 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1309 a function call that has been inlined, the original PARM_DECL
1310 .OMP_DATA_I may have been converted into a different local
1311 variable. In which case, we need to keep the assignment. */
1312 if (gimple_omp_taskreg_data_arg (entry_stmt))
1313 {
1314 basic_block entry_succ_bb
1315 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1316 : FALLTHRU_EDGE (entry_bb)->dest;
1317 tree arg;
1318 gimple *parcopy_stmt = NULL;
1319
1320 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1321 {
1322 gimple *stmt;
1323
1324 gcc_assert (!gsi_end_p (gsi));
1325 stmt = gsi_stmt (gsi);
1326 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1327 continue;
1328
1329 if (gimple_num_ops (stmt) == 2)
1330 {
1331 tree arg = gimple_assign_rhs1 (stmt);
1332
1333 /* We're ignoring the subcode because we're
1334 effectively doing a STRIP_NOPS. */
1335
1336 if (TREE_CODE (arg) == ADDR_EXPR
1337 && (TREE_OPERAND (arg, 0)
1338 == gimple_omp_taskreg_data_arg (entry_stmt)))
1339 {
1340 parcopy_stmt = stmt;
1341 break;
1342 }
1343 }
1344 }
1345
1346 gcc_assert (parcopy_stmt != NULL);
1347 arg = DECL_ARGUMENTS (child_fn);
1348
1349 if (!gimple_in_ssa_p (cfun))
1350 {
1351 if (gimple_assign_lhs (parcopy_stmt) == arg)
1352 gsi_remove (&gsi, true);
1353 else
1354 {
1355 /* ?? Is setting the subcode really necessary ?? */
1356 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1357 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1358 }
1359 }
1360 else
1361 {
1362 tree lhs = gimple_assign_lhs (parcopy_stmt);
1363 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1364 /* We'd like to set the rhs to the default def in the child_fn,
1365 but it's too early to create ssa names in the child_fn.
1366 Instead, we set the rhs to the parm. In
1367 move_sese_region_to_fn, we introduce a default def for the
1368 parm, map the parm to its default def, and once we encounter
1369 this stmt, replace the parm with the default def. */
1370 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1371 update_stmt (parcopy_stmt);
1372 }
1373 }
1374
1375 /* Declare local variables needed in CHILD_CFUN. */
1376 block = DECL_INITIAL (child_fn);
1377 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1378 /* The gimplifier could record temporaries in parallel/task block
1379 rather than in containing function's local_decls chain,
1380 which would mean cgraph missed finalizing them. Do it now. */
1381 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1382 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1383 varpool_node::finalize_decl (t);
1384 DECL_SAVED_TREE (child_fn) = NULL;
1385 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1386 gimple_set_body (child_fn, NULL);
1387 TREE_USED (block) = 1;
1388
1389 /* Reset DECL_CONTEXT on function arguments. */
1390 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1391 DECL_CONTEXT (t) = child_fn;
1392
1393 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1394 so that it can be moved to the child function. */
1395 gsi = gsi_last_nondebug_bb (entry_bb);
1396 stmt = gsi_stmt (gsi);
1397 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1398 || gimple_code (stmt) == GIMPLE_OMP_TASK
1399 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1400 e = split_block (entry_bb, stmt);
1401 gsi_remove (&gsi, true);
1402 entry_bb = e->dest;
1403 edge e2 = NULL;
1404 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1405 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1406 else
1407 {
1408 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1409 gcc_assert (e2->dest == region->exit);
1410 remove_edge (BRANCH_EDGE (entry_bb));
1411 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1412 gsi = gsi_last_nondebug_bb (region->exit);
1413 gcc_assert (!gsi_end_p (gsi)
1414 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1415 gsi_remove (&gsi, true);
1416 }
1417
1418 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1419 if (exit_bb)
1420 {
1421 gsi = gsi_last_nondebug_bb (exit_bb);
1422 gcc_assert (!gsi_end_p (gsi)
1423 && (gimple_code (gsi_stmt (gsi))
1424 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1425 stmt = gimple_build_return (NULL);
1426 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1427 gsi_remove (&gsi, true);
1428 }
1429
1430 /* Move the parallel region into CHILD_CFUN. */
1431
1432 if (gimple_in_ssa_p (cfun))
1433 {
1434 init_tree_ssa (child_cfun);
1435 init_ssa_operands (child_cfun);
1436 child_cfun->gimple_df->in_ssa_p = true;
1437 block = NULL_TREE;
1438 }
1439 else
1440 block = gimple_block (entry_stmt);
1441
1442 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1443 if (exit_bb)
1444 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1445 if (e2)
1446 {
1447 basic_block dest_bb = e2->dest;
1448 if (!exit_bb)
1449 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1450 remove_edge (e2);
1451 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1452 }
1453 /* When the OMP expansion process cannot guarantee an up-to-date
1454 loop tree, arrange for the child function to fix up loops. */
1455 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1456 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1457
1458 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1459 num = vec_safe_length (child_cfun->local_decls);
1460 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1461 {
1462 t = (*child_cfun->local_decls)[srcidx];
1463 if (DECL_CONTEXT (t) == cfun->decl)
1464 continue;
1465 if (srcidx != dstidx)
1466 (*child_cfun->local_decls)[dstidx] = t;
1467 dstidx++;
1468 }
1469 if (dstidx != num)
1470 vec_safe_truncate (child_cfun->local_decls, dstidx);
1471
1472 /* Inform the callgraph about the new function. */
1473 child_cfun->curr_properties = cfun->curr_properties;
1474 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1475 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1476 cgraph_node *node = cgraph_node::get_create (child_fn);
1477 node->parallelized_function = 1;
1478 cgraph_node::add_new_function (child_fn, true);
1479
1480 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1481 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1482
1483 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1484 fixed in a following pass. */
1485 push_cfun (child_cfun);
1486 if (need_asm)
1487 assign_assembler_name_if_needed (child_fn);
1488
1489 if (optimize)
1490 optimize_omp_library_calls (entry_stmt);
1491 update_max_bb_count ();
1492 cgraph_edge::rebuild_edges ();
1493
1494 /* Some EH regions might become dead, see PR34608. If
1495 pass_cleanup_cfg isn't the first pass to happen with the
1496 new child, these dead EH edges might cause problems.
1497 Clean them up now. */
1498 if (flag_exceptions)
1499 {
1500 basic_block bb;
1501 bool changed = false;
1502
1503 FOR_EACH_BB_FN (bb, cfun)
1504 changed |= gimple_purge_dead_eh_edges (bb);
1505 if (changed)
1506 cleanup_tree_cfg ();
1507 }
1508 if (gimple_in_ssa_p (cfun))
1509 update_ssa (TODO_update_ssa);
1510 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1511 verify_loop_structure ();
1512 pop_cfun ();
1513
1514 if (dump_file && !gimple_in_ssa_p (cfun))
1515 {
1516 omp_any_child_fn_dumped = true;
1517 dump_function_header (dump_file, child_fn, dump_flags);
1518 dump_function_to_file (child_fn, dump_file, dump_flags);
1519 }
1520 }
1521
1522 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1523
1524 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1525 expand_parallel_call (region, new_bb,
1526 as_a <gomp_parallel *> (entry_stmt), ws_args);
1527 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1528 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1529 else
1530 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1531 if (gimple_in_ssa_p (cfun))
1532 update_ssa (TODO_update_ssa_only_virtuals);
1533 }
1534
1535 /* Information about members of an OpenACC collapsed loop nest. */
1536
1537 struct oacc_collapse
1538 {
1539 tree base; /* Base value. */
1540 tree iters; /* Number of steps. */
1541 tree step; /* Step size. */
1542 tree tile; /* Tile increment (if tiled). */
1543 tree outer; /* Tile iterator var. */
1544 };
1545
1546 /* Helper for expand_oacc_for. Determine collapsed loop information.
1547 Fill in COUNTS array. Emit any initialization code before GSI.
1548 Return the calculated outer loop bound of BOUND_TYPE. */
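
/* For instance, for the collapsed nest

     for (i = 0; i < 4; i++)
       for (j = 0; j < 10; j += 3)

   the inner loop contributes iters = (10 - 1 + 3) / 3 == 4, so the
   returned outer bound is 4 * 4 == 16 iterations of the single collapsed
   loop. */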
1549
1550 static tree
1551 expand_oacc_collapse_init (const struct omp_for_data *fd,
1552 gimple_stmt_iterator *gsi,
1553 oacc_collapse *counts, tree diff_type,
1554 tree bound_type, location_t loc)
1555 {
1556 tree tiling = fd->tiling;
1557 tree total = build_int_cst (bound_type, 1);
1558 int ix;
1559
1560 gcc_assert (integer_onep (fd->loop.step));
1561 gcc_assert (integer_zerop (fd->loop.n1));
1562
1563 /* When tiling, the first operand of the tile clause applies to the
1564 innermost loop, and we work outwards from there. Seems
1565 backwards, but whatever. */
1566 for (ix = fd->collapse; ix--;)
1567 {
1568 const omp_for_data_loop *loop = &fd->loops[ix];
1569
1570 tree iter_type = TREE_TYPE (loop->v);
1571 tree plus_type = iter_type;
1572
1573 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1574
1575 if (POINTER_TYPE_P (iter_type))
1576 plus_type = sizetype;
1577
1578 if (tiling)
1579 {
1580 tree num = build_int_cst (integer_type_node, fd->collapse);
1581 tree loop_no = build_int_cst (integer_type_node, ix);
1582 tree tile = TREE_VALUE (tiling);
1583 gcall *call
1584 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585 /* gwv-outer=*/integer_zero_node,
1586 /* gwv-inner=*/integer_zero_node);
1587
1588 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590 gimple_call_set_lhs (call, counts[ix].tile);
1591 gimple_set_location (call, loc);
1592 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1593
1594 tiling = TREE_CHAIN (tiling);
1595 }
1596 else
1597 {
1598 counts[ix].tile = NULL;
1599 counts[ix].outer = loop->v;
1600 }
1601
1602 tree b = loop->n1;
1603 tree e = loop->n2;
1604 tree s = loop->step;
1605 bool up = loop->cond_code == LT_EXPR;
1606 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607 bool negating;
1608 tree expr;
1609
1610 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611 true, GSI_SAME_STMT);
1612 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614
1615 /* Convert the step, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617 if (negating)
1618 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619 s = fold_convert (diff_type, s);
1620 if (negating)
1621 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623 true, GSI_SAME_STMT);
1624
1625 /* Determine the range, avoiding possible unsigned->signed overflow. */
1626 negating = !up && TYPE_UNSIGNED (iter_type);
1627 expr = fold_build2 (MINUS_EXPR, plus_type,
1628 fold_convert (plus_type, negating ? b : e),
1629 fold_convert (plus_type, negating ? e : b));
1630 expr = fold_convert (diff_type, expr);
1631 if (negating)
1632 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633 tree range = force_gimple_operand_gsi
1634 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1635
1636 /* Determine number of iterations. */
1637 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1640
1641 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642 true, GSI_SAME_STMT);
1643
1644 counts[ix].base = b;
1645 counts[ix].iters = iters;
1646 counts[ix].step = s;
1647
1648 total = fold_build2 (MULT_EXPR, bound_type, total,
1649 fold_convert (bound_type, iters));
1650 }
1651
1652 return total;
1653 }
1654
1655 /* Emit initializers for collapsed loop members. INNER is true if
1656 this is for the element loop of a TILE. IVAR is the outer
1657 loop iteration variable, from which collapsed loop iteration values
1658 are calculated. COUNTS array has been initialized by
1659 expand_oacc_collapse_init. */
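
/* E.g. for the two-loop nest above (outer 4 iterations, inner 4 iterations
   with base 0 and step 3), an IVAR in [0, 16) is decomposed as
   j = 0 + (IVAR % 4) * 3 and i = 0 + (IVAR / 4) * 1. */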
1660
1661 static void
1662 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1663 gimple_stmt_iterator *gsi,
1664 const oacc_collapse *counts, tree ivar,
1665 tree diff_type)
1666 {
1667 tree ivar_type = TREE_TYPE (ivar);
1668
1669 /* The most rapidly changing iteration variable is the innermost
1670 one. */
1671 for (int ix = fd->collapse; ix--;)
1672 {
1673 const omp_for_data_loop *loop = &fd->loops[ix];
1674 const oacc_collapse *collapse = &counts[ix];
1675 tree v = inner ? loop->v : collapse->outer;
1676 tree iter_type = TREE_TYPE (v);
1677 tree plus_type = iter_type;
1678 enum tree_code plus_code = PLUS_EXPR;
1679 tree expr;
1680
1681 if (POINTER_TYPE_P (iter_type))
1682 {
1683 plus_code = POINTER_PLUS_EXPR;
1684 plus_type = sizetype;
1685 }
1686
1687 expr = ivar;
1688 if (ix)
1689 {
1690 tree mod = fold_convert (ivar_type, collapse->iters);
1691 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1692 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1693 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1694 true, GSI_SAME_STMT);
1695 }
1696
1697 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1698 fold_convert (diff_type, collapse->step));
1699 expr = fold_build2 (plus_code, iter_type,
1700 inner ? collapse->outer : collapse->base,
1701 fold_convert (plus_type, expr));
1702 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1703 true, GSI_SAME_STMT);
1704 gassign *ass = gimple_build_assign (v, expr);
1705 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1706 }
1707 }
1708
1709 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1710 of the combined collapse > 1 loop constructs, generate code like:
1711 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712 if (cond3 is <)
1713 adj = STEP3 - 1;
1714 else
1715 adj = STEP3 + 1;
1716 count3 = (adj + N32 - N31) / STEP3;
1717 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718 if (cond2 is <)
1719 adj = STEP2 - 1;
1720 else
1721 adj = STEP2 + 1;
1722 count2 = (adj + N22 - N21) / STEP2;
1723 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724 if (cond1 is <)
1725 adj = STEP1 - 1;
1726 else
1727 adj = STEP1 + 1;
1728 count1 = (adj + N12 - N11) / STEP1;
1729 count = count1 * count2 * count3;
1730 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731 count = 0;
1732 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1733 of the combined loop constructs, just initialize COUNTS array
1734 from the _looptemp_ clauses. For loop nests with non-rectangular
1735    loops, do this only for the rectangular loops.  Then pick
1736    the loops which reference outer vars in their bound expressions,
1737    and the loops which they refer to, and compute the number of
1738    iterations for this sub-nest.  For triangular loops use Faulhaber's
1739    formula, otherwise as a fallback compute by iterating the loops.
1740 If e.g. the sub-nest is
1741 for (I = N11; I COND1 N12; I += STEP1)
1742 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1744 do:
1745 COUNT = 0;
1746 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747 for (tmpj = M21 * tmpi + N21;
1748 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1749 {
1750 int tmpk1 = M31 * tmpj + N31;
1751 int tmpk2 = M32 * tmpj + N32;
1752 if (tmpk1 COND3 tmpk2)
1753 {
1754 if (COND3 is <)
1755 adj = STEP3 - 1;
1756 else
1757 adj = STEP3 + 1;
1758 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1759 }
1760 }
1761 and finally multiply the counts of the rectangular loops not
1762    in the sub-nest with COUNT.  Also, store in counts[fd->last_nonrect]
1763    the number of iterations of the loops from fd->first_nonrect
1764    to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765    by the counts of rectangular loops not referenced in any non-rectangular
1766    loops sandwiched in between those.  */
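/* As a sketch of the triangular case: if the innermost loop runs
   FIRST_INNER_ITERATIONS times on the first outer iteration and that
   count changes by a constant FACTOR on each subsequent outer iteration,
   then over OUTER_NITERS outer iterations
     COUNT = OUTER_NITERS * FIRST_INNER_ITERATIONS
	     + FACTOR * OUTER_NITERS * (OUTER_NITERS - 1) / 2
   i.e. the sum of an arithmetic series, which is what the closed-form
   path below computes instead of iterating.  */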
1767
1768 /* NOTE: It *could* be better to moosh all of the BBs together,
1769 creating one larger BB with all the computation and the unexpected
1770 jump at the end. I.e.
1771
1772 bool zero3, zero2, zero1, zero;
1773
1774 zero3 = N32 c3 N31;
1775 count3 = (N32 - N31) /[cl] STEP3;
1776 zero2 = N22 c2 N21;
1777 count2 = (N22 - N21) /[cl] STEP2;
1778 zero1 = N12 c1 N11;
1779 count1 = (N12 - N11) /[cl] STEP1;
1780 zero = zero3 || zero2 || zero1;
1781 count = count1 * count2 * count3;
1782 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1783
1784    After all, we expect zero to be false, and thus we expect to have to
1785 evaluate all of the comparison expressions, so short-circuiting
1786 oughtn't be a win. Since the condition isn't protecting a
1787 denominator, we're not concerned about divide-by-zero, so we can
1788 fully evaluate count even if a numerator turned out to be wrong.
1789
1790 It seems like putting this all together would create much better
1791 scheduling opportunities, and less pressure on the chip's branch
1792 predictor. */
1793
1794 static void
1795 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 basic_block &entry_bb, tree *counts,
1797 basic_block &zero_iter1_bb, int &first_zero_iter1,
1798 basic_block &zero_iter2_bb, int &first_zero_iter2,
1799 basic_block &l2_dom_bb)
1800 {
1801 tree t, type = TREE_TYPE (fd->loop.v);
1802 edge e, ne;
1803 int i;
1804
1805 /* Collapsed loops need work for expansion into SSA form. */
1806 gcc_assert (!gimple_in_ssa_p (cfun));
1807
1808 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1810 {
1811 gcc_assert (fd->ordered == 0);
1812 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813 isn't supposed to be handled, as the inner loop doesn't
1814 use it. */
1815 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816 OMP_CLAUSE__LOOPTEMP_);
1817 gcc_assert (innerc);
1818 for (i = 0; i < fd->collapse; i++)
1819 {
1820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821 OMP_CLAUSE__LOOPTEMP_);
1822 gcc_assert (innerc);
1823 if (i)
1824 counts[i] = OMP_CLAUSE_DECL (innerc);
1825 else
1826 counts[0] = NULL_TREE;
1827 }
1828 if (fd->non_rect
1829 && fd->last_nonrect == fd->first_nonrect + 1
1830 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1831 {
1832 tree c[4];
1833 for (i = 0; i < 4; i++)
1834 {
1835 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836 OMP_CLAUSE__LOOPTEMP_);
1837 gcc_assert (innerc);
1838 c[i] = OMP_CLAUSE_DECL (innerc);
1839 }
1840 counts[0] = c[0];
1841 fd->first_inner_iterations = c[1];
1842 fd->factor = c[2];
1843 fd->adjn1 = c[3];
1844 }
1845 return;
1846 }
1847
1848 for (i = fd->collapse; i < fd->ordered; i++)
1849 {
1850 tree itype = TREE_TYPE (fd->loops[i].v);
1851 counts[i] = NULL_TREE;
1852 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853 fold_convert (itype, fd->loops[i].n1),
1854 fold_convert (itype, fd->loops[i].n2));
1855 if (t && integer_zerop (t))
1856 {
1857 for (i = fd->collapse; i < fd->ordered; i++)
1858 counts[i] = build_int_cst (type, 0);
1859 break;
1860 }
1861 }
1862 bool rect_count_seen = false;
1863 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1864 {
1865 tree itype = TREE_TYPE (fd->loops[i].v);
1866
1867 if (i >= fd->collapse && counts[i])
1868 continue;
1869 if (fd->non_rect)
1870 {
1871 /* Skip loops that use outer iterators in their expressions
1872 during this phase. */
1873 if (fd->loops[i].m1 || fd->loops[i].m2)
1874 {
1875 counts[i] = build_zero_cst (type);
1876 continue;
1877 }
1878 }
1879 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881 fold_convert (itype, fd->loops[i].n1),
1882 fold_convert (itype, fd->loops[i].n2)))
1883 == NULL_TREE || !integer_onep (t)))
1884 {
1885 gcond *cond_stmt;
1886 tree n1, n2;
1887 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889 true, GSI_SAME_STMT);
1890 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892 true, GSI_SAME_STMT);
1893 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894 n1, n2);
1895 e = split_block (entry_bb, cond_stmt);
1896 basic_block &zero_iter_bb
1897 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898 int &first_zero_iter
1899 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900 if (zero_iter_bb == NULL)
1901 {
1902 gassign *assign_stmt;
1903 first_zero_iter = i;
1904 zero_iter_bb = create_empty_bb (entry_bb);
1905 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906 *gsi = gsi_after_labels (zero_iter_bb);
1907 if (i < fd->collapse)
1908 assign_stmt = gimple_build_assign (fd->loop.n2,
1909 build_zero_cst (type));
1910 else
1911 {
1912 counts[i] = create_tmp_reg (type, ".count");
1913 assign_stmt
1914 = gimple_build_assign (counts[i], build_zero_cst (type));
1915 }
1916 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918 entry_bb);
1919 }
1920 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921 ne->probability = profile_probability::very_unlikely ();
1922 e->flags = EDGE_TRUE_VALUE;
1923 e->probability = ne->probability.invert ();
1924 if (l2_dom_bb == NULL)
1925 l2_dom_bb = entry_bb;
1926 entry_bb = e->dest;
1927 *gsi = gsi_last_nondebug_bb (entry_bb);
1928 }
1929
1930 if (POINTER_TYPE_P (itype))
1931 itype = signed_type_for (itype);
1932 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933 ? -1 : 1));
1934 t = fold_build2 (PLUS_EXPR, itype,
1935 fold_convert (itype, fd->loops[i].step), t);
1936 t = fold_build2 (PLUS_EXPR, itype, t,
1937 fold_convert (itype, fd->loops[i].n2));
1938 t = fold_build2 (MINUS_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].n1));
1940 	  /* ?? We could probably use CEIL_DIV_EXPR instead of
1941 	     TRUNC_DIV_EXPR plus the manual adjustment.  But perhaps we
1942 	     can't generate the same code in the end because generically
1943 	     we don't know that the values involved must be negative
1944 	     for GT.  ?? */
1945 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947 fold_build1 (NEGATE_EXPR, itype, t),
1948 fold_build1 (NEGATE_EXPR, itype,
1949 fold_convert (itype,
1950 fd->loops[i].step)));
1951 else
1952 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953 fold_convert (itype, fd->loops[i].step));
1954 t = fold_convert (type, t);
1955 if (TREE_CODE (t) == INTEGER_CST)
1956 counts[i] = t;
1957 else
1958 {
1959 if (i < fd->collapse || i != first_zero_iter2)
1960 counts[i] = create_tmp_reg (type, ".count");
1961 expand_omp_build_assign (gsi, counts[i], t);
1962 }
1963 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1964 {
1965 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966 continue;
1967 if (!rect_count_seen)
1968 {
1969 t = counts[i];
1970 rect_count_seen = true;
1971 }
1972 else
1973 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974 expand_omp_build_assign (gsi, fd->loop.n2, t);
1975 }
1976 }
1977 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1978 {
1979 gcc_assert (fd->last_nonrect != -1);
1980
1981 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983 build_zero_cst (type));
1984 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985 if (fd->loops[i].m1
1986 || fd->loops[i].m2
1987 || fd->loops[i].non_rect_referenced)
1988 break;
1989 if (i == fd->last_nonrect
1990 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1993 {
1994 int o = fd->first_nonrect;
1995 tree itype = TREE_TYPE (fd->loops[o].v);
1996 tree n1o = create_tmp_reg (itype, ".n1o");
1997 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998 expand_omp_build_assign (gsi, n1o, t);
1999 tree n2o = create_tmp_reg (itype, ".n2o");
2000 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001 expand_omp_build_assign (gsi, n2o, t);
2002 if (fd->loops[i].m1 && fd->loops[i].m2)
2003 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004 unshare_expr (fd->loops[i].m1));
2005 else if (fd->loops[i].m1)
2006 t = fold_build1 (NEGATE_EXPR, itype,
2007 unshare_expr (fd->loops[i].m1));
2008 else
2009 t = unshare_expr (fd->loops[i].m2);
2010 tree m2minusm1
2011 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2013
2014 gimple_stmt_iterator gsi2 = *gsi;
2015 gsi_prev (&gsi2);
2016 e = split_block (entry_bb, gsi_stmt (gsi2));
2017 e = split_block (e->dest, (gimple *) NULL);
2018 basic_block bb1 = e->src;
2019 entry_bb = e->dest;
2020 *gsi = gsi_after_labels (entry_bb);
2021
2022 gsi2 = gsi_after_labels (bb1);
2023 tree ostep = fold_convert (itype, fd->loops[o].step);
2024 t = build_int_cst (itype, (fd->loops[o].cond_code
2025 == LT_EXPR ? -1 : 1));
2026 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029 if (TYPE_UNSIGNED (itype)
2030 && fd->loops[o].cond_code == GT_EXPR)
2031 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032 fold_build1 (NEGATE_EXPR, itype, t),
2033 fold_build1 (NEGATE_EXPR, itype, ostep));
2034 else
2035 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036 tree outer_niters
2037 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038 true, GSI_SAME_STMT);
2039 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040 build_one_cst (itype));
2041 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044 true, GSI_SAME_STMT);
2045 tree n1, n2, n1e, n2e;
2046 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047 if (fd->loops[i].m1)
2048 {
2049 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2052 }
2053 else
2054 n1 = t;
2055 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056 true, GSI_SAME_STMT);
2057 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058 if (fd->loops[i].m2)
2059 {
2060 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2063 }
2064 else
2065 n2 = t;
2066 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067 true, GSI_SAME_STMT);
2068 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069 if (fd->loops[i].m1)
2070 {
2071 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2074 }
2075 else
2076 n1e = t;
2077 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078 true, GSI_SAME_STMT);
2079 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080 if (fd->loops[i].m2)
2081 {
2082 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2085 }
2086 else
2087 n2e = t;
2088 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089 true, GSI_SAME_STMT);
2090 gcond *cond_stmt
2091 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092 n1, n2);
2093 e = split_block (bb1, cond_stmt);
2094 e->flags = EDGE_TRUE_VALUE;
2095 e->probability = profile_probability::likely ().guessed ();
2096 basic_block bb2 = e->dest;
2097 gsi2 = gsi_after_labels (bb2);
2098
2099 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100 n1e, n2e);
2101 e = split_block (bb2, cond_stmt);
2102 e->flags = EDGE_TRUE_VALUE;
2103 e->probability = profile_probability::likely ().guessed ();
2104 gsi2 = gsi_after_labels (e->dest);
2105
2106 tree step = fold_convert (itype, fd->loops[i].step);
2107 t = build_int_cst (itype, (fd->loops[i].cond_code
2108 == LT_EXPR ? -1 : 1));
2109 t = fold_build2 (PLUS_EXPR, itype, step, t);
2110 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112 if (TYPE_UNSIGNED (itype)
2113 && fd->loops[i].cond_code == GT_EXPR)
2114 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115 fold_build1 (NEGATE_EXPR, itype, t),
2116 fold_build1 (NEGATE_EXPR, itype, step));
2117 else
2118 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119 tree first_inner_iterations
2120 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121 true, GSI_SAME_STMT);
2122 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123 if (TYPE_UNSIGNED (itype)
2124 && fd->loops[i].cond_code == GT_EXPR)
2125 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126 fold_build1 (NEGATE_EXPR, itype, t),
2127 fold_build1 (NEGATE_EXPR, itype, step));
2128 else
2129 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130 tree factor
2131 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132 true, GSI_SAME_STMT);
2133 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134 build_one_cst (itype));
2135 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137 t = fold_build2 (MULT_EXPR, itype, factor, t);
2138 t = fold_build2 (PLUS_EXPR, itype,
2139 fold_build2 (MULT_EXPR, itype, outer_niters,
2140 first_inner_iterations), t);
2141 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142 fold_convert (type, t));
2143
2144 basic_block bb3 = create_empty_bb (bb1);
2145 add_bb_to_loop (bb3, bb1->loop_father);
2146
2147 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148 e->probability = profile_probability::unlikely ().guessed ();
2149
2150 gsi2 = gsi_after_labels (bb3);
2151 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152 n1e, n2e);
2153 e = split_block (bb3, cond_stmt);
2154 e->flags = EDGE_TRUE_VALUE;
2155 e->probability = profile_probability::likely ().guessed ();
2156 basic_block bb4 = e->dest;
2157
2158 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159 ne->probability = e->probability.invert ();
2160
2161 basic_block bb5 = create_empty_bb (bb2);
2162 add_bb_to_loop (bb5, bb2->loop_father);
2163
2164 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165 ne->probability = profile_probability::unlikely ().guessed ();
2166
2167 for (int j = 0; j < 2; j++)
2168 {
2169 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170 t = fold_build2 (MINUS_EXPR, itype,
2171 unshare_expr (fd->loops[i].n1),
2172 unshare_expr (fd->loops[i].n2));
2173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174 tree tem
2175 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176 true, GSI_SAME_STMT);
2177 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181 true, GSI_SAME_STMT);
2182 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183 if (fd->loops[i].m1)
2184 {
2185 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2188 }
2189 else
2190 n1 = t;
2191 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192 true, GSI_SAME_STMT);
2193 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194 if (fd->loops[i].m2)
2195 {
2196 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2199 }
2200 else
2201 n2 = t;
2202 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203 true, GSI_SAME_STMT);
2204 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2205
2206 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207 n1, n2);
2208 e = split_block (gsi_bb (gsi2), cond_stmt);
2209 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210 e->probability = profile_probability::unlikely ().guessed ();
2211 ne = make_edge (e->src, bb1,
2212 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213 ne->probability = e->probability.invert ();
2214 gsi2 = gsi_after_labels (e->dest);
2215
2216 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2218
2219 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2220 }
2221
2222 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2225
2226 if (fd->first_nonrect + 1 == fd->last_nonrect)
2227 {
2228 fd->first_inner_iterations = first_inner_iterations;
2229 fd->factor = factor;
2230 fd->adjn1 = n1o;
2231 }
2232 }
2233 else
2234 {
2235 /* Fallback implementation. Evaluate the loops with m1/m2
2236 non-NULL as well as their outer loops at runtime using temporaries
2237 instead of the original iteration variables, and in the
2238 body just bump the counter. */
2239 gimple_stmt_iterator gsi2 = *gsi;
2240 gsi_prev (&gsi2);
2241 e = split_block (entry_bb, gsi_stmt (gsi2));
2242 e = split_block (e->dest, (gimple *) NULL);
2243 basic_block cur_bb = e->src;
2244 basic_block next_bb = e->dest;
2245 entry_bb = e->dest;
2246 *gsi = gsi_after_labels (entry_bb);
2247
2248 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2250
2251 for (i = 0; i <= fd->last_nonrect; i++)
2252 {
2253 if (fd->loops[i].m1 == NULL_TREE
2254 && fd->loops[i].m2 == NULL_TREE
2255 && !fd->loops[i].non_rect_referenced)
2256 continue;
2257
2258 tree itype = TREE_TYPE (fd->loops[i].v);
2259
2260 gsi2 = gsi_after_labels (cur_bb);
2261 tree n1, n2;
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263 if (fd->loops[i].m1 == NULL_TREE)
2264 n1 = t;
2265 else if (POINTER_TYPE_P (itype))
2266 {
2267 gcc_assert (integer_onep (fd->loops[i].m1));
2268 t = fold_convert (sizetype,
2269 unshare_expr (fd->loops[i].n1));
2270 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271 }
2272 else
2273 {
2274 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275 n1 = fold_build2 (MULT_EXPR, itype,
2276 vs[i - fd->loops[i].outer], n1);
2277 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278 }
2279 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280 true, GSI_SAME_STMT);
2281 if (i < fd->last_nonrect)
2282 {
2283 vs[i] = create_tmp_reg (itype, ".it");
2284 expand_omp_build_assign (&gsi2, vs[i], n1);
2285 }
2286 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287 if (fd->loops[i].m2 == NULL_TREE)
2288 n2 = t;
2289 else if (POINTER_TYPE_P (itype))
2290 {
2291 gcc_assert (integer_onep (fd->loops[i].m2));
2292 t = fold_convert (sizetype,
2293 unshare_expr (fd->loops[i].n2));
2294 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2295 }
2296 else
2297 {
2298 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2299 n2 = fold_build2 (MULT_EXPR, itype,
2300 vs[i - fd->loops[i].outer], n2);
2301 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2302 }
2303 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2304 true, GSI_SAME_STMT);
2305 if (POINTER_TYPE_P (itype))
2306 itype = signed_type_for (itype);
2307 if (i == fd->last_nonrect)
2308 {
2309 gcond *cond_stmt
2310 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2311 n1, n2);
2312 e = split_block (cur_bb, cond_stmt);
2313 e->flags = EDGE_TRUE_VALUE;
2314 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2315 e->probability = profile_probability::likely ().guessed ();
2316 ne->probability = e->probability.invert ();
2317 gsi2 = gsi_after_labels (e->dest);
2318
2319 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2320 ? -1 : 1));
2321 t = fold_build2 (PLUS_EXPR, itype,
2322 fold_convert (itype, fd->loops[i].step), t);
2323 t = fold_build2 (PLUS_EXPR, itype, t,
2324 fold_convert (itype, n2));
2325 t = fold_build2 (MINUS_EXPR, itype, t,
2326 fold_convert (itype, n1));
2327 tree step = fold_convert (itype, fd->loops[i].step);
2328 if (TYPE_UNSIGNED (itype)
2329 && fd->loops[i].cond_code == GT_EXPR)
2330 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2331 fold_build1 (NEGATE_EXPR, itype, t),
2332 fold_build1 (NEGATE_EXPR, itype, step));
2333 else
2334 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2335 t = fold_convert (type, t);
2336 t = fold_build2 (PLUS_EXPR, type,
2337 counts[fd->last_nonrect], t);
2338 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2339 true, GSI_SAME_STMT);
2340 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2341 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2342 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2343 break;
2344 }
2345 e = split_block (cur_bb, last_stmt (cur_bb));
2346
2347 basic_block new_cur_bb = create_empty_bb (cur_bb);
2348 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2349
2350 gsi2 = gsi_after_labels (e->dest);
2351 tree step = fold_convert (itype,
2352 unshare_expr (fd->loops[i].step));
2353 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2354 t = fold_build_pointer_plus (vs[i],
2355 fold_convert (sizetype, step));
2356 else
2357 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2358 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2359 true, GSI_SAME_STMT);
2360 expand_omp_build_assign (&gsi2, vs[i], t);
2361
2362 ne = split_block (e->dest, last_stmt (e->dest));
2363 gsi2 = gsi_after_labels (ne->dest);
2364
2365 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2366 edge e3, e4;
2367 if (next_bb == entry_bb)
2368 {
2369 e3 = find_edge (ne->dest, next_bb);
2370 e3->flags = EDGE_FALSE_VALUE;
2371 }
2372 else
2373 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2374 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2375 e4->probability = profile_probability::likely ().guessed ();
2376 e3->probability = e4->probability.invert ();
2377 basic_block esrc = e->src;
2378 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2379 cur_bb = new_cur_bb;
2380 basic_block latch_bb = next_bb;
2381 next_bb = e->dest;
2382 remove_edge (e);
2383 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2384 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2385 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2386 }
2387 }
2388 t = NULL_TREE;
2389 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2390 if (!fd->loops[i].non_rect_referenced
2391 && fd->loops[i].m1 == NULL_TREE
2392 && fd->loops[i].m2 == NULL_TREE)
2393 {
2394 if (t == NULL_TREE)
2395 t = counts[i];
2396 else
2397 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2398 }
2399 if (t)
2400 {
2401 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2402 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2403 }
2404 if (!rect_count_seen)
2405 t = counts[fd->last_nonrect];
2406 else
2407 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2408 counts[fd->last_nonrect]);
2409 expand_omp_build_assign (gsi, fd->loop.n2, t);
2410 }
2411 else if (fd->non_rect)
2412 {
2413 tree t = fd->loop.n2;
2414 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2415 int non_rect_referenced = 0, non_rect = 0;
2416 for (i = 0; i < fd->collapse; i++)
2417 {
2418 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2419 && !integer_zerop (counts[i]))
2420 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2421 if (fd->loops[i].non_rect_referenced)
2422 non_rect_referenced++;
2423 if (fd->loops[i].m1 || fd->loops[i].m2)
2424 non_rect++;
2425 }
2426 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2427 counts[fd->last_nonrect] = t;
2428 }
2429 }
2430
2431 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2432 T = V;
2433 V3 = N31 + (T % count3) * STEP3;
2434 T = T / count3;
2435 V2 = N21 + (T % count2) * STEP2;
2436 T = T / count2;
2437 V1 = N11 + T * STEP1;
2438 if this loop doesn't have an inner loop construct combined with it.
2439 If it does have an inner loop construct combined with it and the
2440 iteration count isn't known constant, store values from counts array
2441 into its _looptemp_ temporaries instead.
2442 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2443 inclusive), use the count of all those loops together, and either
2444 find quadratic etc. equation roots, or as a fallback, do:
2445 COUNT = 0;
2446 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2447 for (tmpj = M21 * tmpi + N21;
2448 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2449 {
2450 int tmpk1 = M31 * tmpj + N31;
2451 int tmpk2 = M32 * tmpj + N32;
2452 if (tmpk1 COND3 tmpk2)
2453 {
2454 if (COND3 is <)
2455 adj = STEP3 - 1;
2456 else
2457 adj = STEP3 + 1;
2458 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2459 if (COUNT + temp > T)
2460 {
2461 V1 = tmpi;
2462 V2 = tmpj;
2463 V3 = tmpk1 + (T - COUNT) * STEP3;
2464 goto done;
2465 }
2466 else
2467 COUNT += temp;
2468 }
2469 }
2470 done:;
2471 but for optional innermost or outermost rectangular loops that aren't
2472 referenced by other loop expressions keep doing the division/modulo. */
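/* For the triangular case the "quadratic etc. equation roots" above
   amount to solving for the outer index C in
     T = C * FIRST_INNER_ITERATIONS + FACTOR * C * (C - 1) / 2
   which, with T3 = FIRST_INNER_ITERATIONS - FACTOR / 2, gives roughly
     C = (sqrt (T3 * T3 + 2 * FACTOR * T) - T3) / FACTOR
   evaluated below in double precision and then verified against the
   integral counts, falling back to iterating if the result is off.  */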
2473
2474 static void
2475 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2476 tree *counts, tree *nonrect_bounds,
2477 gimple *inner_stmt, tree startvar)
2478 {
2479 int i;
2480 if (gimple_omp_for_combined_p (fd->for_stmt))
2481 {
2482 /* If fd->loop.n2 is constant, then no propagation of the counts
2483 	 is needed; they are constant.  */
2484 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2485 return;
2486
2487 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2488 ? gimple_omp_taskreg_clauses (inner_stmt)
2489 : gimple_omp_for_clauses (inner_stmt);
2490 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2491 isn't supposed to be handled, as the inner loop doesn't
2492 use it. */
2493 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2494 gcc_assert (innerc);
2495 int count = 0;
2496 if (fd->non_rect
2497 && fd->last_nonrect == fd->first_nonrect + 1
2498 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2499 count = 4;
2500 for (i = 0; i < fd->collapse + count; i++)
2501 {
2502 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2503 OMP_CLAUSE__LOOPTEMP_);
2504 gcc_assert (innerc);
2505 if (i)
2506 {
2507 tree tem = OMP_CLAUSE_DECL (innerc);
2508 tree t;
2509 if (i < fd->collapse)
2510 t = counts[i];
2511 else
2512 switch (i - fd->collapse)
2513 {
2514 case 0: t = counts[0]; break;
2515 case 1: t = fd->first_inner_iterations; break;
2516 case 2: t = fd->factor; break;
2517 case 3: t = fd->adjn1; break;
2518 default: gcc_unreachable ();
2519 }
2520 t = fold_convert (TREE_TYPE (tem), t);
2521 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2522 false, GSI_CONTINUE_LINKING);
2523 gassign *stmt = gimple_build_assign (tem, t);
2524 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2525 }
2526 }
2527 return;
2528 }
2529
2530 tree type = TREE_TYPE (fd->loop.v);
2531 tree tem = create_tmp_reg (type, ".tem");
2532 gassign *stmt = gimple_build_assign (tem, startvar);
2533 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2534
2535 for (i = fd->collapse - 1; i >= 0; i--)
2536 {
2537 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2538 itype = vtype;
2539 if (POINTER_TYPE_P (vtype))
2540 itype = signed_type_for (vtype);
2541 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2542 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2543 else
2544 t = tem;
2545 if (i == fd->last_nonrect)
2546 {
2547 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2548 false, GSI_CONTINUE_LINKING);
2549 tree stopval = t;
2550 tree idx = create_tmp_reg (type, ".count");
2551 expand_omp_build_assign (gsi, idx,
2552 build_zero_cst (type), true);
2553 basic_block bb_triang = NULL, bb_triang_dom = NULL;
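	  /* If there is just a single pair of adjacent non-rectangular
	     loops and the target has a usable sqrt, try to recover the
	     outer iteration directly from STOPVAL via the closed form
	     (guarded at runtime on FACTOR being non-zero), instead of
	     iterating below.  */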
2554 if (fd->first_nonrect + 1 == fd->last_nonrect
2555 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2556 || fd->first_inner_iterations)
2557 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2558 != CODE_FOR_nothing)
2559 && !integer_zerop (fd->loop.n2))
2560 {
2561 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2562 tree itype = TREE_TYPE (fd->loops[i].v);
2563 tree first_inner_iterations = fd->first_inner_iterations;
2564 tree factor = fd->factor;
2565 gcond *cond_stmt
2566 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2567 build_zero_cst (TREE_TYPE (factor)),
2568 true);
2569 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2570 basic_block bb0 = e->src;
2571 e->flags = EDGE_TRUE_VALUE;
2572 e->probability = profile_probability::likely ();
2573 bb_triang_dom = bb0;
2574 *gsi = gsi_after_labels (e->dest);
2575 tree slltype = long_long_integer_type_node;
2576 tree ulltype = long_long_unsigned_type_node;
2577 tree stopvalull = fold_convert (ulltype, stopval);
2578 stopvalull
2579 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2580 false, GSI_CONTINUE_LINKING);
2581 first_inner_iterations
2582 = fold_convert (slltype, first_inner_iterations);
2583 first_inner_iterations
2584 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2585 NULL_TREE, false,
2586 GSI_CONTINUE_LINKING);
2587 factor = fold_convert (slltype, factor);
2588 factor
2589 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2590 false, GSI_CONTINUE_LINKING);
2591 tree first_inner_iterationsd
2592 = fold_build1 (FLOAT_EXPR, double_type_node,
2593 first_inner_iterations);
2594 first_inner_iterationsd
2595 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2596 NULL_TREE, false,
2597 GSI_CONTINUE_LINKING);
2598 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2599 factor);
2600 factord = force_gimple_operand_gsi (gsi, factord, true,
2601 NULL_TREE, false,
2602 GSI_CONTINUE_LINKING);
2603 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2604 stopvalull);
2605 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2606 NULL_TREE, false,
2607 GSI_CONTINUE_LINKING);
2608 	      /* Temporarily disable flag_rounding_math; the values will be
2609 		 decimal numbers divided by 2, and worst-case imprecision
2610 		 due to too-large values ought to be caught later by the
2611 		 fallback checks.  */
2612 int save_flag_rounding_math = flag_rounding_math;
2613 flag_rounding_math = 0;
2614 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2615 build_real (double_type_node, dconst2));
2616 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2617 first_inner_iterationsd, t);
2618 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2619 GSI_CONTINUE_LINKING);
2620 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2621 build_real (double_type_node, dconst2));
2622 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2623 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2624 fold_build2 (MULT_EXPR, double_type_node,
2625 t3, t3));
2626 flag_rounding_math = save_flag_rounding_math;
2627 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2628 GSI_CONTINUE_LINKING);
2629 if (flag_exceptions
2630 && cfun->can_throw_non_call_exceptions
2631 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2632 {
2633 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2634 build_zero_cst (double_type_node));
2635 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2636 false, GSI_CONTINUE_LINKING);
2637 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2638 boolean_false_node,
2639 NULL_TREE, NULL_TREE);
2640 }
2641 else
2642 cond_stmt
2643 = gimple_build_cond (LT_EXPR, t,
2644 build_zero_cst (double_type_node),
2645 NULL_TREE, NULL_TREE);
2646 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2647 e = split_block (gsi_bb (*gsi), cond_stmt);
2648 basic_block bb1 = e->src;
2649 e->flags = EDGE_FALSE_VALUE;
2650 e->probability = profile_probability::very_likely ();
2651 *gsi = gsi_after_labels (e->dest);
2652 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2653 tree sqrtr = create_tmp_var (double_type_node);
2654 gimple_call_set_lhs (call, sqrtr);
2655 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2656 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2657 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2658 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2659 tree c = create_tmp_var (ulltype);
2660 tree d = create_tmp_var (ulltype);
2661 expand_omp_build_assign (gsi, c, t, true);
2662 t = fold_build2 (MINUS_EXPR, ulltype, c,
2663 build_one_cst (ulltype));
2664 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2665 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2666 t = fold_build2 (MULT_EXPR, ulltype,
2667 fold_convert (ulltype, fd->factor), t);
2668 tree t2
2669 = fold_build2 (MULT_EXPR, ulltype, c,
2670 fold_convert (ulltype,
2671 fd->first_inner_iterations));
2672 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2673 expand_omp_build_assign (gsi, d, t, true);
2674 t = fold_build2 (MULT_EXPR, ulltype,
2675 fold_convert (ulltype, fd->factor), c);
2676 t = fold_build2 (PLUS_EXPR, ulltype,
2677 t, fold_convert (ulltype,
2678 fd->first_inner_iterations));
2679 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2680 GSI_CONTINUE_LINKING);
2681 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2682 NULL_TREE, NULL_TREE);
2683 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2684 e = split_block (gsi_bb (*gsi), cond_stmt);
2685 basic_block bb2 = e->src;
2686 e->flags = EDGE_TRUE_VALUE;
2687 e->probability = profile_probability::very_likely ();
2688 *gsi = gsi_after_labels (e->dest);
2689 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2690 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2691 GSI_CONTINUE_LINKING);
2692 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2693 NULL_TREE, NULL_TREE);
2694 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2695 e = split_block (gsi_bb (*gsi), cond_stmt);
2696 basic_block bb3 = e->src;
2697 e->flags = EDGE_FALSE_VALUE;
2698 e->probability = profile_probability::very_likely ();
2699 *gsi = gsi_after_labels (e->dest);
2700 t = fold_convert (itype, c);
2701 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2702 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2703 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2704 GSI_CONTINUE_LINKING);
2705 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2706 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2707 t2 = fold_convert (itype, t2);
2708 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2709 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2710 if (fd->loops[i].m1)
2711 {
2712 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2713 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2714 }
2715 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2716 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2717 bb_triang = e->src;
2718 *gsi = gsi_after_labels (e->dest);
2719 remove_edge (e);
2720 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2721 e->probability = profile_probability::very_unlikely ();
2722 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2723 e->probability = profile_probability::very_unlikely ();
2724 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2725 e->probability = profile_probability::very_unlikely ();
2726
2727 basic_block bb4 = create_empty_bb (bb0);
2728 add_bb_to_loop (bb4, bb0->loop_father);
2729 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2730 e->probability = profile_probability::unlikely ();
2731 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2732 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2733 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2734 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2735 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2736 counts[i], counts[i - 1]);
2737 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2738 GSI_CONTINUE_LINKING);
2739 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2740 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2741 t = fold_convert (itype, t);
2742 t2 = fold_convert (itype, t2);
2743 t = fold_build2 (MULT_EXPR, itype, t,
2744 fold_convert (itype, fd->loops[i].step));
2745 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2746 t2 = fold_build2 (MULT_EXPR, itype, t2,
2747 fold_convert (itype, fd->loops[i - 1].step));
2748 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2749 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2750 false, GSI_CONTINUE_LINKING);
2751 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2752 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2753 if (fd->loops[i].m1)
2754 {
2755 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2756 fd->loops[i - 1].v);
2757 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2758 }
2759 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2760 false, GSI_CONTINUE_LINKING);
2761 stmt = gimple_build_assign (fd->loops[i].v, t);
2762 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2763 }
2764 	  /* Fallback implementation.  Evaluate the loops between
2765 	     fd->first_nonrect and fd->last_nonrect (inclusive) at
2766 	     runtime using temporaries instead of the original iteration
2767 	     variables, and in the body just bump the counter and compare
2768 	     with the desired value.  */
2769 gimple_stmt_iterator gsi2 = *gsi;
2770 basic_block entry_bb = gsi_bb (gsi2);
2771 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2772 e = split_block (e->dest, (gimple *) NULL);
2773 basic_block dom_bb = NULL;
2774 basic_block cur_bb = e->src;
2775 basic_block next_bb = e->dest;
2776 entry_bb = e->dest;
2777 *gsi = gsi_after_labels (entry_bb);
2778
2779 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2780 tree n1 = NULL_TREE, n2 = NULL_TREE;
2781 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2782
2783 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2784 {
2785 tree itype = TREE_TYPE (fd->loops[j].v);
2786 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2787 && fd->loops[j].m2 == NULL_TREE
2788 && !fd->loops[j].non_rect_referenced);
2789 gsi2 = gsi_after_labels (cur_bb);
2790 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2791 if (fd->loops[j].m1 == NULL_TREE)
2792 n1 = rect_p ? build_zero_cst (type) : t;
2793 else if (POINTER_TYPE_P (itype))
2794 {
2795 gcc_assert (integer_onep (fd->loops[j].m1));
2796 t = fold_convert (sizetype,
2797 unshare_expr (fd->loops[j].n1));
2798 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2799 }
2800 else
2801 {
2802 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2803 n1 = fold_build2 (MULT_EXPR, itype,
2804 vs[j - fd->loops[j].outer], n1);
2805 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2806 }
2807 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2808 true, GSI_SAME_STMT);
2809 if (j < fd->last_nonrect)
2810 {
2811 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2812 expand_omp_build_assign (&gsi2, vs[j], n1);
2813 }
2814 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2815 if (fd->loops[j].m2 == NULL_TREE)
2816 n2 = rect_p ? counts[j] : t;
2817 else if (POINTER_TYPE_P (itype))
2818 {
2819 gcc_assert (integer_onep (fd->loops[j].m2));
2820 t = fold_convert (sizetype,
2821 unshare_expr (fd->loops[j].n2));
2822 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2823 }
2824 else
2825 {
2826 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2827 n2 = fold_build2 (MULT_EXPR, itype,
2828 vs[j - fd->loops[j].outer], n2);
2829 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2830 }
2831 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2832 true, GSI_SAME_STMT);
2833 if (POINTER_TYPE_P (itype))
2834 itype = signed_type_for (itype);
2835 if (j == fd->last_nonrect)
2836 {
2837 gcond *cond_stmt
2838 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2839 n1, n2);
2840 e = split_block (cur_bb, cond_stmt);
2841 e->flags = EDGE_TRUE_VALUE;
2842 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2843 e->probability = profile_probability::likely ().guessed ();
2844 ne->probability = e->probability.invert ();
2845 gsi2 = gsi_after_labels (e->dest);
2846
2847 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2848 ? -1 : 1));
2849 t = fold_build2 (PLUS_EXPR, itype,
2850 fold_convert (itype, fd->loops[j].step), t);
2851 t = fold_build2 (PLUS_EXPR, itype, t,
2852 fold_convert (itype, n2));
2853 t = fold_build2 (MINUS_EXPR, itype, t,
2854 fold_convert (itype, n1));
2855 tree step = fold_convert (itype, fd->loops[j].step);
2856 if (TYPE_UNSIGNED (itype)
2857 && fd->loops[j].cond_code == GT_EXPR)
2858 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2859 fold_build1 (NEGATE_EXPR, itype, t),
2860 fold_build1 (NEGATE_EXPR, itype, step));
2861 else
2862 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2863 t = fold_convert (type, t);
2864 t = fold_build2 (PLUS_EXPR, type, idx, t);
2865 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2866 true, GSI_SAME_STMT);
2867 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2868 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2869 cond_stmt
2870 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2871 NULL_TREE);
2872 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2873 e = split_block (gsi_bb (gsi2), cond_stmt);
2874 e->flags = EDGE_TRUE_VALUE;
2875 e->probability = profile_probability::likely ().guessed ();
2876 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2877 ne->probability = e->probability.invert ();
2878 gsi2 = gsi_after_labels (e->dest);
2879 expand_omp_build_assign (&gsi2, idx, t);
2880 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2881 break;
2882 }
2883 e = split_block (cur_bb, last_stmt (cur_bb));
2884
2885 basic_block new_cur_bb = create_empty_bb (cur_bb);
2886 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2887
2888 gsi2 = gsi_after_labels (e->dest);
2889 if (rect_p)
2890 t = fold_build2 (PLUS_EXPR, type, vs[j],
2891 build_one_cst (type));
2892 else
2893 {
2894 tree step
2895 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2896 if (POINTER_TYPE_P (vtype))
2897 t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2898 step));
2899 else
2900 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2901 }
2902 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2903 true, GSI_SAME_STMT);
2904 expand_omp_build_assign (&gsi2, vs[j], t);
2905
2906 edge ne = split_block (e->dest, last_stmt (e->dest));
2907 gsi2 = gsi_after_labels (ne->dest);
2908
2909 gcond *cond_stmt;
2910 if (next_bb == entry_bb)
2911 /* No need to actually check the outermost condition. */
2912 cond_stmt
2913 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2914 boolean_true_node,
2915 NULL_TREE, NULL_TREE);
2916 else
2917 cond_stmt
2918 = gimple_build_cond (rect_p ? LT_EXPR
2919 : fd->loops[j].cond_code,
2920 vs[j], n2, NULL_TREE, NULL_TREE);
2921 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2922 edge e3, e4;
2923 if (next_bb == entry_bb)
2924 {
2925 e3 = find_edge (ne->dest, next_bb);
2926 e3->flags = EDGE_FALSE_VALUE;
2927 dom_bb = ne->dest;
2928 }
2929 else
2930 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2931 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2932 e4->probability = profile_probability::likely ().guessed ();
2933 e3->probability = e4->probability.invert ();
2934 basic_block esrc = e->src;
2935 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2936 cur_bb = new_cur_bb;
2937 basic_block latch_bb = next_bb;
2938 next_bb = e->dest;
2939 remove_edge (e);
2940 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2941 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2942 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2943 }
2944 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2945 {
2946 tree vtype = TREE_TYPE (fd->loops[j].v);
2947 tree itype = vtype;
2948 if (POINTER_TYPE_P (itype))
2949 itype = signed_type_for (itype);
2950 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2951 && fd->loops[j].m2 == NULL_TREE
2952 && !fd->loops[j].non_rect_referenced);
2953 if (j == fd->last_nonrect)
2954 {
2955 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2956 t = fold_convert (itype, t);
2957 tree t2
2958 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2959 t = fold_build2 (MULT_EXPR, itype, t, t2);
2960 if (POINTER_TYPE_P (vtype))
2961 t = fold_build_pointer_plus (n1,
2962 fold_convert (sizetype, t));
2963 else
2964 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2965 }
2966 else if (rect_p)
2967 {
2968 t = fold_convert (itype, vs[j]);
2969 t = fold_build2 (MULT_EXPR, itype, t,
2970 fold_convert (itype, fd->loops[j].step));
2971 if (POINTER_TYPE_P (vtype))
2972 t = fold_build_pointer_plus (fd->loops[j].n1,
2973 fold_convert (sizetype, t));
2974 else
2975 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2976 }
2977 else
2978 t = vs[j];
2979 t = force_gimple_operand_gsi (gsi, t, false,
2980 NULL_TREE, true,
2981 GSI_SAME_STMT);
2982 stmt = gimple_build_assign (fd->loops[j].v, t);
2983 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2984 }
2985 if (gsi_end_p (*gsi))
2986 *gsi = gsi_last_bb (gsi_bb (*gsi));
2987 else
2988 gsi_prev (gsi);
2989 if (bb_triang)
2990 {
2991 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2992 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2993 *gsi = gsi_after_labels (e->dest);
2994 if (!gsi_end_p (*gsi))
2995 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2996 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2997 }
2998 }
2999 else
3000 {
3001 t = fold_convert (itype, t);
3002 t = fold_build2 (MULT_EXPR, itype, t,
3003 fold_convert (itype, fd->loops[i].step));
3004 if (POINTER_TYPE_P (vtype))
3005 t = fold_build_pointer_plus (fd->loops[i].n1, t);
3006 else
3007 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3008 t = force_gimple_operand_gsi (gsi, t,
3009 DECL_P (fd->loops[i].v)
3010 && TREE_ADDRESSABLE (fd->loops[i].v),
3011 NULL_TREE, false,
3012 GSI_CONTINUE_LINKING);
3013 stmt = gimple_build_assign (fd->loops[i].v, t);
3014 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3015 }
3016 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3017 {
3018 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3019 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3020 false, GSI_CONTINUE_LINKING);
3021 stmt = gimple_build_assign (tem, t);
3022 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3023 }
3024 if (i == fd->last_nonrect)
3025 i = fd->first_nonrect;
3026 }
3027 if (fd->non_rect)
3028 for (i = 0; i <= fd->last_nonrect; i++)
3029 if (fd->loops[i].m2)
3030 {
3031 tree itype = TREE_TYPE (fd->loops[i].v);
3032
3033 tree t;
3034 if (POINTER_TYPE_P (itype))
3035 {
3036 gcc_assert (integer_onep (fd->loops[i].m2));
3037 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3038 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3039 t);
3040 }
3041 else
3042 {
3043 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3044 t = fold_build2 (MULT_EXPR, itype,
3045 fd->loops[i - fd->loops[i].outer].v, t);
3046 t = fold_build2 (PLUS_EXPR, itype, t,
3047 fold_convert (itype,
3048 unshare_expr (fd->loops[i].n2)));
3049 }
3050 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3051 t = force_gimple_operand_gsi (gsi, t, false,
3052 NULL_TREE, false,
3053 GSI_CONTINUE_LINKING);
3054 stmt = gimple_build_assign (nonrect_bounds[i], t);
3055 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3056 }
3057 }
3058
3059 /* Helper function for expand_omp_for_*. Generate code like:
3060 L10:
3061 V3 += STEP3;
3062 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3063 L11:
3064 V3 = N31;
3065 V2 += STEP2;
3066 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3067 L12:
3068 V2 = N21;
3069 V1 += STEP1;
3070 goto BODY_BB;
3071 For non-rectangular loops, use temporaries stored in nonrect_bounds
3072 for the upper bounds if M?2 multiplier is present. Given e.g.
3073 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3074 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3075 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3076 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3077 do:
3078 L10:
3079 V4 += STEP4;
3080 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3081 L11:
3082 V4 = N41 + M41 * V2; // This can be left out if the loop
3083 // refers to the immediate parent loop
3084 V3 += STEP3;
3085 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3086 L12:
3087 V3 = N31;
3088 V2 += STEP2;
3089 if (V2 cond2 N22) goto L120; else goto L13;
3090 L120:
3091 V4 = N41 + M41 * V2;
3092 NONRECT_BOUND4 = N42 + M42 * V2;
3093 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3094 L13:
3095 V2 = N21;
3096 V1 += STEP1;
3097 goto L120; */
3098
3099 static basic_block
3100 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3101 basic_block cont_bb, basic_block body_bb)
3102 {
3103 basic_block last_bb, bb, collapse_bb = NULL;
3104 int i;
3105 gimple_stmt_iterator gsi;
3106 edge e;
3107 tree t;
3108 gimple *stmt;
3109
3110 last_bb = cont_bb;
3111 for (i = fd->collapse - 1; i >= 0; i--)
3112 {
3113 tree vtype = TREE_TYPE (fd->loops[i].v);
3114
3115 bb = create_empty_bb (last_bb);
3116 add_bb_to_loop (bb, last_bb->loop_father);
3117 gsi = gsi_start_bb (bb);
3118
3119 if (i < fd->collapse - 1)
3120 {
3121 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3122 e->probability
3123 = profile_probability::guessed_always ().apply_scale (1, 8);
3124
3125 struct omp_for_data_loop *l = &fd->loops[i + 1];
3126 if (l->m1 == NULL_TREE || l->outer != 1)
3127 {
3128 t = l->n1;
3129 if (l->m1)
3130 {
3131 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3132 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3133 fold_convert (sizetype, t));
3134 else
3135 {
3136 tree t2
3137 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3138 fd->loops[i + 1 - l->outer].v, l->m1);
3139 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3140 }
3141 }
3142 t = force_gimple_operand_gsi (&gsi, t,
3143 DECL_P (l->v)
3144 && TREE_ADDRESSABLE (l->v),
3145 NULL_TREE, false,
3146 GSI_CONTINUE_LINKING);
3147 stmt = gimple_build_assign (l->v, t);
3148 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3149 }
3150 }
3151 else
3152 collapse_bb = bb;
3153
3154 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3155
3156 if (POINTER_TYPE_P (vtype))
3157 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3158 else
3159 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3160 t = force_gimple_operand_gsi (&gsi, t,
3161 DECL_P (fd->loops[i].v)
3162 && TREE_ADDRESSABLE (fd->loops[i].v),
3163 NULL_TREE, false, GSI_CONTINUE_LINKING);
3164 stmt = gimple_build_assign (fd->loops[i].v, t);
3165 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3166
3167 if (fd->loops[i].non_rect_referenced)
3168 {
3169 basic_block update_bb = NULL, prev_bb = NULL;
3170 for (int j = i + 1; j <= fd->last_nonrect; j++)
3171 if (j - fd->loops[j].outer == i)
3172 {
3173 tree n1, n2;
3174 struct omp_for_data_loop *l = &fd->loops[j];
3175 basic_block this_bb = create_empty_bb (last_bb);
3176 add_bb_to_loop (this_bb, last_bb->loop_father);
3177 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3178 if (prev_bb)
3179 {
3180 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3181 e->probability
3182 = profile_probability::guessed_always ().apply_scale (7,
3183 8);
3184 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3185 }
3186 if (l->m1)
3187 {
3188 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3189 t = fold_build_pointer_plus (fd->loops[i].v,
3190 fold_convert (sizetype,
3191 l->n1));
3192 else
3193 {
3194 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3195 fd->loops[i].v);
3196 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3197 t, l->n1);
3198 }
3199 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3200 false,
3201 GSI_CONTINUE_LINKING);
3202 stmt = gimple_build_assign (l->v, n1);
3203 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3204 n1 = l->v;
3205 }
3206 else
3207 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3208 NULL_TREE, false,
3209 GSI_CONTINUE_LINKING);
3210 if (l->m2)
3211 {
3212 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3213 t = fold_build_pointer_plus (fd->loops[i].v,
3214 fold_convert (sizetype,
3215 l->n2));
3216 else
3217 {
3218 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3219 fd->loops[i].v);
3220 t = fold_build2 (PLUS_EXPR,
3221 TREE_TYPE (nonrect_bounds[j]),
3222 t, unshare_expr (l->n2));
3223 }
3224 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3225 false,
3226 GSI_CONTINUE_LINKING);
3227 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3228 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3229 n2 = nonrect_bounds[j];
3230 }
3231 else
3232 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3233 true, NULL_TREE, false,
3234 GSI_CONTINUE_LINKING);
3235 gcond *cond_stmt
3236 = gimple_build_cond (l->cond_code, n1, n2,
3237 NULL_TREE, NULL_TREE);
3238 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3239 if (update_bb == NULL)
3240 update_bb = this_bb;
3241 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3242 e->probability
3243 = profile_probability::guessed_always ().apply_scale (1, 8);
3244 if (prev_bb == NULL)
3245 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3246 prev_bb = this_bb;
3247 }
3248 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3249 e->probability
3250 = profile_probability::guessed_always ().apply_scale (7, 8);
3251 body_bb = update_bb;
3252 }
3253
3254 if (i > 0)
3255 {
3256 if (fd->loops[i].m2)
3257 t = nonrect_bounds[i];
3258 else
3259 t = unshare_expr (fd->loops[i].n2);
3260 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3261 false, GSI_CONTINUE_LINKING);
3262 tree v = fd->loops[i].v;
3263 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3264 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3265 false, GSI_CONTINUE_LINKING);
3266 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3267 stmt = gimple_build_cond_empty (t);
3268 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3269 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3270 expand_omp_regimplify_p, NULL, NULL)
3271 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3272 expand_omp_regimplify_p, NULL, NULL))
3273 gimple_regimplify_operands (stmt, &gsi);
3274 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3275 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3276 }
3277 else
3278 make_edge (bb, body_bb, EDGE_FALLTHRU);
3279 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3280 last_bb = bb;
3281 }
3282
3283 return collapse_bb;
3284 }
3285
3286 /* Expand #pragma omp ordered depend(source). */
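/* For illustration only (an informal sketch, not the exact GIMPLE
   produced): for #pragma omp for ordered(2) containing #pragma omp
   ordered depend(source), this emits roughly

     GOMP_doacross_post (&.orditera);

   where .orditera is the array created in expand_omp_ordered_source_sink
   below to hold the current iteration counts; the _ull_ variant is used
   when the iteration type is not long.  */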
3287
3288 static void
3289 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3290 tree *counts, location_t loc)
3291 {
3292 enum built_in_function source_ix
3293 = fd->iter_type == long_integer_type_node
3294 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3295 gimple *g
3296 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3297 build_fold_addr_expr (counts[fd->ordered]));
3298 gimple_set_location (g, loc);
3299 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3300 }
3301
3302 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
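/* A rough sketch of the expansion, for illustration: with ordered(2),
   unit steps and depend(sink: i-1, j+1), the code emitted below is
   approximately

     if (i - 1 >= n1_i && j + 1 < n2_j)
       GOMP_doacross_wait (i - 1 - n1_i, j + 1 - n1_j);

   i.e. the wait is guarded by a check that the sink iteration actually
   exists in the iteration space, and the arguments are zero-based
   iteration numbers (offsets are additionally divided by the step for
   non-unit steps).  */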
3303
3304 static void
3305 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3306 tree *counts, tree c, location_t loc)
3307 {
3308 auto_vec<tree, 10> args;
3309 enum built_in_function sink_ix
3310 = fd->iter_type == long_integer_type_node
3311 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3312 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3313 int i;
3314 gimple_stmt_iterator gsi2 = *gsi;
3315 bool warned_step = false;
3316
3317 for (i = 0; i < fd->ordered; i++)
3318 {
3319 tree step = NULL_TREE;
3320 off = TREE_PURPOSE (deps);
3321 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3322 {
3323 step = TREE_OPERAND (off, 1);
3324 off = TREE_OPERAND (off, 0);
3325 }
3326 if (!integer_zerop (off))
3327 {
3328 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3329 || fd->loops[i].cond_code == GT_EXPR);
3330 bool forward = fd->loops[i].cond_code == LT_EXPR;
3331 if (step)
3332 {
3333 /* Non-simple Fortran DO loops. If step is variable,
3334 we don't know even the direction at compile time, so we
3335 can't warn. */
3336 if (TREE_CODE (step) != INTEGER_CST)
3337 break;
3338 forward = tree_int_cst_sgn (step) != -1;
3339 }
3340 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3341 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3342 "waiting for lexically later iteration");
3343 break;
3344 }
3345 deps = TREE_CHAIN (deps);
3346 }
3347 /* If all offsets corresponding to the collapsed loops are zero,
3348 this depend clause can be ignored. FIXME: but there is still a
3349 flush needed. We need to emit one __sync_synchronize () for it
3350 though (perhaps conditionally)? Solve this together with the
3351 conservative dependence folding optimization.
3352 if (i >= fd->collapse)
3353 return; */
3354
3355 deps = OMP_CLAUSE_DECL (c);
3356 gsi_prev (&gsi2);
3357 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3358 edge e2 = split_block_after_labels (e1->dest);
3359
3360 gsi2 = gsi_after_labels (e1->dest);
3361 *gsi = gsi_last_bb (e1->src);
3362 for (i = 0; i < fd->ordered; i++)
3363 {
3364 tree itype = TREE_TYPE (fd->loops[i].v);
3365 tree step = NULL_TREE;
3366 tree orig_off = NULL_TREE;
3367 if (POINTER_TYPE_P (itype))
3368 itype = sizetype;
3369 if (i)
3370 deps = TREE_CHAIN (deps);
3371 off = TREE_PURPOSE (deps);
3372 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3373 {
3374 step = TREE_OPERAND (off, 1);
3375 off = TREE_OPERAND (off, 0);
3376 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3377 && integer_onep (fd->loops[i].step)
3378 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3379 }
3380 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3381 if (step)
3382 {
3383 off = fold_convert_loc (loc, itype, off);
3384 orig_off = off;
3385 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3386 }
3387
3388 if (integer_zerop (off))
3389 t = boolean_true_node;
3390 else
3391 {
3392 tree a;
3393 tree co = fold_convert_loc (loc, itype, off);
3394 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3395 {
3396 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3397 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3398 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3399 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3400 co);
3401 }
3402 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3403 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3404 fd->loops[i].v, co);
3405 else
3406 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3407 fd->loops[i].v, co);
3408 if (step)
3409 {
3410 tree t1, t2;
3411 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3412 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3413 fd->loops[i].n1);
3414 else
3415 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3416 fd->loops[i].n2);
3417 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3418 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3419 fd->loops[i].n2);
3420 else
3421 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3422 fd->loops[i].n1);
3423 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3424 step, build_int_cst (TREE_TYPE (step), 0));
3425 if (TREE_CODE (step) != INTEGER_CST)
3426 {
3427 t1 = unshare_expr (t1);
3428 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3429 false, GSI_CONTINUE_LINKING);
3430 t2 = unshare_expr (t2);
3431 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3432 false, GSI_CONTINUE_LINKING);
3433 }
3434 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3435 t, t2, t1);
3436 }
3437 else if (fd->loops[i].cond_code == LT_EXPR)
3438 {
3439 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3440 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3441 fd->loops[i].n1);
3442 else
3443 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3444 fd->loops[i].n2);
3445 }
3446 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3447 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3448 fd->loops[i].n2);
3449 else
3450 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3451 fd->loops[i].n1);
3452 }
3453 if (cond)
3454 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3455 else
3456 cond = t;
3457
3458 off = fold_convert_loc (loc, itype, off);
3459
3460 if (step
3461 || (fd->loops[i].cond_code == LT_EXPR
3462 ? !integer_onep (fd->loops[i].step)
3463 : !integer_minus_onep (fd->loops[i].step)))
3464 {
3465 if (step == NULL_TREE
3466 && TYPE_UNSIGNED (itype)
3467 && fd->loops[i].cond_code == GT_EXPR)
3468 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3469 fold_build1_loc (loc, NEGATE_EXPR, itype,
3470 s));
3471 else
3472 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3473 orig_off ? orig_off : off, s);
3474 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3475 build_int_cst (itype, 0));
3476 if (integer_zerop (t) && !warned_step)
3477 {
3478 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3479 "refers to iteration never in the iteration "
3480 "space");
3481 warned_step = true;
3482 }
3483 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3484 cond, t);
3485 }
3486
3487 if (i <= fd->collapse - 1 && fd->collapse > 1)
3488 t = fd->loop.v;
3489 else if (counts[i])
3490 t = counts[i];
3491 else
3492 {
3493 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3494 fd->loops[i].v, fd->loops[i].n1);
3495 t = fold_convert_loc (loc, fd->iter_type, t);
3496 }
3497 if (step)
3498 /* We have divided off by step already earlier. */;
3499 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3500 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3501 fold_build1_loc (loc, NEGATE_EXPR, itype,
3502 s));
3503 else
3504 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3505 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3506 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3507 off = fold_convert_loc (loc, fd->iter_type, off);
3508 if (i <= fd->collapse - 1 && fd->collapse > 1)
3509 {
3510 if (i)
3511 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3512 off);
3513 if (i < fd->collapse - 1)
3514 {
3515 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3516 counts[i]);
3517 continue;
3518 }
3519 }
3520 off = unshare_expr (off);
3521 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3522 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3523 true, GSI_SAME_STMT);
3524 args.safe_push (t);
3525 }
3526 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3527 gimple_set_location (g, loc);
3528 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3529
3530 cond = unshare_expr (cond);
3531 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3532 GSI_CONTINUE_LINKING);
3533 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3534 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3535 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3536 e1->probability = e3->probability.invert ();
3537 e1->flags = EDGE_TRUE_VALUE;
3538 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3539
3540 *gsi = gsi_after_labels (e2->dest);
3541 }
3542
3543 /* Expand all #pragma omp ordered depend(source) and
3544 #pragma omp ordered depend(sink:...) constructs in the current
3545 #pragma omp for ordered(n) region. */
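/* Informal orientation for the code below: counts[fd->collapse - 1]
   .. counts[fd->ordered - 1] become per-loop iteration counters
   (".orditer" temporaries, or NULL_TREE when the loop variable itself
   suffices), and counts[fd->ordered] becomes the ".orditera" array whose
   address is handed to the libgomp doacross routines.  */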
3546
3547 static void
3548 expand_omp_ordered_source_sink (struct omp_region *region,
3549 struct omp_for_data *fd, tree *counts,
3550 basic_block cont_bb)
3551 {
3552 struct omp_region *inner;
3553 int i;
3554 for (i = fd->collapse - 1; i < fd->ordered; i++)
3555 if (i == fd->collapse - 1 && fd->collapse > 1)
3556 counts[i] = NULL_TREE;
3557 else if (i >= fd->collapse && !cont_bb)
3558 counts[i] = build_zero_cst (fd->iter_type);
3559 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3560 && integer_onep (fd->loops[i].step))
3561 counts[i] = NULL_TREE;
3562 else
3563 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3564 tree atype
3565 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3566 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3567 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3568
3569 for (inner = region->inner; inner; inner = inner->next)
3570 if (inner->type == GIMPLE_OMP_ORDERED)
3571 {
3572 gomp_ordered *ord_stmt = inner->ord_stmt;
3573 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3574 location_t loc = gimple_location (ord_stmt);
3575 tree c;
3576 for (c = gimple_omp_ordered_clauses (ord_stmt);
3577 c; c = OMP_CLAUSE_CHAIN (c))
3578 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3579 break;
3580 if (c)
3581 expand_omp_ordered_source (&gsi, fd, counts, loc);
3582 for (c = gimple_omp_ordered_clauses (ord_stmt);
3583 c; c = OMP_CLAUSE_CHAIN (c))
3584 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3585 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3586 gsi_remove (&gsi, true);
3587 }
3588 }
3589
3590 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3591 collapsed. */
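/* An informal sketch only (the actual transformation below manipulates
   the CFG directly): for ordered(2) collapse(1), the body ends up
   wrapped roughly as

     V2 = N21;
     .orditera[1] = 0;
   header:
     if (V2 cond2 N22)
       {
         BODY;
         V2 += STEP2;
         .orditera[1] = V2 - N21;
         goto header;
       }

   so the doacross runtime can observe the current value of every
   non-collapsed ordered loop iterator.  */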
3592
3593 static basic_block
3594 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3595 basic_block cont_bb, basic_block body_bb,
3596 basic_block l0_bb, bool ordered_lastprivate)
3597 {
3598 if (fd->ordered == fd->collapse)
3599 return cont_bb;
3600
3601 if (!cont_bb)
3602 {
3603 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3604 for (int i = fd->collapse; i < fd->ordered; i++)
3605 {
3606 tree type = TREE_TYPE (fd->loops[i].v);
3607 tree n1 = fold_convert (type, fd->loops[i].n1);
3608 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3609 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3610 size_int (i - fd->collapse + 1),
3611 NULL_TREE, NULL_TREE);
3612 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3613 }
3614 return NULL;
3615 }
3616
3617 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3618 {
3619 tree t, type = TREE_TYPE (fd->loops[i].v);
3620 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3621 expand_omp_build_assign (&gsi, fd->loops[i].v,
3622 fold_convert (type, fd->loops[i].n1));
3623 if (counts[i])
3624 expand_omp_build_assign (&gsi, counts[i],
3625 build_zero_cst (fd->iter_type));
3626 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3627 size_int (i - fd->collapse + 1),
3628 NULL_TREE, NULL_TREE);
3629 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3630 if (!gsi_end_p (gsi))
3631 gsi_prev (&gsi);
3632 else
3633 gsi = gsi_last_bb (body_bb);
3634 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3635 basic_block new_body = e1->dest;
3636 if (body_bb == cont_bb)
3637 cont_bb = new_body;
3638 edge e2 = NULL;
3639 basic_block new_header;
3640 if (EDGE_COUNT (cont_bb->preds) > 0)
3641 {
3642 gsi = gsi_last_bb (cont_bb);
3643 if (POINTER_TYPE_P (type))
3644 t = fold_build_pointer_plus (fd->loops[i].v,
3645 fold_convert (sizetype,
3646 fd->loops[i].step));
3647 else
3648 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3649 fold_convert (type, fd->loops[i].step));
3650 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3651 if (counts[i])
3652 {
3653 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3654 build_int_cst (fd->iter_type, 1));
3655 expand_omp_build_assign (&gsi, counts[i], t);
3656 t = counts[i];
3657 }
3658 else
3659 {
3660 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3661 fd->loops[i].v, fd->loops[i].n1);
3662 t = fold_convert (fd->iter_type, t);
3663 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3664 true, GSI_SAME_STMT);
3665 }
3666 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3667 size_int (i - fd->collapse + 1),
3668 NULL_TREE, NULL_TREE);
3669 expand_omp_build_assign (&gsi, aref, t);
3670 gsi_prev (&gsi);
3671 e2 = split_block (cont_bb, gsi_stmt (gsi));
3672 new_header = e2->dest;
3673 }
3674 else
3675 new_header = cont_bb;
3676 gsi = gsi_after_labels (new_header);
3677 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3678 true, GSI_SAME_STMT);
3679 tree n2
3680 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3681 true, NULL_TREE, true, GSI_SAME_STMT);
3682 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3683 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3684 edge e3 = split_block (new_header, gsi_stmt (gsi));
3685 cont_bb = e3->dest;
3686 remove_edge (e1);
3687 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3688 e3->flags = EDGE_FALSE_VALUE;
3689 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3690 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3691 e1->probability = e3->probability.invert ();
3692
3693 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3694 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3695
3696 if (e2)
3697 {
3698 class loop *loop = alloc_loop ();
3699 loop->header = new_header;
3700 loop->latch = e2->src;
3701 add_loop (loop, l0_bb->loop_father);
3702 }
3703 }
3704
3705 /* If there are any lastprivate clauses and it is possible some loops
3706 might have zero iterations, ensure all the decls are initialized,
3707 otherwise we could crash evaluating C++ class iterators with lastprivate
3708 clauses. */
3709 bool need_inits = false;
3710 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3711 if (need_inits)
3712 {
3713 tree type = TREE_TYPE (fd->loops[i].v);
3714 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3715 expand_omp_build_assign (&gsi, fd->loops[i].v,
3716 fold_convert (type, fd->loops[i].n1));
3717 }
3718 else
3719 {
3720 tree type = TREE_TYPE (fd->loops[i].v);
3721 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3722 boolean_type_node,
3723 fold_convert (type, fd->loops[i].n1),
3724 fold_convert (type, fd->loops[i].n2));
3725 if (!integer_onep (this_cond))
3726 need_inits = true;
3727 }
3728
3729 return cont_bb;
3730 }
3731
3732 /* A subroutine of expand_omp_for. Generate code for a parallel
3733 loop with any schedule. Given parameters:
3734
3735 for (V = N1; V cond N2; V += STEP) BODY;
3736
3737 where COND is "<" or ">", we generate pseudocode
3738
3739 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3740 if (more) goto L0; else goto L3;
3741 L0:
3742 V = istart0;
3743 iend = iend0;
3744 L1:
3745 BODY;
3746 V += STEP;
3747 if (V cond iend) goto L1; else goto L2;
3748 L2:
3749 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3750 L3:
3751
3752 If this is a combined omp parallel loop, instead of the call to
3753 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3754 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3755 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3756 inner GIMPLE_OMP_FOR and V += STEP; and
3757 if (V cond iend) goto L1; else goto L2; are removed.
3758
3759 For collapsed loops, given parameters:
3760 collapse(3)
3761 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3762 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3763 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3764 BODY;
3765
3766 we generate pseudocode
3767
3768 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3769 if (cond3 is <)
3770 adj = STEP3 - 1;
3771 else
3772 adj = STEP3 + 1;
3773 count3 = (adj + N32 - N31) / STEP3;
3774 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3775 if (cond2 is <)
3776 adj = STEP2 - 1;
3777 else
3778 adj = STEP2 + 1;
3779 count2 = (adj + N22 - N21) / STEP2;
3780 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3781 if (cond1 is <)
3782 adj = STEP1 - 1;
3783 else
3784 adj = STEP1 + 1;
3785 count1 = (adj + N12 - N11) / STEP1;
3786 count = count1 * count2 * count3;
3787 goto Z1;
3788 Z0:
3789 count = 0;
3790 Z1:
3791 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3792 if (more) goto L0; else goto L3;
3793 L0:
3794 V = istart0;
3795 T = V;
3796 V3 = N31 + (T % count3) * STEP3;
3797 T = T / count3;
3798 V2 = N21 + (T % count2) * STEP2;
3799 T = T / count2;
3800 V1 = N11 + T * STEP1;
3801 iend = iend0;
3802 L1:
3803 BODY;
3804 V += 1;
3805 if (V < iend) goto L10; else goto L2;
3806 L10:
3807 V3 += STEP3;
3808 if (V3 cond3 N32) goto L1; else goto L11;
3809 L11:
3810 V3 = N31;
3811 V2 += STEP2;
3812 if (V2 cond2 N22) goto L1; else goto L12;
3813 L12:
3814 V2 = N21;
3815 V1 += STEP1;
3816 goto L1;
3817 L2:
3818 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3819 L3:
3820
3821 */
3822
3823 static void
3824 expand_omp_for_generic (struct omp_region *region,
3825 struct omp_for_data *fd,
3826 enum built_in_function start_fn,
3827 enum built_in_function next_fn,
3828 tree sched_arg,
3829 gimple *inner_stmt)
3830 {
3831 tree type, istart0, iend0, iend;
3832 tree t, vmain, vback, bias = NULL_TREE;
3833 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3834 basic_block l2_bb = NULL, l3_bb = NULL;
3835 gimple_stmt_iterator gsi;
3836 gassign *assign_stmt;
3837 bool in_combined_parallel = is_combined_parallel (region);
3838 bool broken_loop = region->cont == NULL;
3839 edge e, ne;
3840 tree *counts = NULL;
3841 int i;
3842 bool ordered_lastprivate = false;
3843
3844 gcc_assert (!broken_loop || !in_combined_parallel);
3845 gcc_assert (fd->iter_type == long_integer_type_node
3846 || !in_combined_parallel);
3847
3848 entry_bb = region->entry;
3849 cont_bb = region->cont;
3850 collapse_bb = NULL;
3851 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3852 gcc_assert (broken_loop
3853 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3854 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3855 l1_bb = single_succ (l0_bb);
3856 if (!broken_loop)
3857 {
3858 l2_bb = create_empty_bb (cont_bb);
3859 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3860 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3861 == l1_bb));
3862 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3863 }
3864 else
3865 l2_bb = NULL;
3866 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3867 exit_bb = region->exit;
3868
3869 gsi = gsi_last_nondebug_bb (entry_bb);
3870
3871 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3872 if (fd->ordered
3873 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3874 OMP_CLAUSE_LASTPRIVATE))
3875 ordered_lastprivate = true;
3876 tree reductions = NULL_TREE;
3877 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3878 tree memv = NULL_TREE;
3879 if (fd->lastprivate_conditional)
3880 {
3881 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3882 OMP_CLAUSE__CONDTEMP_);
3883 if (fd->have_pointer_condtemp)
3884 condtemp = OMP_CLAUSE_DECL (c);
3885 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3886 cond_var = OMP_CLAUSE_DECL (c);
3887 }
3888 if (sched_arg)
3889 {
3890 if (fd->have_reductemp)
3891 {
3892 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3893 OMP_CLAUSE__REDUCTEMP_);
3894 reductions = OMP_CLAUSE_DECL (c);
3895 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3896 gimple *g = SSA_NAME_DEF_STMT (reductions);
3897 reductions = gimple_assign_rhs1 (g);
3898 OMP_CLAUSE_DECL (c) = reductions;
3899 entry_bb = gimple_bb (g);
3900 edge e = split_block (entry_bb, g);
3901 if (region->entry == entry_bb)
3902 region->entry = e->dest;
3903 gsi = gsi_last_bb (entry_bb);
3904 }
3905 else
3906 reductions = null_pointer_node;
3907 if (fd->have_pointer_condtemp)
3908 {
3909 tree type = TREE_TYPE (condtemp);
3910 memv = create_tmp_var (type);
3911 TREE_ADDRESSABLE (memv) = 1;
3912 unsigned HOST_WIDE_INT sz
3913 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3914 sz *= fd->lastprivate_conditional;
3915 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3916 false);
3917 mem = build_fold_addr_expr (memv);
3918 }
3919 else
3920 mem = null_pointer_node;
3921 }
3922 if (fd->collapse > 1 || fd->ordered)
3923 {
3924 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3925 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3926
3927 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3928 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3929 zero_iter1_bb, first_zero_iter1,
3930 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3931
3932 if (zero_iter1_bb)
3933 {
3934 /* Some counts[i] vars might be uninitialized if
3935 some loop has zero iterations. But the body shouldn't
3936 be executed in that case, so just avoid uninit warnings. */
3937 for (i = first_zero_iter1;
3938 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3939 if (SSA_VAR_P (counts[i]))
3940 suppress_warning (counts[i], OPT_Wuninitialized);
3941 gsi_prev (&gsi);
3942 e = split_block (entry_bb, gsi_stmt (gsi));
3943 entry_bb = e->dest;
3944 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3945 gsi = gsi_last_nondebug_bb (entry_bb);
3946 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3947 get_immediate_dominator (CDI_DOMINATORS,
3948 zero_iter1_bb));
3949 }
3950 if (zero_iter2_bb)
3951 {
3952 /* Some counts[i] vars might be uninitialized if
3953 some loop has zero iterations. But the body shouldn't
3954 be executed in that case, so just avoid uninit warnings. */
3955 for (i = first_zero_iter2; i < fd->ordered; i++)
3956 if (SSA_VAR_P (counts[i]))
3957 suppress_warning (counts[i], OPT_Wuninitialized);
3958 if (zero_iter1_bb)
3959 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3960 else
3961 {
3962 gsi_prev (&gsi);
3963 e = split_block (entry_bb, gsi_stmt (gsi));
3964 entry_bb = e->dest;
3965 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3966 gsi = gsi_last_nondebug_bb (entry_bb);
3967 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3968 get_immediate_dominator
3969 (CDI_DOMINATORS, zero_iter2_bb));
3970 }
3971 }
3972 if (fd->collapse == 1)
3973 {
3974 counts[0] = fd->loop.n2;
3975 fd->loop = fd->loops[0];
3976 }
3977 }
3978
3979 type = TREE_TYPE (fd->loop.v);
3980 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3981 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3982 TREE_ADDRESSABLE (istart0) = 1;
3983 TREE_ADDRESSABLE (iend0) = 1;
3984
3985 /* See if we need to bias by LLONG_MIN. */
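/* For example (informal): if V is signed long long iterating from -10 to
   10 while iter_type is unsigned long long, adding
   (unsigned long long) LLONG_MIN to N1 and N2 maps the signed range
   monotonically into the unsigned range, so the GOMP_loop_ull_* routines
   order iterations correctly; the same bias is subtracted again below
   when istart0/iend0 are converted back for assignment to V.  */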
3986 if (fd->iter_type == long_long_unsigned_type_node
3987 && TREE_CODE (type) == INTEGER_TYPE
3988 && !TYPE_UNSIGNED (type)
3989 && fd->ordered == 0)
3990 {
3991 tree n1, n2;
3992
3993 if (fd->loop.cond_code == LT_EXPR)
3994 {
3995 n1 = fd->loop.n1;
3996 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3997 }
3998 else
3999 {
4000 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4001 n2 = fd->loop.n1;
4002 }
4003 if (TREE_CODE (n1) != INTEGER_CST
4004 || TREE_CODE (n2) != INTEGER_CST
4005 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4006 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4007 }
4008
4009 gimple_stmt_iterator gsif = gsi;
4010 gsi_prev (&gsif);
4011
4012 tree arr = NULL_TREE;
4013 if (in_combined_parallel)
4014 {
4015 gcc_assert (fd->ordered == 0);
4016 /* In a combined parallel loop, emit a call to
4017 GOMP_loop_foo_next. */
4018 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4019 build_fold_addr_expr (istart0),
4020 build_fold_addr_expr (iend0));
4021 }
4022 else
4023 {
4024 tree t0, t1, t2, t3, t4;
4025 /* If this is not a combined parallel loop, emit a call to
4026 GOMP_loop_foo_start in ENTRY_BB. */
4027 t4 = build_fold_addr_expr (iend0);
4028 t3 = build_fold_addr_expr (istart0);
4029 if (fd->ordered)
4030 {
4031 t0 = build_int_cst (unsigned_type_node,
4032 fd->ordered - fd->collapse + 1);
4033 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4034 fd->ordered
4035 - fd->collapse + 1),
4036 ".omp_counts");
4037 DECL_NAMELESS (arr) = 1;
4038 TREE_ADDRESSABLE (arr) = 1;
4039 TREE_STATIC (arr) = 1;
4040 vec<constructor_elt, va_gc> *v;
4041 vec_alloc (v, fd->ordered - fd->collapse + 1);
4042 int idx;
4043
4044 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4045 {
4046 tree c;
4047 if (idx == 0 && fd->collapse > 1)
4048 c = fd->loop.n2;
4049 else
4050 c = counts[idx + fd->collapse - 1];
4051 tree purpose = size_int (idx);
4052 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4053 if (TREE_CODE (c) != INTEGER_CST)
4054 TREE_STATIC (arr) = 0;
4055 }
4056
4057 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4058 if (!TREE_STATIC (arr))
4059 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4060 void_type_node, arr),
4061 true, NULL_TREE, true, GSI_SAME_STMT);
4062 t1 = build_fold_addr_expr (arr);
4063 t2 = NULL_TREE;
4064 }
4065 else
4066 {
4067 t2 = fold_convert (fd->iter_type, fd->loop.step);
4068 t1 = fd->loop.n2;
4069 t0 = fd->loop.n1;
4070 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071 {
4072 tree innerc
4073 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4074 OMP_CLAUSE__LOOPTEMP_);
4075 gcc_assert (innerc);
4076 t0 = OMP_CLAUSE_DECL (innerc);
4077 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4078 OMP_CLAUSE__LOOPTEMP_);
4079 gcc_assert (innerc);
4080 t1 = OMP_CLAUSE_DECL (innerc);
4081 }
4082 if (POINTER_TYPE_P (TREE_TYPE (t0))
4083 && TYPE_PRECISION (TREE_TYPE (t0))
4084 != TYPE_PRECISION (fd->iter_type))
4085 {
4086 /* Avoid casting pointers to integer of a different size. */
4087 tree itype = signed_type_for (type);
4088 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4089 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4090 }
4091 else
4092 {
4093 t1 = fold_convert (fd->iter_type, t1);
4094 t0 = fold_convert (fd->iter_type, t0);
4095 }
4096 if (bias)
4097 {
4098 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4099 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4100 }
4101 }
4102 if (fd->iter_type == long_integer_type_node || fd->ordered)
4103 {
4104 if (fd->chunk_size)
4105 {
4106 t = fold_convert (fd->iter_type, fd->chunk_size);
4107 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4108 if (sched_arg)
4109 {
4110 if (fd->ordered)
4111 t = build_call_expr (builtin_decl_explicit (start_fn),
4112 8, t0, t1, sched_arg, t, t3, t4,
4113 reductions, mem);
4114 else
4115 t = build_call_expr (builtin_decl_explicit (start_fn),
4116 9, t0, t1, t2, sched_arg, t, t3, t4,
4117 reductions, mem);
4118 }
4119 else if (fd->ordered)
4120 t = build_call_expr (builtin_decl_explicit (start_fn),
4121 5, t0, t1, t, t3, t4);
4122 else
4123 t = build_call_expr (builtin_decl_explicit (start_fn),
4124 6, t0, t1, t2, t, t3, t4);
4125 }
4126 else if (fd->ordered)
4127 t = build_call_expr (builtin_decl_explicit (start_fn),
4128 4, t0, t1, t3, t4);
4129 else
4130 t = build_call_expr (builtin_decl_explicit (start_fn),
4131 5, t0, t1, t2, t3, t4);
4132 }
4133 else
4134 {
4135 tree t5;
4136 tree c_bool_type;
4137 tree bfn_decl;
4138
4139 /* The GOMP_loop_ull_*start functions have additional boolean
4140 argument, true for < loops and false for > loops.
4141 In Fortran, the C bool type can be different from
4142 boolean_type_node. */
4143 bfn_decl = builtin_decl_explicit (start_fn);
4144 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4145 t5 = build_int_cst (c_bool_type,
4146 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4147 if (fd->chunk_size)
4148 {
4149 tree bfn_decl = builtin_decl_explicit (start_fn);
4150 t = fold_convert (fd->iter_type, fd->chunk_size);
4151 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4152 if (sched_arg)
4153 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4154 t, t3, t4, reductions, mem);
4155 else
4156 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4157 }
4158 else
4159 t = build_call_expr (builtin_decl_explicit (start_fn),
4160 6, t5, t0, t1, t2, t3, t4);
4161 }
4162 }
4163 if (TREE_TYPE (t) != boolean_type_node)
4164 t = fold_build2 (NE_EXPR, boolean_type_node,
4165 t, build_int_cst (TREE_TYPE (t), 0));
4166 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4167 true, GSI_SAME_STMT);
4168 if (arr && !TREE_STATIC (arr))
4169 {
4170 tree clobber = build_clobber (TREE_TYPE (arr));
4171 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4172 GSI_SAME_STMT);
4173 }
4174 if (fd->have_pointer_condtemp)
4175 expand_omp_build_assign (&gsi, condtemp, memv, false);
4176 if (fd->have_reductemp)
4177 {
4178 gimple *g = gsi_stmt (gsi);
4179 gsi_remove (&gsi, true);
4180 release_ssa_name (gimple_assign_lhs (g));
4181
4182 entry_bb = region->entry;
4183 gsi = gsi_last_nondebug_bb (entry_bb);
4184
4185 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4186 }
4187 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4188
4189 /* Remove the GIMPLE_OMP_FOR statement. */
4190 gsi_remove (&gsi, true);
4191
4192 if (gsi_end_p (gsif))
4193 gsif = gsi_after_labels (gsi_bb (gsif));
4194 gsi_next (&gsif);
4195
4196 /* Iteration setup for sequential loop goes in L0_BB. */
4197 tree startvar = fd->loop.v;
4198 tree endvar = NULL_TREE;
4199
4200 if (gimple_omp_for_combined_p (fd->for_stmt))
4201 {
4202 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4203 && gimple_omp_for_kind (inner_stmt)
4204 == GF_OMP_FOR_KIND_SIMD);
4205 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4206 OMP_CLAUSE__LOOPTEMP_);
4207 gcc_assert (innerc);
4208 startvar = OMP_CLAUSE_DECL (innerc);
4209 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4210 OMP_CLAUSE__LOOPTEMP_);
4211 gcc_assert (innerc);
4212 endvar = OMP_CLAUSE_DECL (innerc);
4213 }
4214
4215 gsi = gsi_start_bb (l0_bb);
4216 t = istart0;
4217 if (fd->ordered && fd->collapse == 1)
4218 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4219 fold_convert (fd->iter_type, fd->loop.step));
4220 else if (bias)
4221 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4222 if (fd->ordered && fd->collapse == 1)
4223 {
4224 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4225 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4226 fd->loop.n1, fold_convert (sizetype, t));
4227 else
4228 {
4229 t = fold_convert (TREE_TYPE (startvar), t);
4230 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4231 fd->loop.n1, t);
4232 }
4233 }
4234 else
4235 {
4236 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4237 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4238 t = fold_convert (TREE_TYPE (startvar), t);
4239 }
4240 t = force_gimple_operand_gsi (&gsi, t,
4241 DECL_P (startvar)
4242 && TREE_ADDRESSABLE (startvar),
4243 NULL_TREE, false, GSI_CONTINUE_LINKING);
4244 assign_stmt = gimple_build_assign (startvar, t);
4245 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4246 if (cond_var)
4247 {
4248 tree itype = TREE_TYPE (cond_var);
4249 /* For lastprivate(conditional:) itervar, we need some iteration
4250 counter that starts at unsigned non-zero and increases.
4251 Prefer as few IVs as possible, so if we can use startvar
4252 itself, use that, or startvar + constant (those would be
4253 incremented with step), and as last resort use the s0 + 1
4254 incremented by 1. */
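/* For example (informal): for
     for (V = 1; V < N; V++)
   the start value itself is already a nonzero, increasing counter and is
   used directly, while for a loop starting at constant N1 = 0 the counter
   used is V + 1, so that it starts at 1 (ignoring the biased and pointer
   cases handled by the first branch below).  */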
4255 if ((fd->ordered && fd->collapse == 1)
4256 || bias
4257 || POINTER_TYPE_P (type)
4258 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4259 || fd->loop.cond_code != LT_EXPR)
4260 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4261 build_int_cst (itype, 1));
4262 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4263 t = fold_convert (itype, t);
4264 else
4265 {
4266 tree c = fold_convert (itype, fd->loop.n1);
4267 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4268 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4269 }
4270 t = force_gimple_operand_gsi (&gsi, t, false,
4271 NULL_TREE, false, GSI_CONTINUE_LINKING);
4272 assign_stmt = gimple_build_assign (cond_var, t);
4273 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4274 }
4275
4276 t = iend0;
4277 if (fd->ordered && fd->collapse == 1)
4278 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4279 fold_convert (fd->iter_type, fd->loop.step));
4280 else if (bias)
4281 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4282 if (fd->ordered && fd->collapse == 1)
4283 {
4284 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4285 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4286 fd->loop.n1, fold_convert (sizetype, t));
4287 else
4288 {
4289 t = fold_convert (TREE_TYPE (startvar), t);
4290 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4291 fd->loop.n1, t);
4292 }
4293 }
4294 else
4295 {
4296 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4297 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4298 t = fold_convert (TREE_TYPE (startvar), t);
4299 }
4300 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4301 false, GSI_CONTINUE_LINKING);
4302 if (endvar)
4303 {
4304 assign_stmt = gimple_build_assign (endvar, iend);
4305 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4306 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4307 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4308 else
4309 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4310 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4311 }
4312 /* Handle linear clause adjustments. */
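/* For example (informal): for #pragma omp for linear(x:2), the loop below
   initializes the privatized x at the start of this thread's range to
     x = x_entry + ((startvar - N1) / STEP) * 2;
   i.e. its value on entry plus the linear step times the number of
   iterations that precede this thread's first iteration.  */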
4313 tree itercnt = NULL_TREE;
4314 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4315 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4316 c; c = OMP_CLAUSE_CHAIN (c))
4317 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4318 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4319 {
4320 tree d = OMP_CLAUSE_DECL (c);
4321 tree t = d, a, dest;
4322 if (omp_privatize_by_reference (t))
4323 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4324 tree type = TREE_TYPE (t);
4325 if (POINTER_TYPE_P (type))
4326 type = sizetype;
4327 dest = unshare_expr (t);
4328 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4329 expand_omp_build_assign (&gsif, v, t);
4330 if (itercnt == NULL_TREE)
4331 {
4332 itercnt = startvar;
4333 tree n1 = fd->loop.n1;
4334 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4335 {
4336 itercnt
4337 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4338 itercnt);
4339 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4340 }
4341 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4342 itercnt, n1);
4343 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4344 itercnt, fd->loop.step);
4345 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4346 NULL_TREE, false,
4347 GSI_CONTINUE_LINKING);
4348 }
4349 a = fold_build2 (MULT_EXPR, type,
4350 fold_convert (type, itercnt),
4351 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4352 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4353 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4354 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4355 false, GSI_CONTINUE_LINKING);
4356 expand_omp_build_assign (&gsi, dest, t, true);
4357 }
4358 if (fd->collapse > 1)
4359 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4360
4361 if (fd->ordered)
4362 {
4363 /* Until now, the counts array contained the number of iterations
4364 (or a variable containing it) for the ith loop. From now on, we need
4365 those counts only for collapsed loops, and only for the 2nd
4366 till the last collapsed one. Move those one element earlier,
4367 we'll use counts[fd->collapse - 1] for the first source/sink
4368 iteration counter and so on and counts[fd->ordered]
4369 as the array holding the current counter values for
4370 depend(source). */
4371 if (fd->collapse > 1)
4372 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4373 if (broken_loop)
4374 {
4375 int i;
4376 for (i = fd->collapse; i < fd->ordered; i++)
4377 {
4378 tree type = TREE_TYPE (fd->loops[i].v);
4379 tree this_cond
4380 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4381 fold_convert (type, fd->loops[i].n1),
4382 fold_convert (type, fd->loops[i].n2));
4383 if (!integer_onep (this_cond))
4384 break;
4385 }
4386 if (i < fd->ordered)
4387 {
4388 if (entry_bb->loop_father != l0_bb->loop_father)
4389 {
4390 remove_bb_from_loops (l0_bb);
4391 add_bb_to_loop (l0_bb, entry_bb->loop_father);
4392 gcc_assert (single_succ (l0_bb) == l1_bb);
4393 }
4394 cont_bb
4395 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4396 add_bb_to_loop (cont_bb, l0_bb->loop_father);
4397 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4398 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4399 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4400 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4401 make_edge (cont_bb, l1_bb, 0);
4402 l2_bb = create_empty_bb (cont_bb);
4403 broken_loop = false;
4404 }
4405 }
4406 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4407 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4408 l0_bb, ordered_lastprivate);
4409 if (counts[fd->collapse - 1])
4410 {
4411 gcc_assert (fd->collapse == 1);
4412 gsi = gsi_last_bb (l0_bb);
4413 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4414 istart0, true);
4415 if (cont_bb)
4416 {
4417 gsi = gsi_last_bb (cont_bb);
4418 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4419 counts[fd->collapse - 1],
4420 build_int_cst (fd->iter_type, 1));
4421 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4422 tree aref = build4 (ARRAY_REF, fd->iter_type,
4423 counts[fd->ordered], size_zero_node,
4424 NULL_TREE, NULL_TREE);
4425 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4426 }
4427 t = counts[fd->collapse - 1];
4428 }
4429 else if (fd->collapse > 1)
4430 t = fd->loop.v;
4431 else
4432 {
4433 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4434 fd->loops[0].v, fd->loops[0].n1);
4435 t = fold_convert (fd->iter_type, t);
4436 }
4437 gsi = gsi_last_bb (l0_bb);
4438 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4439 size_zero_node, NULL_TREE, NULL_TREE);
4440 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4441 false, GSI_CONTINUE_LINKING);
4442 expand_omp_build_assign (&gsi, aref, t, true);
4443 }
4444
4445 if (!broken_loop)
4446 {
4447 /* Code to control the increment and predicate for the sequential
4448 loop goes in the CONT_BB. */
4449 gsi = gsi_last_nondebug_bb (cont_bb);
4450 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4451 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4452 vmain = gimple_omp_continue_control_use (cont_stmt);
4453 vback = gimple_omp_continue_control_def (cont_stmt);
4454
4455 if (cond_var)
4456 {
4457 tree itype = TREE_TYPE (cond_var);
4458 tree t2;
4459 if ((fd->ordered && fd->collapse == 1)
4460 || bias
4461 || POINTER_TYPE_P (type)
4462 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4463 || fd->loop.cond_code != LT_EXPR)
4464 t2 = build_int_cst (itype, 1);
4465 else
4466 t2 = fold_convert (itype, fd->loop.step);
4467 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4468 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4469 NULL_TREE, true, GSI_SAME_STMT);
4470 assign_stmt = gimple_build_assign (cond_var, t2);
4471 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4472 }
4473
4474 if (!gimple_omp_for_combined_p (fd->for_stmt))
4475 {
4476 if (POINTER_TYPE_P (type))
4477 t = fold_build_pointer_plus (vmain, fd->loop.step);
4478 else
4479 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4480 t = force_gimple_operand_gsi (&gsi, t,
4481 DECL_P (vback)
4482 && TREE_ADDRESSABLE (vback),
4483 NULL_TREE, true, GSI_SAME_STMT);
4484 assign_stmt = gimple_build_assign (vback, t);
4485 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4486
4487 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4488 {
4489 tree tem;
4490 if (fd->collapse > 1)
4491 tem = fd->loop.v;
4492 else
4493 {
4494 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4495 fd->loops[0].v, fd->loops[0].n1);
4496 tem = fold_convert (fd->iter_type, tem);
4497 }
4498 tree aref = build4 (ARRAY_REF, fd->iter_type,
4499 counts[fd->ordered], size_zero_node,
4500 NULL_TREE, NULL_TREE);
4501 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4502 true, GSI_SAME_STMT);
4503 expand_omp_build_assign (&gsi, aref, tem);
4504 }
4505
4506 t = build2 (fd->loop.cond_code, boolean_type_node,
4507 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4508 iend);
4509 gcond *cond_stmt = gimple_build_cond_empty (t);
4510 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4511 }
4512
4513 /* Remove GIMPLE_OMP_CONTINUE. */
4514 gsi_remove (&gsi, true);
4515
4516 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4517 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4518
4519 /* Emit code to get the next parallel iteration in L2_BB. */
4520 gsi = gsi_start_bb (l2_bb);
4521
4522 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4523 build_fold_addr_expr (istart0),
4524 build_fold_addr_expr (iend0));
4525 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4526 false, GSI_CONTINUE_LINKING);
4527 if (TREE_TYPE (t) != boolean_type_node)
4528 t = fold_build2 (NE_EXPR, boolean_type_node,
4529 t, build_int_cst (TREE_TYPE (t), 0));
4530 gcond *cond_stmt = gimple_build_cond_empty (t);
4531 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4532 }
4533
4534 /* Add the loop cleanup function. */
4535 gsi = gsi_last_nondebug_bb (exit_bb);
4536 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4537 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4538 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4539 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4540 else
4541 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4542 gcall *call_stmt = gimple_build_call (t, 0);
4543 if (fd->ordered)
4544 {
4545 tree arr = counts[fd->ordered];
4546 tree clobber = build_clobber (TREE_TYPE (arr));
4547 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4548 GSI_SAME_STMT);
4549 }
4550 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4551 {
4552 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4553 if (fd->have_reductemp)
4554 {
4555 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4556 gimple_call_lhs (call_stmt));
4557 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4558 }
4559 }
4560 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4561 gsi_remove (&gsi, true);
4562
4563 /* Connect the new blocks. */
4564 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4565 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4566
4567 if (!broken_loop)
4568 {
4569 gimple_seq phis;
4570
4571 e = find_edge (cont_bb, l3_bb);
4572 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4573
4574 phis = phi_nodes (l3_bb);
4575 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4576 {
4577 gimple *phi = gsi_stmt (gsi);
4578 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4579 PHI_ARG_DEF_FROM_EDGE (phi, e));
4580 }
4581 remove_edge (e);
4582
4583 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4584 e = find_edge (cont_bb, l1_bb);
4585 if (e == NULL)
4586 {
4587 e = BRANCH_EDGE (cont_bb);
4588 gcc_assert (single_succ (e->dest) == l1_bb);
4589 }
4590 if (gimple_omp_for_combined_p (fd->for_stmt))
4591 {
4592 remove_edge (e);
4593 e = NULL;
4594 }
4595 else if (fd->collapse > 1)
4596 {
4597 remove_edge (e);
4598 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4599 }
4600 else
4601 e->flags = EDGE_TRUE_VALUE;
4602 if (e)
4603 {
4604 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4605 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4606 }
4607 else
4608 {
4609 e = find_edge (cont_bb, l2_bb);
4610 e->flags = EDGE_FALLTHRU;
4611 }
4612 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4613
4614 if (gimple_in_ssa_p (cfun))
4615 {
4616 /* Add phis to the outer loop that connect to the phis in the inner,
4617 original loop, and move the loop entry value of the inner phi to
4618 the loop entry value of the outer phi. */
4619 gphi_iterator psi;
4620 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4621 {
4622 location_t locus;
4623 gphi *nphi;
4624 gphi *exit_phi = psi.phi ();
4625
4626 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4627 continue;
4628
4629 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4630 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4631
4632 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4633 edge latch_to_l1 = find_edge (latch, l1_bb);
4634 gphi *inner_phi
4635 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4636
4637 tree t = gimple_phi_result (exit_phi);
4638 tree new_res = copy_ssa_name (t, NULL);
4639 nphi = create_phi_node (new_res, l0_bb);
4640
4641 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4642 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4643 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4644 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4645 add_phi_arg (nphi, t, entry_to_l0, locus);
4646
4647 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4648 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4649
4650 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4651 }
4652 }
4653
4654 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4655 recompute_dominator (CDI_DOMINATORS, l2_bb));
4656 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4657 recompute_dominator (CDI_DOMINATORS, l3_bb));
4658 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4659 recompute_dominator (CDI_DOMINATORS, l0_bb));
4660 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4661 recompute_dominator (CDI_DOMINATORS, l1_bb));
4662
4663 /* We enter expand_omp_for_generic with a loop. This original loop may
4664 have its own loop struct, or it may be part of an outer loop struct
4665 (which may be the fake loop). */
4666 class loop *outer_loop = entry_bb->loop_father;
4667 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4668
4669 add_bb_to_loop (l2_bb, outer_loop);
4670
4671 /* We've added a new loop around the original loop. Allocate the
4672 corresponding loop struct. */
4673 class loop *new_loop = alloc_loop ();
4674 new_loop->header = l0_bb;
4675 new_loop->latch = l2_bb;
4676 add_loop (new_loop, outer_loop);
4677
4678 /* Allocate a loop structure for the original loop unless we already
4679 had one. */
4680 if (!orig_loop_has_loop_struct
4681 && !gimple_omp_for_combined_p (fd->for_stmt))
4682 {
4683 class loop *orig_loop = alloc_loop ();
4684 orig_loop->header = l1_bb;
4685 /* The loop may have multiple latches. */
4686 add_loop (orig_loop, new_loop);
4687 }
4688 }
4689 }
4690
4691 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4692 compute the needed allocation size: for !ALLOC the size of the team
4693 allocations, for ALLOC the size of the per-thread allocation. SZ is
4694 the initial size needed for other purposes, ALLOC_ALIGN the guaranteed
4695 alignment of the allocation in bytes, and CNT the number of elements of
4696 each array: for !ALLOC this is omp_get_num_threads (), for ALLOC the
4697 number of iterations handled by the current thread. If PTR is non-NULL,
4698 it is the start of the allocation and this routine assigns to
4699 OMP_CLAUSE_DECL (c) of the _scantemp_ clauses pointers to the corresponding arrays. */
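/* Informally, the scan temporaries are laid out back to back in one
   allocation, e.g. for two _scantemp_ clauses with element types T1 and T2:

     [ SZ bytes for other purposes | CNT * sizeof (T1) | CNT * sizeof (T2) ]

   with alignment padding between sections as needed; a first call with
   PTR == NULL just sums up these sizes.  */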
4700
4701 static tree
4702 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4703 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4704 gimple_stmt_iterator *gsi, bool alloc)
4705 {
4706 tree eltsz = NULL_TREE;
4707 unsigned HOST_WIDE_INT preval = 0;
4708 if (ptr && sz)
4709 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4710 ptr, size_int (sz));
4711 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4712 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4713 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4714 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4715 {
4716 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4717 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4718 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4719 {
4720 unsigned HOST_WIDE_INT szl
4721 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4722 szl = least_bit_hwi (szl);
4723 if (szl)
4724 al = MIN (al, szl);
4725 }
4726 if (ptr == NULL_TREE)
4727 {
4728 if (eltsz == NULL_TREE)
4729 eltsz = TYPE_SIZE_UNIT (pointee_type);
4730 else
4731 eltsz = size_binop (PLUS_EXPR, eltsz,
4732 TYPE_SIZE_UNIT (pointee_type));
4733 }
4734 if (preval == 0 && al <= alloc_align)
4735 {
4736 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4737 sz += diff;
4738 if (diff && ptr)
4739 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4740 ptr, size_int (diff));
4741 }
4742 else if (al > preval)
4743 {
4744 if (ptr)
4745 {
4746 ptr = fold_convert (pointer_sized_int_node, ptr);
4747 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4748 build_int_cst (pointer_sized_int_node,
4749 al - 1));
4750 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4751 build_int_cst (pointer_sized_int_node,
4752 -(HOST_WIDE_INT) al));
4753 ptr = fold_convert (ptr_type_node, ptr);
4754 }
4755 else
4756 sz += al - 1;
4757 }
4758 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4759 preval = al;
4760 else
4761 preval = 1;
4762 if (ptr)
4763 {
4764 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4765 ptr = OMP_CLAUSE_DECL (c);
4766 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4767 size_binop (MULT_EXPR, cnt,
4768 TYPE_SIZE_UNIT (pointee_type)));
4769 }
4770 }
4771
4772 if (ptr == NULL_TREE)
4773 {
4774 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4775 if (sz)
4776 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4777 return eltsz;
4778 }
4779 else
4780 return ptr;
4781 }
4782
4783 /* Return the last _looptemp_ clause if one has been created for
4784 lastprivate on distribute parallel for{, simd} or taskloop.
4785 FD is the loop data and INNERC should be the second _looptemp_
4786 clause (the one holding the end of the range).
4787 This is followed by collapse - 1 _looptemp_ clauses for the
4788 counts[1] and up, and for triangular loops followed by 4
4789 further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4790 one factor and one adjn1). After this there is optionally one
4791 _looptemp_ clause that this function returns. */
4792
4793 static tree
4794 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4795 {
4796 gcc_assert (innerc);
4797 int count = fd->collapse - 1;
4798 if (fd->non_rect
4799 && fd->last_nonrect == fd->first_nonrect + 1
4800 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4801 count += 4;
4802 for (int i = 0; i < count; i++)
4803 {
4804 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4805 OMP_CLAUSE__LOOPTEMP_);
4806 gcc_assert (innerc);
4807 }
4808 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4809 OMP_CLAUSE__LOOPTEMP_);
4810 }
4811
4812 /* A subroutine of expand_omp_for. Generate code for a parallel
4813 loop with static schedule and no specified chunk size. Given
4814 parameters:
4815
4816 for (V = N1; V cond N2; V += STEP) BODY;
4817
4818 where COND is "<" or ">", we generate pseudocode
4819
4820 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4821 if (cond is <)
4822 adj = STEP - 1;
4823 else
4824 adj = STEP + 1;
4825 if ((__typeof (V)) -1 > 0 && cond is >)
4826 n = -(adj + N2 - N1) / -STEP;
4827 else
4828 n = (adj + N2 - N1) / STEP;
4829 q = n / nthreads;
4830 tt = n % nthreads;
4831 if (threadid < tt) goto L3; else goto L4;
4832 L3:
4833 tt = 0;
4834 q = q + 1;
4835 L4:
4836 s0 = q * threadid + tt;
4837 e0 = s0 + q;
4838 V = s0 * STEP + N1;
4839 if (s0 >= e0) goto L2; else goto L0;
4840 L0:
4841 e = e0 * STEP + N1;
4842 L1:
4843 BODY;
4844 V += STEP;
4845 if (V cond e) goto L1;
4846 L2:
4847 */
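/* A worked example of the partitioning arithmetic above (informal): with
   n = 10 iterations and nthreads = 4, q = 2 and tt = 2, so threads 0 and 1
   each get q + 1 = 3 iterations ([0,3) and [3,6)) while threads 2 and 3
   get 2 iterations ([6,8) and [8,10)); the first tt threads take the
   remainder, one extra iteration each.  */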
4848
4849 static void
4850 expand_omp_for_static_nochunk (struct omp_region *region,
4851 struct omp_for_data *fd,
4852 gimple *inner_stmt)
4853 {
4854 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4855 tree type, itype, vmain, vback;
4856 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4857 basic_block body_bb, cont_bb, collapse_bb = NULL;
4858 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4859 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4860 gimple_stmt_iterator gsi, gsip;
4861 edge ep;
4862 bool broken_loop = region->cont == NULL;
4863 tree *counts = NULL;
4864 tree n1, n2, step;
4865 tree reductions = NULL_TREE;
4866 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4867
4868 itype = type = TREE_TYPE (fd->loop.v);
4869 if (POINTER_TYPE_P (type))
4870 itype = signed_type_for (type);
4871
4872 entry_bb = region->entry;
4873 cont_bb = region->cont;
4874 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4875 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4876 gcc_assert (broken_loop
4877 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4878 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4879 body_bb = single_succ (seq_start_bb);
4880 if (!broken_loop)
4881 {
4882 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4883 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4884 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4885 }
4886 exit_bb = region->exit;
4887
4888 /* Iteration space partitioning goes in ENTRY_BB. */
4889 gsi = gsi_last_nondebug_bb (entry_bb);
4890 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4891 gsip = gsi;
4892 gsi_prev (&gsip);
4893
4894 if (fd->collapse > 1)
4895 {
4896 int first_zero_iter = -1, dummy = -1;
4897 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4898
4899 counts = XALLOCAVEC (tree, fd->collapse);
4900 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4901 fin_bb, first_zero_iter,
4902 dummy_bb, dummy, l2_dom_bb);
4903 t = NULL_TREE;
4904 }
4905 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4906 t = integer_one_node;
4907 else
4908 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4909 fold_convert (type, fd->loop.n1),
4910 fold_convert (type, fd->loop.n2));
4911 if (fd->collapse == 1
4912 && TYPE_UNSIGNED (type)
4913 && (t == NULL_TREE || !integer_onep (t)))
4914 {
4915 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4916 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4917 true, GSI_SAME_STMT);
4918 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4919 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4920 true, GSI_SAME_STMT);
4921 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4922 n1, n2);
4923 ep = split_block (entry_bb, cond_stmt);
4924 ep->flags = EDGE_TRUE_VALUE;
4925 entry_bb = ep->dest;
4926 ep->probability = profile_probability::very_likely ();
4927 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4928 ep->probability = profile_probability::very_unlikely ();
4929 if (gimple_in_ssa_p (cfun))
4930 {
4931 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4932 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4933 !gsi_end_p (gpi); gsi_next (&gpi))
4934 {
4935 gphi *phi = gpi.phi ();
4936 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4937 ep, UNKNOWN_LOCATION);
4938 }
4939 }
4940 gsi = gsi_last_bb (entry_bb);
4941 }
4942
4943 if (fd->lastprivate_conditional)
4944 {
4945 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4946 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4947 if (fd->have_pointer_condtemp)
4948 condtemp = OMP_CLAUSE_DECL (c);
4949 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4950 cond_var = OMP_CLAUSE_DECL (c);
4951 }
4952 if (fd->have_reductemp
4953 /* For scan, we don't want to reinitialize condtemp before the
4954 second loop. */
4955 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4956 || fd->have_nonctrl_scantemp)
4957 {
4958 tree t1 = build_int_cst (long_integer_type_node, 0);
4959 tree t2 = build_int_cst (long_integer_type_node, 1);
4960 tree t3 = build_int_cstu (long_integer_type_node,
4961 (HOST_WIDE_INT_1U << 31) + 1);
4962 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4963 gimple_stmt_iterator gsi2 = gsi_none ();
4964 gimple *g = NULL;
4965 tree mem = null_pointer_node, memv = NULL_TREE;
4966 unsigned HOST_WIDE_INT condtemp_sz = 0;
4967 unsigned HOST_WIDE_INT alloc_align = 0;
4968 if (fd->have_reductemp)
4969 {
4970 gcc_assert (!fd->have_nonctrl_scantemp);
4971 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4972 reductions = OMP_CLAUSE_DECL (c);
4973 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4974 g = SSA_NAME_DEF_STMT (reductions);
4975 reductions = gimple_assign_rhs1 (g);
4976 OMP_CLAUSE_DECL (c) = reductions;
4977 gsi2 = gsi_for_stmt (g);
4978 }
4979 else
4980 {
4981 if (gsi_end_p (gsip))
4982 gsi2 = gsi_after_labels (region->entry);
4983 else
4984 gsi2 = gsip;
4985 reductions = null_pointer_node;
4986 }
4987 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4988 {
4989 tree type;
4990 if (fd->have_pointer_condtemp)
4991 type = TREE_TYPE (condtemp);
4992 else
4993 type = ptr_type_node;
4994 memv = create_tmp_var (type);
4995 TREE_ADDRESSABLE (memv) = 1;
4996 unsigned HOST_WIDE_INT sz = 0;
4997 tree size = NULL_TREE;
4998 if (fd->have_pointer_condtemp)
4999 {
5000 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5001 sz *= fd->lastprivate_conditional;
5002 condtemp_sz = sz;
5003 }
5004 if (fd->have_nonctrl_scantemp)
5005 {
5006 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5007 gimple *g = gimple_build_call (nthreads, 0);
5008 nthreads = create_tmp_var (integer_type_node);
5009 gimple_call_set_lhs (g, nthreads);
5010 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5011 nthreads = fold_convert (sizetype, nthreads);
5012 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5013 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5014 alloc_align, nthreads, NULL,
5015 false);
5016 size = fold_convert (type, size);
5017 }
5018 else
5019 size = build_int_cst (type, sz);
5020 expand_omp_build_assign (&gsi2, memv, size, false);
5021 mem = build_fold_addr_expr (memv);
5022 }
5023 tree t
5024 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5025 9, t1, t2, t2, t3, t1, null_pointer_node,
5026 null_pointer_node, reductions, mem);
5027 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5028 true, GSI_SAME_STMT);
5029 if (fd->have_pointer_condtemp)
5030 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5031 if (fd->have_nonctrl_scantemp)
5032 {
5033 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5034 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5035 alloc_align, nthreads, &gsi2, false);
5036 }
5037 if (fd->have_reductemp)
5038 {
5039 gsi_remove (&gsi2, true);
5040 release_ssa_name (gimple_assign_lhs (g));
5041 }
5042 }
5043 switch (gimple_omp_for_kind (fd->for_stmt))
5044 {
5045 case GF_OMP_FOR_KIND_FOR:
5046 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5047 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5048 break;
5049 case GF_OMP_FOR_KIND_DISTRIBUTE:
5050 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5051 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5052 break;
5053 default:
5054 gcc_unreachable ();
5055 }
5056 nthreads = build_call_expr (nthreads, 0);
5057 nthreads = fold_convert (itype, nthreads);
5058 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5059 true, GSI_SAME_STMT);
5060 threadid = build_call_expr (threadid, 0);
5061 threadid = fold_convert (itype, threadid);
5062 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5063 true, GSI_SAME_STMT);
5064
5065 n1 = fd->loop.n1;
5066 n2 = fd->loop.n2;
5067 step = fd->loop.step;
5068 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5069 {
5070 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5071 OMP_CLAUSE__LOOPTEMP_);
5072 gcc_assert (innerc);
5073 n1 = OMP_CLAUSE_DECL (innerc);
5074 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5075 OMP_CLAUSE__LOOPTEMP_);
5076 gcc_assert (innerc);
5077 n2 = OMP_CLAUSE_DECL (innerc);
5078 }
5079 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5080 true, NULL_TREE, true, GSI_SAME_STMT);
5081 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5082 true, NULL_TREE, true, GSI_SAME_STMT);
5083 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5084 true, NULL_TREE, true, GSI_SAME_STMT);
5085
5086 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5087 t = fold_build2 (PLUS_EXPR, itype, step, t);
5088 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5089 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5090 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5091 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5092 fold_build1 (NEGATE_EXPR, itype, t),
5093 fold_build1 (NEGATE_EXPR, itype, step));
5094 else
5095 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5096 t = fold_convert (itype, t);
5097 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5098
5099 q = create_tmp_reg (itype, "q");
5100 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5101 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5102 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5103
5104 tt = create_tmp_reg (itype, "tt");
5105 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5106 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5107 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5108
5109 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5110 gcond *cond_stmt = gimple_build_cond_empty (t);
5111 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5112
5113 second_bb = split_block (entry_bb, cond_stmt)->dest;
5114 gsi = gsi_last_nondebug_bb (second_bb);
5115 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5116
5117 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5118 GSI_SAME_STMT);
5119 gassign *assign_stmt
5120 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5121 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5122
5123 third_bb = split_block (second_bb, assign_stmt)->dest;
5124 gsi = gsi_last_nondebug_bb (third_bb);
5125 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5126
5127 if (fd->have_nonctrl_scantemp)
5128 {
5129 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5130 tree controlp = NULL_TREE, controlb = NULL_TREE;
5131 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5132 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5133 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5134 {
5135 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5136 controlb = OMP_CLAUSE_DECL (c);
5137 else
5138 controlp = OMP_CLAUSE_DECL (c);
5139 if (controlb && controlp)
5140 break;
5141 }
5142 gcc_assert (controlp && controlb);
5143 tree cnt = create_tmp_var (sizetype);
5144 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5145 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5146 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5147 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5148 alloc_align, cnt, NULL, true);
5149 tree size = create_tmp_var (sizetype);
5150 expand_omp_build_assign (&gsi, size, sz, false);
5151 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5152 size, size_int (16384));
5153 expand_omp_build_assign (&gsi, controlb, cmp);
5154 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5155 NULL_TREE, NULL_TREE);
5156 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5157 fourth_bb = split_block (third_bb, g)->dest;
5158 gsi = gsi_last_nondebug_bb (fourth_bb);
5159 /* FIXME: Once we have allocators, this should use allocator. */
5160 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5161 gimple_call_set_lhs (g, controlp);
5162 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5163 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5164 &gsi, true);
5165 gsi_prev (&gsi);
5166 g = gsi_stmt (gsi);
5167 fifth_bb = split_block (fourth_bb, g)->dest;
5168 gsi = gsi_last_nondebug_bb (fifth_bb);
5169
5170 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5171 gimple_call_set_lhs (g, controlp);
5172 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5173 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5174 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5175 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5176 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5177 {
5178 tree tmp = create_tmp_var (sizetype);
5179 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5180 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5181 TYPE_SIZE_UNIT (pointee_type));
5182 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5183 g = gimple_build_call (alloca_decl, 2, tmp,
5184 size_int (TYPE_ALIGN (pointee_type)));
5185 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5186 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5187 }
5188
5189 sixth_bb = split_block (fifth_bb, g)->dest;
5190 gsi = gsi_last_nondebug_bb (sixth_bb);
5191 }
5192
5193 t = build2 (MULT_EXPR, itype, q, threadid);
5194 t = build2 (PLUS_EXPR, itype, t, tt);
5195 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5196
5197 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5198 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5199
5200 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5201 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5202
5203 /* Remove the GIMPLE_OMP_FOR statement. */
5204 gsi_remove (&gsi, true);
5205
5206 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5207 gsi = gsi_start_bb (seq_start_bb);
5208
5209 tree startvar = fd->loop.v;
5210 tree endvar = NULL_TREE;
5211
5212 if (gimple_omp_for_combined_p (fd->for_stmt))
5213 {
5214 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5215 ? gimple_omp_parallel_clauses (inner_stmt)
5216 : gimple_omp_for_clauses (inner_stmt);
5217 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5218 gcc_assert (innerc);
5219 startvar = OMP_CLAUSE_DECL (innerc);
5220 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5221 OMP_CLAUSE__LOOPTEMP_);
5222 gcc_assert (innerc);
5223 endvar = OMP_CLAUSE_DECL (innerc);
5224 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5225 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5226 {
5227 innerc = find_lastprivate_looptemp (fd, innerc);
5228 if (innerc)
5229 {
5230 /* If needed (distribute parallel for with lastprivate),
5231 propagate down the total number of iterations. */
5232 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5233 fd->loop.n2);
5234 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5235 GSI_CONTINUE_LINKING);
5236 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5237 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5238 }
5239 }
5240 }
5241 t = fold_convert (itype, s0);
5242 t = fold_build2 (MULT_EXPR, itype, t, step);
5243 if (POINTER_TYPE_P (type))
5244 {
5245 t = fold_build_pointer_plus (n1, t);
5246 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5247 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5248 t = fold_convert (signed_type_for (type), t);
5249 }
5250 else
5251 t = fold_build2 (PLUS_EXPR, type, t, n1);
5252 t = fold_convert (TREE_TYPE (startvar), t);
5253 t = force_gimple_operand_gsi (&gsi, t,
5254 DECL_P (startvar)
5255 && TREE_ADDRESSABLE (startvar),
5256 NULL_TREE, false, GSI_CONTINUE_LINKING);
5257 assign_stmt = gimple_build_assign (startvar, t);
5258 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5259 if (cond_var)
5260 {
5261 tree itype = TREE_TYPE (cond_var);
5262 /* For lastprivate(conditional:) itervar, we need some iteration
5263 counter that starts at unsigned non-zero and increases.
5264 Prefer as few IVs as possible, so if we can use startvar
5265 itself, use that, or startvar + constant (those would be
5266 incremented with step), and as a last resort use s0 + 1,
5267 incremented by 1 each iteration. */
5268 if (POINTER_TYPE_P (type)
5269 || TREE_CODE (n1) != INTEGER_CST
5270 || fd->loop.cond_code != LT_EXPR)
5271 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5272 build_int_cst (itype, 1));
5273 else if (tree_int_cst_sgn (n1) == 1)
5274 t = fold_convert (itype, t);
5275 else
5276 {
5277 tree c = fold_convert (itype, n1);
5278 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5279 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5280 }
5281 t = force_gimple_operand_gsi (&gsi, t, false,
5282 NULL_TREE, false, GSI_CONTINUE_LINKING);
5283 assign_stmt = gimple_build_assign (cond_var, t);
5284 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5285 }
5286
5287 t = fold_convert (itype, e0);
5288 t = fold_build2 (MULT_EXPR, itype, t, step);
5289 if (POINTER_TYPE_P (type))
5290 {
5291 t = fold_build_pointer_plus (n1, t);
5292 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5293 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5294 t = fold_convert (signed_type_for (type), t);
5295 }
5296 else
5297 t = fold_build2 (PLUS_EXPR, type, t, n1);
5298 t = fold_convert (TREE_TYPE (startvar), t);
5299 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5300 false, GSI_CONTINUE_LINKING);
5301 if (endvar)
5302 {
5303 assign_stmt = gimple_build_assign (endvar, e);
5304 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5305 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5306 assign_stmt = gimple_build_assign (fd->loop.v, e);
5307 else
5308 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5309 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5310 }
5311 /* Handle linear clause adjustments. */
5312 tree itercnt = NULL_TREE;
5313 tree *nonrect_bounds = NULL;
5314 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5315 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5316 c; c = OMP_CLAUSE_CHAIN (c))
5317 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5318 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5319 {
5320 tree d = OMP_CLAUSE_DECL (c);
5321 tree t = d, a, dest;
5322 if (omp_privatize_by_reference (t))
5323 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5324 if (itercnt == NULL_TREE)
5325 {
5326 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5327 {
5328 itercnt = fold_build2 (MINUS_EXPR, itype,
5329 fold_convert (itype, n1),
5330 fold_convert (itype, fd->loop.n1));
5331 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5332 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5333 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5334 NULL_TREE, false,
5335 GSI_CONTINUE_LINKING);
5336 }
5337 else
5338 itercnt = s0;
5339 }
5340 tree type = TREE_TYPE (t);
5341 if (POINTER_TYPE_P (type))
5342 type = sizetype;
5343 a = fold_build2 (MULT_EXPR, type,
5344 fold_convert (type, itercnt),
5345 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5346 dest = unshare_expr (t);
5347 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5348 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5349 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5350 false, GSI_CONTINUE_LINKING);
5351 expand_omp_build_assign (&gsi, dest, t, true);
5352 }
5353 if (fd->collapse > 1)
5354 {
5355 if (fd->non_rect)
5356 {
5357 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5358 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5359 }
5360 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5361 startvar);
5362 }
5363
5364 if (!broken_loop)
5365 {
5366 /* The code controlling the sequential loop replaces the
5367 GIMPLE_OMP_CONTINUE. */
5368 gsi = gsi_last_nondebug_bb (cont_bb);
5369 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5370 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5371 vmain = gimple_omp_continue_control_use (cont_stmt);
5372 vback = gimple_omp_continue_control_def (cont_stmt);
5373
5374 if (cond_var)
5375 {
5376 tree itype = TREE_TYPE (cond_var);
5377 tree t2;
5378 if (POINTER_TYPE_P (type)
5379 || TREE_CODE (n1) != INTEGER_CST
5380 || fd->loop.cond_code != LT_EXPR)
5381 t2 = build_int_cst (itype, 1);
5382 else
5383 t2 = fold_convert (itype, step);
5384 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5385 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5386 NULL_TREE, true, GSI_SAME_STMT);
5387 assign_stmt = gimple_build_assign (cond_var, t2);
5388 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5389 }
5390
5391 if (!gimple_omp_for_combined_p (fd->for_stmt))
5392 {
5393 if (POINTER_TYPE_P (type))
5394 t = fold_build_pointer_plus (vmain, step);
5395 else
5396 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5397 t = force_gimple_operand_gsi (&gsi, t,
5398 DECL_P (vback)
5399 && TREE_ADDRESSABLE (vback),
5400 NULL_TREE, true, GSI_SAME_STMT);
5401 assign_stmt = gimple_build_assign (vback, t);
5402 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5403
5404 t = build2 (fd->loop.cond_code, boolean_type_node,
5405 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5406 ? t : vback, e);
5407 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5408 }
5409
5410 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5411 gsi_remove (&gsi, true);
5412
5413 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5414 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5415 cont_bb, body_bb);
5416 }
5417
5418 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5419 gsi = gsi_last_nondebug_bb (exit_bb);
5420 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5421 {
5422 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5423 if (fd->have_reductemp
5424 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5425 && !fd->have_nonctrl_scantemp))
5426 {
5427 tree fn;
5428 if (t)
5429 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5430 else
5431 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5432 gcall *g = gimple_build_call (fn, 0);
5433 if (t)
5434 {
5435 gimple_call_set_lhs (g, t);
5436 if (fd->have_reductemp)
5437 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5438 NOP_EXPR, t),
5439 GSI_SAME_STMT);
5440 }
5441 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5442 }
5443 else
5444 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5445 }
5446 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5447 && !fd->have_nonctrl_scantemp)
5448 {
5449 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5450 gcall *g = gimple_build_call (fn, 0);
5451 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5452 }
5453 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5454 {
5455 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5456 tree controlp = NULL_TREE, controlb = NULL_TREE;
5457 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5458 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5459 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5460 {
5461 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5462 controlb = OMP_CLAUSE_DECL (c);
5463 else
5464 controlp = OMP_CLAUSE_DECL (c);
5465 if (controlb && controlp)
5466 break;
5467 }
5468 gcc_assert (controlp && controlb);
5469 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5470 NULL_TREE, NULL_TREE);
5471 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5472 exit1_bb = split_block (exit_bb, g)->dest;
5473 gsi = gsi_after_labels (exit1_bb);
5474 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5475 controlp);
5476 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5477 exit2_bb = split_block (exit1_bb, g)->dest;
5478 gsi = gsi_after_labels (exit2_bb);
5479 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5480 controlp);
5481 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5482 exit3_bb = split_block (exit2_bb, g)->dest;
5483 gsi = gsi_after_labels (exit3_bb);
5484 }
5485 gsi_remove (&gsi, true);
5486
5487 /* Connect all the blocks. */
5488 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5489 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5490 ep = find_edge (entry_bb, second_bb);
5491 ep->flags = EDGE_TRUE_VALUE;
5492 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5493 if (fourth_bb)
5494 {
5495 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5496 ep->probability
5497 = profile_probability::guessed_always ().apply_scale (1, 2);
5498 ep = find_edge (third_bb, fourth_bb);
5499 ep->flags = EDGE_TRUE_VALUE;
5500 ep->probability
5501 = profile_probability::guessed_always ().apply_scale (1, 2);
5502 ep = find_edge (fourth_bb, fifth_bb);
5503 redirect_edge_and_branch (ep, sixth_bb);
5504 }
5505 else
5506 sixth_bb = third_bb;
5507 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5508 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5509 if (exit1_bb)
5510 {
5511 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5512 ep->probability
5513 = profile_probability::guessed_always ().apply_scale (1, 2);
5514 ep = find_edge (exit_bb, exit1_bb);
5515 ep->flags = EDGE_TRUE_VALUE;
5516 ep->probability
5517 = profile_probability::guessed_always ().apply_scale (1, 2);
5518 ep = find_edge (exit1_bb, exit2_bb);
5519 redirect_edge_and_branch (ep, exit3_bb);
5520 }
5521
5522 if (!broken_loop)
5523 {
5524 ep = find_edge (cont_bb, body_bb);
5525 if (ep == NULL)
5526 {
5527 ep = BRANCH_EDGE (cont_bb);
5528 gcc_assert (single_succ (ep->dest) == body_bb);
5529 }
5530 if (gimple_omp_for_combined_p (fd->for_stmt))
5531 {
5532 remove_edge (ep);
5533 ep = NULL;
5534 }
5535 else if (fd->collapse > 1)
5536 {
5537 remove_edge (ep);
5538 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5539 }
5540 else
5541 ep->flags = EDGE_TRUE_VALUE;
5542 find_edge (cont_bb, fin_bb)->flags
5543 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5544 }
5545
5546 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5547 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5548 if (fourth_bb)
5549 {
5550 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5551 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5552 }
5553 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5554
5555 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5556 recompute_dominator (CDI_DOMINATORS, body_bb));
5557 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5558 recompute_dominator (CDI_DOMINATORS, fin_bb));
5559 if (exit1_bb)
5560 {
5561 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5562 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5563 }
5564
5565 class loop *loop = body_bb->loop_father;
5566 if (loop != entry_bb->loop_father)
5567 {
5568 gcc_assert (broken_loop || loop->header == body_bb);
5569 gcc_assert (broken_loop
5570 || loop->latch == region->cont
5571 || single_pred (loop->latch) == region->cont);
5572 return;
5573 }
5574
5575 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5576 {
5577 loop = alloc_loop ();
5578 loop->header = body_bb;
5579 if (collapse_bb == NULL)
5580 loop->latch = cont_bb;
5581 add_loop (loop, body_bb->loop_father);
5582 }
5583 }
5584
5585 /* Return phi in E->DEST with ARG on edge E. */
5586
5587 static gphi *
5588 find_phi_with_arg_on_edge (tree arg, edge e)
5589 {
5590 basic_block bb = e->dest;
5591
5592 for (gphi_iterator gpi = gsi_start_phis (bb);
5593 !gsi_end_p (gpi);
5594 gsi_next (&gpi))
5595 {
5596 gphi *phi = gpi.phi ();
5597 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5598 return phi;
5599 }
5600
5601 return NULL;
5602 }
5603
5604 /* A subroutine of expand_omp_for. Generate code for a parallel
5605 loop with static schedule and a specified chunk size. Given
5606 parameters:
5607
5608 for (V = N1; V cond N2; V += STEP) BODY;
5609
5610 where COND is "<" or ">", we generate pseudocode
5611
5612 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5613 if (cond is <)
5614 adj = STEP - 1;
5615 else
5616 adj = STEP + 1;
5617 if ((__typeof (V)) -1 > 0 && cond is >)
5618 n = -(adj + N2 - N1) / -STEP;
5619 else
5620 n = (adj + N2 - N1) / STEP;
5621 trip = 0;
5622 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5623 here so that V is defined
5624 if the loop is not entered
5625 L0:
5626 s0 = (trip * nthreads + threadid) * CHUNK;
5627 e0 = min (s0 + CHUNK, n);
5628 if (s0 < n) goto L1; else goto L4;
5629 L1:
5630 V = s0 * STEP + N1;
5631 e = e0 * STEP + N1;
5632 L2:
5633 BODY;
5634 V += STEP;
5635 if (V cond e) goto L2; else goto L3;
5636 L3:
5637 trip += 1;
5638 goto L0;
5639 L4:
5640 */
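
/* Purely illustrative sketch, not part of this pass: a plain C rendering of
   the trip-based chunk assignment above, working on n logical iterations
   rather than on V directly.  The names static_chunk_walk and body are made
   up for the example; the pass emits equivalent GIMPLE for blocks L0-L4
   instead of calling such a helper.  */

static void
static_chunk_walk (unsigned long n, unsigned long nthreads,
		   unsigned long threadid, unsigned long chunk,
		   void (*body) (unsigned long))
{
  for (unsigned long trip = 0; ; trip++)
    {
      /* L0: the TRIP-th chunk handed to this thread.  */
      unsigned long s0 = (trip * nthreads + threadid) * chunk;
      if (s0 >= n)
	break;			/* L4: nothing left for this thread.  */
      unsigned long e0 = s0 + chunk;
      if (e0 > n)
	e0 = n;			/* e0 = min (s0 + CHUNK, n).  */
      /* L1/L2/L3: run the logical iterations of this chunk, then
	 move on to the next trip.  */
      for (unsigned long i = s0; i < e0; i++)
	body (i);
    }
}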
5641
5642 static void
5643 expand_omp_for_static_chunk (struct omp_region *region,
5644 struct omp_for_data *fd, gimple *inner_stmt)
5645 {
5646 tree n, s0, e0, e, t;
5647 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5648 tree type, itype, vmain, vback, vextra;
5649 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5650 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5651 gimple_stmt_iterator gsi, gsip;
5652 edge se;
5653 bool broken_loop = region->cont == NULL;
5654 tree *counts = NULL;
5655 tree n1, n2, step;
5656 tree reductions = NULL_TREE;
5657 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5658
5659 itype = type = TREE_TYPE (fd->loop.v);
5660 if (POINTER_TYPE_P (type))
5661 itype = signed_type_for (type);
5662
5663 entry_bb = region->entry;
5664 se = split_block (entry_bb, last_stmt (entry_bb));
5665 entry_bb = se->src;
5666 iter_part_bb = se->dest;
5667 cont_bb = region->cont;
5668 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5669 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5670 gcc_assert (broken_loop
5671 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5672 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5673 body_bb = single_succ (seq_start_bb);
5674 if (!broken_loop)
5675 {
5676 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5677 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5678 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5679 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5680 }
5681 exit_bb = region->exit;
5682
5683 /* Trip and adjustment setup goes in ENTRY_BB. */
5684 gsi = gsi_last_nondebug_bb (entry_bb);
5685 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5686 gsip = gsi;
5687 gsi_prev (&gsip);
5688
5689 if (fd->collapse > 1)
5690 {
5691 int first_zero_iter = -1, dummy = -1;
5692 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5693
5694 counts = XALLOCAVEC (tree, fd->collapse);
5695 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5696 fin_bb, first_zero_iter,
5697 dummy_bb, dummy, l2_dom_bb);
5698 t = NULL_TREE;
5699 }
5700 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5701 t = integer_one_node;
5702 else
5703 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5704 fold_convert (type, fd->loop.n1),
5705 fold_convert (type, fd->loop.n2));
5706 if (fd->collapse == 1
5707 && TYPE_UNSIGNED (type)
5708 && (t == NULL_TREE || !integer_onep (t)))
5709 {
5710 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5711 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5712 true, GSI_SAME_STMT);
5713 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5714 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5715 true, GSI_SAME_STMT);
5716 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5717 n1, n2);
5718 se = split_block (entry_bb, cond_stmt);
5719 se->flags = EDGE_TRUE_VALUE;
5720 entry_bb = se->dest;
5721 se->probability = profile_probability::very_likely ();
5722 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5723 se->probability = profile_probability::very_unlikely ();
5724 if (gimple_in_ssa_p (cfun))
5725 {
5726 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5727 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5728 !gsi_end_p (gpi); gsi_next (&gpi))
5729 {
5730 gphi *phi = gpi.phi ();
5731 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5732 se, UNKNOWN_LOCATION);
5733 }
5734 }
5735 gsi = gsi_last_bb (entry_bb);
5736 }
5737
5738 if (fd->lastprivate_conditional)
5739 {
5740 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5741 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5742 if (fd->have_pointer_condtemp)
5743 condtemp = OMP_CLAUSE_DECL (c);
5744 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5745 cond_var = OMP_CLAUSE_DECL (c);
5746 }
5747 if (fd->have_reductemp || fd->have_pointer_condtemp)
5748 {
5749 tree t1 = build_int_cst (long_integer_type_node, 0);
5750 tree t2 = build_int_cst (long_integer_type_node, 1);
5751 tree t3 = build_int_cstu (long_integer_type_node,
5752 (HOST_WIDE_INT_1U << 31) + 1);
5753 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5754 gimple_stmt_iterator gsi2 = gsi_none ();
5755 gimple *g = NULL;
5756 tree mem = null_pointer_node, memv = NULL_TREE;
5757 if (fd->have_reductemp)
5758 {
5759 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5760 reductions = OMP_CLAUSE_DECL (c);
5761 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5762 g = SSA_NAME_DEF_STMT (reductions);
5763 reductions = gimple_assign_rhs1 (g);
5764 OMP_CLAUSE_DECL (c) = reductions;
5765 gsi2 = gsi_for_stmt (g);
5766 }
5767 else
5768 {
5769 if (gsi_end_p (gsip))
5770 gsi2 = gsi_after_labels (region->entry);
5771 else
5772 gsi2 = gsip;
5773 reductions = null_pointer_node;
5774 }
5775 if (fd->have_pointer_condtemp)
5776 {
5777 tree type = TREE_TYPE (condtemp);
5778 memv = create_tmp_var (type);
5779 TREE_ADDRESSABLE (memv) = 1;
5780 unsigned HOST_WIDE_INT sz
5781 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5782 sz *= fd->lastprivate_conditional;
5783 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5784 false);
5785 mem = build_fold_addr_expr (memv);
5786 }
5787 tree t
5788 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5789 9, t1, t2, t2, t3, t1, null_pointer_node,
5790 null_pointer_node, reductions, mem);
5791 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5792 true, GSI_SAME_STMT);
5793 if (fd->have_pointer_condtemp)
5794 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5795 if (fd->have_reductemp)
5796 {
5797 gsi_remove (&gsi2, true);
5798 release_ssa_name (gimple_assign_lhs (g));
5799 }
5800 }
5801 switch (gimple_omp_for_kind (fd->for_stmt))
5802 {
5803 case GF_OMP_FOR_KIND_FOR:
5804 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5805 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5806 break;
5807 case GF_OMP_FOR_KIND_DISTRIBUTE:
5808 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5809 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5810 break;
5811 default:
5812 gcc_unreachable ();
5813 }
5814 nthreads = build_call_expr (nthreads, 0);
5815 nthreads = fold_convert (itype, nthreads);
5816 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5817 true, GSI_SAME_STMT);
5818 threadid = build_call_expr (threadid, 0);
5819 threadid = fold_convert (itype, threadid);
5820 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5821 true, GSI_SAME_STMT);
5822
5823 n1 = fd->loop.n1;
5824 n2 = fd->loop.n2;
5825 step = fd->loop.step;
5826 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5827 {
5828 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5829 OMP_CLAUSE__LOOPTEMP_);
5830 gcc_assert (innerc);
5831 n1 = OMP_CLAUSE_DECL (innerc);
5832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5833 OMP_CLAUSE__LOOPTEMP_);
5834 gcc_assert (innerc);
5835 n2 = OMP_CLAUSE_DECL (innerc);
5836 }
5837 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5838 true, NULL_TREE, true, GSI_SAME_STMT);
5839 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5840 true, NULL_TREE, true, GSI_SAME_STMT);
5841 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5842 true, NULL_TREE, true, GSI_SAME_STMT);
5843 tree chunk_size = fold_convert (itype, fd->chunk_size);
5844 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5845 chunk_size
5846 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5847 GSI_SAME_STMT);
5848
5849 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5850 t = fold_build2 (PLUS_EXPR, itype, step, t);
5851 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5852 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5853 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5854 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5855 fold_build1 (NEGATE_EXPR, itype, t),
5856 fold_build1 (NEGATE_EXPR, itype, step));
5857 else
5858 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5859 t = fold_convert (itype, t);
5860 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5861 true, GSI_SAME_STMT);
5862
5863 trip_var = create_tmp_reg (itype, ".trip");
5864 if (gimple_in_ssa_p (cfun))
5865 {
5866 trip_init = make_ssa_name (trip_var);
5867 trip_main = make_ssa_name (trip_var);
5868 trip_back = make_ssa_name (trip_var);
5869 }
5870 else
5871 {
5872 trip_init = trip_var;
5873 trip_main = trip_var;
5874 trip_back = trip_var;
5875 }
5876
5877 gassign *assign_stmt
5878 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5879 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5880
5881 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5882 t = fold_build2 (MULT_EXPR, itype, t, step);
5883 if (POINTER_TYPE_P (type))
5884 t = fold_build_pointer_plus (n1, t);
5885 else
5886 t = fold_build2 (PLUS_EXPR, type, t, n1);
5887 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5888 true, GSI_SAME_STMT);
5889
5890 /* Remove the GIMPLE_OMP_FOR. */
5891 gsi_remove (&gsi, true);
5892
5893 gimple_stmt_iterator gsif = gsi;
5894
5895 /* Iteration space partitioning goes in ITER_PART_BB. */
5896 gsi = gsi_last_bb (iter_part_bb);
5897
5898 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5899 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5900 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5901 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5902 false, GSI_CONTINUE_LINKING);
5903
5904 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5905 t = fold_build2 (MIN_EXPR, itype, t, n);
5906 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5907 false, GSI_CONTINUE_LINKING);
5908
5909 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5910 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5911
5912 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5913 gsi = gsi_start_bb (seq_start_bb);
5914
5915 tree startvar = fd->loop.v;
5916 tree endvar = NULL_TREE;
5917
5918 if (gimple_omp_for_combined_p (fd->for_stmt))
5919 {
5920 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5921 ? gimple_omp_parallel_clauses (inner_stmt)
5922 : gimple_omp_for_clauses (inner_stmt);
5923 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5924 gcc_assert (innerc);
5925 startvar = OMP_CLAUSE_DECL (innerc);
5926 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5927 OMP_CLAUSE__LOOPTEMP_);
5928 gcc_assert (innerc);
5929 endvar = OMP_CLAUSE_DECL (innerc);
5930 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5931 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5932 {
5933 innerc = find_lastprivate_looptemp (fd, innerc);
5934 if (innerc)
5935 {
5936 /* If needed (distribute parallel for with lastprivate),
5937 propagate down the total number of iterations. */
5938 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5939 fd->loop.n2);
5940 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5941 GSI_CONTINUE_LINKING);
5942 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5943 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5944 }
5945 }
5946 }
5947
5948 t = fold_convert (itype, s0);
5949 t = fold_build2 (MULT_EXPR, itype, t, step);
5950 if (POINTER_TYPE_P (type))
5951 {
5952 t = fold_build_pointer_plus (n1, t);
5953 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5954 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5955 t = fold_convert (signed_type_for (type), t);
5956 }
5957 else
5958 t = fold_build2 (PLUS_EXPR, type, t, n1);
5959 t = fold_convert (TREE_TYPE (startvar), t);
5960 t = force_gimple_operand_gsi (&gsi, t,
5961 DECL_P (startvar)
5962 && TREE_ADDRESSABLE (startvar),
5963 NULL_TREE, false, GSI_CONTINUE_LINKING);
5964 assign_stmt = gimple_build_assign (startvar, t);
5965 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5966 if (cond_var)
5967 {
5968 tree itype = TREE_TYPE (cond_var);
5969 /* For lastprivate(conditional:) itervar, we need some iteration
5970 counter that starts at unsigned non-zero and increases.
5971 Prefer as few IVs as possible, so if we can use startvar
5972 itself, use that, or startvar + constant (those would be
5973 incremented with step), and as a last resort use s0 + 1,
5974 incremented by 1 each iteration. */
5975 if (POINTER_TYPE_P (type)
5976 || TREE_CODE (n1) != INTEGER_CST
5977 || fd->loop.cond_code != LT_EXPR)
5978 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5979 build_int_cst (itype, 1));
5980 else if (tree_int_cst_sgn (n1) == 1)
5981 t = fold_convert (itype, t);
5982 else
5983 {
5984 tree c = fold_convert (itype, n1);
5985 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5986 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5987 }
5988 t = force_gimple_operand_gsi (&gsi, t, false,
5989 NULL_TREE, false, GSI_CONTINUE_LINKING);
5990 assign_stmt = gimple_build_assign (cond_var, t);
5991 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5992 }
5993
5994 t = fold_convert (itype, e0);
5995 t = fold_build2 (MULT_EXPR, itype, t, step);
5996 if (POINTER_TYPE_P (type))
5997 {
5998 t = fold_build_pointer_plus (n1, t);
5999 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6000 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6001 t = fold_convert (signed_type_for (type), t);
6002 }
6003 else
6004 t = fold_build2 (PLUS_EXPR, type, t, n1);
6005 t = fold_convert (TREE_TYPE (startvar), t);
6006 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6007 false, GSI_CONTINUE_LINKING);
6008 if (endvar)
6009 {
6010 assign_stmt = gimple_build_assign (endvar, e);
6011 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6012 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6013 assign_stmt = gimple_build_assign (fd->loop.v, e);
6014 else
6015 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6016 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6017 }
6018 /* Handle linear clause adjustments. */
6019 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6020 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6021 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6022 c; c = OMP_CLAUSE_CHAIN (c))
6023 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6024 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6025 {
6026 tree d = OMP_CLAUSE_DECL (c);
6027 tree t = d, a, dest;
6028 if (omp_privatize_by_reference (t))
6029 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6030 tree type = TREE_TYPE (t);
6031 if (POINTER_TYPE_P (type))
6032 type = sizetype;
6033 dest = unshare_expr (t);
6034 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6035 expand_omp_build_assign (&gsif, v, t);
6036 if (itercnt == NULL_TREE)
6037 {
6038 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6039 {
6040 itercntbias
6041 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6042 fold_convert (itype, fd->loop.n1));
6043 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6044 itercntbias, step);
6045 itercntbias
6046 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6047 NULL_TREE, true,
6048 GSI_SAME_STMT);
6049 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6050 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6051 NULL_TREE, false,
6052 GSI_CONTINUE_LINKING);
6053 }
6054 else
6055 itercnt = s0;
6056 }
6057 a = fold_build2 (MULT_EXPR, type,
6058 fold_convert (type, itercnt),
6059 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6060 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6061 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6062 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6063 false, GSI_CONTINUE_LINKING);
6064 expand_omp_build_assign (&gsi, dest, t, true);
6065 }
6066 if (fd->collapse > 1)
6067 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6068
6069 if (!broken_loop)
6070 {
6071 /* The code controlling the sequential loop goes in CONT_BB,
6072 replacing the GIMPLE_OMP_CONTINUE. */
6073 gsi = gsi_last_nondebug_bb (cont_bb);
6074 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6075 vmain = gimple_omp_continue_control_use (cont_stmt);
6076 vback = gimple_omp_continue_control_def (cont_stmt);
6077
6078 if (cond_var)
6079 {
6080 tree itype = TREE_TYPE (cond_var);
6081 tree t2;
6082 if (POINTER_TYPE_P (type)
6083 || TREE_CODE (n1) != INTEGER_CST
6084 || fd->loop.cond_code != LT_EXPR)
6085 t2 = build_int_cst (itype, 1);
6086 else
6087 t2 = fold_convert (itype, step);
6088 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6089 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6090 NULL_TREE, true, GSI_SAME_STMT);
6091 assign_stmt = gimple_build_assign (cond_var, t2);
6092 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6093 }
6094
6095 if (!gimple_omp_for_combined_p (fd->for_stmt))
6096 {
6097 if (POINTER_TYPE_P (type))
6098 t = fold_build_pointer_plus (vmain, step);
6099 else
6100 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6101 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6102 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6103 true, GSI_SAME_STMT);
6104 assign_stmt = gimple_build_assign (vback, t);
6105 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6106
6107 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6108 t = build2 (EQ_EXPR, boolean_type_node,
6109 build_int_cst (itype, 0),
6110 build_int_cst (itype, 1));
6111 else
6112 t = build2 (fd->loop.cond_code, boolean_type_node,
6113 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6114 ? t : vback, e);
6115 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6116 }
6117
6118 /* Remove GIMPLE_OMP_CONTINUE. */
6119 gsi_remove (&gsi, true);
6120
6121 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6122 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6123
6124 /* Trip update code goes into TRIP_UPDATE_BB. */
6125 gsi = gsi_start_bb (trip_update_bb);
6126
6127 t = build_int_cst (itype, 1);
6128 t = build2 (PLUS_EXPR, itype, trip_main, t);
6129 assign_stmt = gimple_build_assign (trip_back, t);
6130 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6131 }
6132
6133 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6134 gsi = gsi_last_nondebug_bb (exit_bb);
6135 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6136 {
6137 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6138 if (fd->have_reductemp || fd->have_pointer_condtemp)
6139 {
6140 tree fn;
6141 if (t)
6142 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6143 else
6144 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6145 gcall *g = gimple_build_call (fn, 0);
6146 if (t)
6147 {
6148 gimple_call_set_lhs (g, t);
6149 if (fd->have_reductemp)
6150 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6151 NOP_EXPR, t),
6152 GSI_SAME_STMT);
6153 }
6154 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6155 }
6156 else
6157 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6158 }
6159 else if (fd->have_pointer_condtemp)
6160 {
6161 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6162 gcall *g = gimple_build_call (fn, 0);
6163 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6164 }
6165 gsi_remove (&gsi, true);
6166
6167 /* Connect the new blocks. */
6168 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6169 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6170
6171 if (!broken_loop)
6172 {
6173 se = find_edge (cont_bb, body_bb);
6174 if (se == NULL)
6175 {
6176 se = BRANCH_EDGE (cont_bb);
6177 gcc_assert (single_succ (se->dest) == body_bb);
6178 }
6179 if (gimple_omp_for_combined_p (fd->for_stmt))
6180 {
6181 remove_edge (se);
6182 se = NULL;
6183 }
6184 else if (fd->collapse > 1)
6185 {
6186 remove_edge (se);
6187 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6188 }
6189 else
6190 se->flags = EDGE_TRUE_VALUE;
6191 find_edge (cont_bb, trip_update_bb)->flags
6192 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6193
6194 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6195 iter_part_bb);
6196 }
6197
6198 if (gimple_in_ssa_p (cfun))
6199 {
6200 gphi_iterator psi;
6201 gphi *phi;
6202 edge re, ene;
6203 edge_var_map *vm;
6204 size_t i;
6205
6206 gcc_assert (fd->collapse == 1 && !broken_loop);
6207
6208 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6209 remove arguments of the phi nodes in fin_bb. We need to create
6210 appropriate phi nodes in iter_part_bb instead. */
6211 se = find_edge (iter_part_bb, fin_bb);
6212 re = single_succ_edge (trip_update_bb);
6213 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6214 ene = single_succ_edge (entry_bb);
6215
6216 psi = gsi_start_phis (fin_bb);
6217 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6218 gsi_next (&psi), ++i)
6219 {
6220 gphi *nphi;
6221 location_t locus;
6222
6223 phi = psi.phi ();
6224 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6225 redirect_edge_var_map_def (vm), 0))
6226 continue;
6227
6228 t = gimple_phi_result (phi);
6229 gcc_assert (t == redirect_edge_var_map_result (vm));
6230
6231 if (!single_pred_p (fin_bb))
6232 t = copy_ssa_name (t, phi);
6233
6234 nphi = create_phi_node (t, iter_part_bb);
6235
6236 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6237 locus = gimple_phi_arg_location_from_edge (phi, se);
6238
6239 /* A special case -- fd->loop.v is not yet computed in
6240 iter_part_bb, we need to use vextra instead. */
6241 if (t == fd->loop.v)
6242 t = vextra;
6243 add_phi_arg (nphi, t, ene, locus);
6244 locus = redirect_edge_var_map_location (vm);
6245 tree back_arg = redirect_edge_var_map_def (vm);
6246 add_phi_arg (nphi, back_arg, re, locus);
6247 edge ce = find_edge (cont_bb, body_bb);
6248 if (ce == NULL)
6249 {
6250 ce = BRANCH_EDGE (cont_bb);
6251 gcc_assert (single_succ (ce->dest) == body_bb);
6252 ce = single_succ_edge (ce->dest);
6253 }
6254 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6255 gcc_assert (inner_loop_phi != NULL);
6256 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6257 find_edge (seq_start_bb, body_bb), locus);
6258
6259 if (!single_pred_p (fin_bb))
6260 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6261 }
6262 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6263 redirect_edge_var_map_clear (re);
6264 if (single_pred_p (fin_bb))
6265 while (1)
6266 {
6267 psi = gsi_start_phis (fin_bb);
6268 if (gsi_end_p (psi))
6269 break;
6270 remove_phi_node (&psi, false);
6271 }
6272
6273 /* Make phi node for trip. */
6274 phi = create_phi_node (trip_main, iter_part_bb);
6275 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6276 UNKNOWN_LOCATION);
6277 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6278 UNKNOWN_LOCATION);
6279 }
6280
6281 if (!broken_loop)
6282 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6283 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6284 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6285 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6286 recompute_dominator (CDI_DOMINATORS, fin_bb));
6287 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6288 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6289 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6290 recompute_dominator (CDI_DOMINATORS, body_bb));
6291
6292 if (!broken_loop)
6293 {
6294 class loop *loop = body_bb->loop_father;
6295 class loop *trip_loop = alloc_loop ();
6296 trip_loop->header = iter_part_bb;
6297 trip_loop->latch = trip_update_bb;
6298 add_loop (trip_loop, iter_part_bb->loop_father);
6299
6300 if (loop != entry_bb->loop_father)
6301 {
6302 gcc_assert (loop->header == body_bb);
6303 gcc_assert (loop->latch == region->cont
6304 || single_pred (loop->latch) == region->cont);
6305 trip_loop->inner = loop;
6306 return;
6307 }
6308
6309 if (!gimple_omp_for_combined_p (fd->for_stmt))
6310 {
6311 loop = alloc_loop ();
6312 loop->header = body_bb;
6313 if (collapse_bb == NULL)
6314 loop->latch = cont_bb;
6315 add_loop (loop, trip_loop);
6316 }
6317 }
6318 }
6319
6320 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6321 loop. Given parameters:
6322
6323 for (V = N1; V cond N2; V += STEP) BODY;
6324
6325 where COND is "<" or ">", we generate pseudocode
6326
6327 V = N1;
6328 goto L1;
6329 L0:
6330 BODY;
6331 V += STEP;
6332 L1:
6333 if (V cond N2) goto L0; else goto L2;
6334 L2:
6335
6336 For collapsed loops, emit the outer loops as scalar
6337 and only try to vectorize the innermost loop. */
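
/* Purely illustrative sketch, not part of this pass: the C shape of the
   rewritten loop above for the simple non-collapsed case, with cond fixed
   to "<" for the example.  The names simd_loop_shape and body are made up;
   the pass builds the corresponding GIMPLE blocks L0/L1/L2 instead.  */

static void
simd_loop_shape (long n1, long n2, long step, void (*body) (long))
{
  long v = n1;		/* V = N1;  */
  goto check;		/* goto L1;  */
 again:			/* L0:  */
  body (v);		/* BODY;  */
  v += step;		/* V += STEP;  */
 check:			/* L1:  */
  if (v < n2)		/* if (V cond N2) goto L0; else goto L2;  */
    goto again;
  /* L2: done.  */
}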
6338
6339 static void
6340 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6341 {
6342 tree type, t;
6343 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6344 gimple_stmt_iterator gsi;
6345 gimple *stmt;
6346 gcond *cond_stmt;
6347 bool broken_loop = region->cont == NULL;
6348 edge e, ne;
6349 tree *counts = NULL;
6350 int i;
6351 int safelen_int = INT_MAX;
6352 bool dont_vectorize = false;
6353 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354 OMP_CLAUSE_SAFELEN);
6355 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6356 OMP_CLAUSE__SIMDUID_);
6357 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6358 OMP_CLAUSE_IF);
6359 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6360 OMP_CLAUSE_SIMDLEN);
6361 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6362 OMP_CLAUSE__CONDTEMP_);
6363 tree n1, n2;
6364 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6365
6366 if (safelen)
6367 {
6368 poly_uint64 val;
6369 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6370 if (!poly_int_tree_p (safelen, &val))
6371 safelen_int = 0;
6372 else
6373 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6374 if (safelen_int == 1)
6375 safelen_int = 0;
6376 }
6377 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6378 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6379 {
6380 safelen_int = 0;
6381 dont_vectorize = true;
6382 }
6383 type = TREE_TYPE (fd->loop.v);
6384 entry_bb = region->entry;
6385 cont_bb = region->cont;
6386 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6387 gcc_assert (broken_loop
6388 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6389 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6390 if (!broken_loop)
6391 {
6392 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6393 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6394 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6395 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6396 }
6397 else
6398 {
6399 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6400 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6401 l2_bb = single_succ (l1_bb);
6402 }
6403 exit_bb = region->exit;
6404 l2_dom_bb = NULL;
6405
6406 gsi = gsi_last_nondebug_bb (entry_bb);
6407
6408 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6409 /* Not needed in SSA form right now. */
6410 gcc_assert (!gimple_in_ssa_p (cfun));
6411 if (fd->collapse > 1
6412 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6413 || broken_loop))
6414 {
6415 int first_zero_iter = -1, dummy = -1;
6416 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6417
6418 counts = XALLOCAVEC (tree, fd->collapse);
6419 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6420 zero_iter_bb, first_zero_iter,
6421 dummy_bb, dummy, l2_dom_bb);
6422 }
6423 if (l2_dom_bb == NULL)
6424 l2_dom_bb = l1_bb;
6425
6426 n1 = fd->loop.n1;
6427 n2 = fd->loop.n2;
6428 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6429 {
6430 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6431 OMP_CLAUSE__LOOPTEMP_);
6432 gcc_assert (innerc);
6433 n1 = OMP_CLAUSE_DECL (innerc);
6434 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6435 OMP_CLAUSE__LOOPTEMP_);
6436 gcc_assert (innerc);
6437 n2 = OMP_CLAUSE_DECL (innerc);
6438 }
6439 tree step = fd->loop.step;
6440 tree orig_step = step; /* May be different from step if is_simt. */
6441
6442 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6443 OMP_CLAUSE__SIMT_);
6444 if (is_simt)
6445 {
6446 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6447 is_simt = safelen_int > 1;
6448 }
6449 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6450 if (is_simt)
6451 {
6452 simt_lane = create_tmp_var (unsigned_type_node);
6453 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6454 gimple_call_set_lhs (g, simt_lane);
6455 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6456 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6457 fold_convert (TREE_TYPE (step), simt_lane));
6458 n1 = fold_convert (type, n1);
6459 if (POINTER_TYPE_P (type))
6460 n1 = fold_build_pointer_plus (n1, offset);
6461 else
6462 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6463
6464 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6465 if (fd->collapse > 1)
6466 simt_maxlane = build_one_cst (unsigned_type_node);
6467 else if (safelen_int < omp_max_simt_vf ())
6468 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6469 tree vf
6470 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6471 unsigned_type_node, 0);
6472 if (simt_maxlane)
6473 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6474 vf = fold_convert (TREE_TYPE (step), vf);
6475 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6476 }
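/* So with SIMT each lane roughly executes
     for (V = N1 + LANE * STEP; ...; V += STEP * VF)
   where VF is the runtime IFN_GOMP_SIMT_VF value, possibly clamped by
   SIMT_MAXLANE, i.e. the lanes interleave over the iteration space.  */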
6477
6478 tree n2var = NULL_TREE;
6479 tree n2v = NULL_TREE;
6480 tree *nonrect_bounds = NULL;
6481 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6482 if (fd->collapse > 1)
6483 {
6484 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6485 {
6486 if (fd->non_rect)
6487 {
6488 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6489 memset (nonrect_bounds, 0,
6490 sizeof (tree) * (fd->last_nonrect + 1));
6491 }
6492 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6493 gcc_assert (entry_bb == gsi_bb (gsi));
6494 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6495 gsi_prev (&gsi);
6496 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6497 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6498 NULL, n1);
6499 gsi = gsi_for_stmt (fd->for_stmt);
6500 }
6501 if (broken_loop)
6502 ;
6503 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6504 {
6505 /* Compute in n2var the limit for the first innermost loop,
6506 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6507 where cnt is how many iterations would the loop have if
6508 all further iterations were assigned to the current task. */
6509 n2var = create_tmp_var (type);
6510 i = fd->collapse - 1;
6511 tree itype = TREE_TYPE (fd->loops[i].v);
6512 if (POINTER_TYPE_P (itype))
6513 itype = signed_type_for (itype);
6514 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6515 ? -1 : 1));
6516 t = fold_build2 (PLUS_EXPR, itype,
6517 fold_convert (itype, fd->loops[i].step), t);
6518 t = fold_build2 (PLUS_EXPR, itype, t,
6519 fold_convert (itype, fd->loops[i].n2));
6520 if (fd->loops[i].m2)
6521 {
6522 tree t2 = fold_convert (itype,
6523 fd->loops[i - fd->loops[i].outer].v);
6524 tree t3 = fold_convert (itype, fd->loops[i].m2);
6525 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6526 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6527 }
6528 t = fold_build2 (MINUS_EXPR, itype, t,
6529 fold_convert (itype, fd->loops[i].v));
6530 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6531 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6532 fold_build1 (NEGATE_EXPR, itype, t),
6533 fold_build1 (NEGATE_EXPR, itype,
6534 fold_convert (itype,
6535 fd->loops[i].step)));
6536 else
6537 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6538 fold_convert (itype, fd->loops[i].step));
6539 t = fold_convert (type, t);
6540 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6541 min_arg1 = create_tmp_var (type);
6542 expand_omp_build_assign (&gsi, min_arg1, t2);
6543 min_arg2 = create_tmp_var (type);
6544 expand_omp_build_assign (&gsi, min_arg2, t);
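/* Note: the MIN of the two and the assignment to N2VAR itself are only
   emitted later, in the reinitialization block of the innermost loop
   (the i + 2 == fd->collapse handling below).  */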
6545 }
6546 else
6547 {
6548 if (TREE_CODE (n2) == INTEGER_CST)
6549 {
6550 /* Indicate for lastprivate handling that at least one iteration
6551 has been performed, without wasting runtime. */
6552 if (integer_nonzerop (n2))
6553 expand_omp_build_assign (&gsi, fd->loop.v,
6554 fold_convert (type, n2));
6555 else
6556 /* Indicate that no iteration has been performed. */
6557 expand_omp_build_assign (&gsi, fd->loop.v,
6558 build_one_cst (type));
6559 }
6560 else
6561 {
6562 expand_omp_build_assign (&gsi, fd->loop.v,
6563 build_zero_cst (type));
6564 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6565 }
6566 for (i = 0; i < fd->collapse; i++)
6567 {
6568 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6569 if (fd->loops[i].m1)
6570 {
6571 tree t2
6572 = fold_convert (TREE_TYPE (t),
6573 fd->loops[i - fd->loops[i].outer].v);
6574 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6575 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6576 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6577 }
6578 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6579 /* For normal non-combined collapsed loops just initialize
6580 the outermost iterator in the entry_bb. */
6581 if (!broken_loop)
6582 break;
6583 }
6584 }
6585 }
6586 else
6587 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6588 tree altv = NULL_TREE, altn2 = NULL_TREE;
6589 if (fd->collapse == 1
6590 && !broken_loop
6591 && TREE_CODE (orig_step) != INTEGER_CST)
6592 {
6593 /* The vectorizer currently punts on loops with non-constant steps
6594 for the main IV (can't compute number of iterations and gives up
6595 because of that). Since for OpenMP loops it is always possible to
6596 compute the number of iterations upfront, use an alternate IV
6597 as the loop iterator:
6598 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6599 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6600 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6601 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6602 tree itype = TREE_TYPE (fd->loop.v);
6603 if (POINTER_TYPE_P (itype))
6604 itype = signed_type_for (itype);
6605 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6606 t = fold_build2 (PLUS_EXPR, itype,
6607 fold_convert (itype, step), t);
6608 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6609 t = fold_build2 (MINUS_EXPR, itype, t,
6610 fold_convert (itype, fd->loop.v));
6611 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6612 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6613 fold_build1 (NEGATE_EXPR, itype, t),
6614 fold_build1 (NEGATE_EXPR, itype,
6615 fold_convert (itype, step)));
6616 else
6617 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6618 fold_convert (itype, step));
6619 t = fold_convert (TREE_TYPE (altv), t);
6620 altn2 = create_tmp_var (TREE_TYPE (altv));
6621 expand_omp_build_assign (&gsi, altn2, t);
6622 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6623 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6624 true, GSI_SAME_STMT);
6625 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6626 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6627 build_zero_cst (TREE_TYPE (altv)));
6628 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6629 }
6630 else if (fd->collapse > 1
6631 && !broken_loop
6632 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6633 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6634 {
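/* Just create the alternate IV and its bound here; both are initialized
   below, in the reinitialization block of the innermost loop.  */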
6635 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6636 altn2 = create_tmp_var (TREE_TYPE (altv));
6637 }
6638 if (cond_var)
6639 {
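/* Initialize the _condtemp_ counter used for lastprivate(conditional):
   in the general case it simply counts iterations starting from 1; when
   the IV itself is a positive integer counting upwards it can just
   mirror the IV.  */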
6640 if (POINTER_TYPE_P (type)
6641 || TREE_CODE (n1) != INTEGER_CST
6642 || fd->loop.cond_code != LT_EXPR
6643 || tree_int_cst_sgn (n1) != 1)
6644 expand_omp_build_assign (&gsi, cond_var,
6645 build_one_cst (TREE_TYPE (cond_var)));
6646 else
6647 expand_omp_build_assign (&gsi, cond_var,
6648 fold_convert (TREE_TYPE (cond_var), n1));
6649 }
6650
6651 /* Remove the GIMPLE_OMP_FOR statement. */
6652 gsi_remove (&gsi, true);
6653
6654 if (!broken_loop)
6655 {
6656 /* Code to control the increment goes in the CONT_BB. */
6657 gsi = gsi_last_nondebug_bb (cont_bb);
6658 stmt = gsi_stmt (gsi);
6659 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6660
6661 if (fd->collapse == 1
6662 || gimple_omp_for_combined_into_p (fd->for_stmt))
6663 {
6664 if (POINTER_TYPE_P (type))
6665 t = fold_build_pointer_plus (fd->loop.v, step);
6666 else
6667 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6668 expand_omp_build_assign (&gsi, fd->loop.v, t);
6669 }
6670 else if (TREE_CODE (n2) != INTEGER_CST)
6671 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6672 if (altv)
6673 {
6674 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6675 build_one_cst (TREE_TYPE (altv)));
6676 expand_omp_build_assign (&gsi, altv, t);
6677 }
6678
6679 if (fd->collapse > 1)
6680 {
6681 i = fd->collapse - 1;
6682 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6683 {
6684 t = fold_convert (sizetype, fd->loops[i].step);
6685 t = fold_build_pointer_plus (fd->loops[i].v, t);
6686 }
6687 else
6688 {
6689 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6690 fd->loops[i].step);
6691 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6692 fd->loops[i].v, t);
6693 }
6694 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6695 }
6696 if (cond_var)
6697 {
6698 if (POINTER_TYPE_P (type)
6699 || TREE_CODE (n1) != INTEGER_CST
6700 || fd->loop.cond_code != LT_EXPR
6701 || tree_int_cst_sgn (n1) != 1)
6702 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6703 build_one_cst (TREE_TYPE (cond_var)));
6704 else
6705 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6706 fold_convert (TREE_TYPE (cond_var), step));
6707 expand_omp_build_assign (&gsi, cond_var, t);
6708 }
6709
6710 /* Remove GIMPLE_OMP_CONTINUE. */
6711 gsi_remove (&gsi, true);
6712 }
6713
6714 /* Emit the condition in L1_BB. */
6715 gsi = gsi_start_bb (l1_bb);
6716
6717 if (altv)
6718 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6719 else if (fd->collapse > 1
6720 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6721 && !broken_loop)
6722 {
6723 i = fd->collapse - 1;
6724 tree itype = TREE_TYPE (fd->loops[i].v);
6725 if (fd->loops[i].m2)
6726 t = n2v = create_tmp_var (itype);
6727 else
6728 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6729 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6730 false, GSI_CONTINUE_LINKING);
6731 tree v = fd->loops[i].v;
6732 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6733 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6734 false, GSI_CONTINUE_LINKING);
6735 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6736 }
6737 else
6738 {
6739 if (fd->collapse > 1 && !broken_loop)
6740 t = n2var;
6741 else
6742 t = fold_convert (type, unshare_expr (n2));
6743 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6744 false, GSI_CONTINUE_LINKING);
6745 tree v = fd->loop.v;
6746 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6747 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6748 false, GSI_CONTINUE_LINKING);
6749 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6750 }
6751 cond_stmt = gimple_build_cond_empty (t);
6752 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6753 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6754 NULL, NULL)
6755 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6756 NULL, NULL))
6757 {
6758 gsi = gsi_for_stmt (cond_stmt);
6759 gimple_regimplify_operands (cond_stmt, &gsi);
6760 }
6761
6762 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6763 if (is_simt)
6764 {
6765 gsi = gsi_start_bb (l2_bb);
6766 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6767 if (POINTER_TYPE_P (type))
6768 t = fold_build_pointer_plus (fd->loop.v, step);
6769 else
6770 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6771 expand_omp_build_assign (&gsi, fd->loop.v, t);
6772 }
6773
6774 /* Remove GIMPLE_OMP_RETURN. */
6775 gsi = gsi_last_nondebug_bb (exit_bb);
6776 gsi_remove (&gsi, true);
6777
6778 /* Connect the new blocks. */
6779 remove_edge (FALLTHRU_EDGE (entry_bb));
6780
6781 if (!broken_loop)
6782 {
6783 remove_edge (BRANCH_EDGE (entry_bb));
6784 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6785
6786 e = BRANCH_EDGE (l1_bb);
6787 ne = FALLTHRU_EDGE (l1_bb);
6788 e->flags = EDGE_TRUE_VALUE;
6789 }
6790 else
6791 {
6792 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6793
6794 ne = single_succ_edge (l1_bb);
6795 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6796
6797 }
6798 ne->flags = EDGE_FALSE_VALUE;
6799 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6800 ne->probability = e->probability.invert ();
6801
6802 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6803 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6804
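/* If only SIMT_MAXLANE lanes can have useful work (collapsed loops were
   limited to a single lane above, and safelen may be smaller than the
   hardware's SIMT width), guard the loop in ENTRY_BB so the remaining
   lanes branch straight to L2.  */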
6805 if (simt_maxlane)
6806 {
6807 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6808 NULL_TREE, NULL_TREE);
6809 gsi = gsi_last_bb (entry_bb);
6810 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6811 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6812 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6813 FALLTHRU_EDGE (entry_bb)->probability
6814 = profile_probability::guessed_always ().apply_scale (7, 8);
6815 BRANCH_EDGE (entry_bb)->probability
6816 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6817 l2_dom_bb = entry_bb;
6818 }
6819 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6820
6821 if (!broken_loop && fd->collapse > 1)
6822 {
6823 basic_block last_bb = l1_bb;
6824 basic_block init_bb = NULL;
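/* Build, from the innermost level outwards, one increment/test block per
   remaining collapsed loop, hanging off the exit (false) edge of the
   inner level's test.  Roughly, once level I + 1 is exhausted we do

     V_I += STEP_I;
     if (V_I cond_I N2_I)
       { V_{I+1} = N1_{I+1}; goto inner test; }

   and otherwise fall through, meaning level I is exhausted as well.
   N1/N2 of the inner level are recomputed here for non-rectangular
   loops, and so are N2VAR/ALTN2 for the innermost level.  */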
6825 for (i = fd->collapse - 2; i >= 0; i--)
6826 {
6827 tree nextn2v = NULL_TREE;
6828 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6829 e = EDGE_SUCC (last_bb, 0);
6830 else
6831 e = EDGE_SUCC (last_bb, 1);
6832 basic_block bb = split_edge (e);
6833 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6834 {
6835 t = fold_convert (sizetype, fd->loops[i].step);
6836 t = fold_build_pointer_plus (fd->loops[i].v, t);
6837 }
6838 else
6839 {
6840 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6841 fd->loops[i].step);
6842 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6843 fd->loops[i].v, t);
6844 }
6845 gsi = gsi_after_labels (bb);
6846 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6847
6848 bb = split_block (bb, last_stmt (bb))->dest;
6849 gsi = gsi_start_bb (bb);
6850 tree itype = TREE_TYPE (fd->loops[i].v);
6851 if (fd->loops[i].m2)
6852 t = nextn2v = create_tmp_var (itype);
6853 else
6854 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6855 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6856 false, GSI_CONTINUE_LINKING);
6857 tree v = fd->loops[i].v;
6858 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6859 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6860 false, GSI_CONTINUE_LINKING);
6861 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6862 cond_stmt = gimple_build_cond_empty (t);
6863 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6864 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6865 expand_omp_regimplify_p, NULL, NULL)
6866 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6867 expand_omp_regimplify_p, NULL, NULL))
6868 {
6869 gsi = gsi_for_stmt (cond_stmt);
6870 gimple_regimplify_operands (cond_stmt, &gsi);
6871 }
6872 ne = single_succ_edge (bb);
6873 ne->flags = EDGE_FALSE_VALUE;
6874
6875 init_bb = create_empty_bb (bb);
6876 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6877 add_bb_to_loop (init_bb, bb->loop_father);
6878 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6879 e->probability
6880 = profile_probability::guessed_always ().apply_scale (7, 8);
6881 ne->probability = e->probability.invert ();
6882
6883 gsi = gsi_after_labels (init_bb);
6884 if (fd->loops[i + 1].m1)
6885 {
6886 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6887 fd->loops[i + 1
6888 - fd->loops[i + 1].outer].v);
6889 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6890 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6891 else
6892 {
6893 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6894 fd->loops[i + 1].n1);
6895 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6896 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6897 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6898 }
6899 }
6900 else
6901 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6902 fd->loops[i + 1].n1);
6903 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6904 if (fd->loops[i + 1].m2)
6905 {
6906 if (i + 2 == fd->collapse && (n2var || altv))
6907 {
6908 gcc_assert (n2v == NULL_TREE);
6909 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6910 }
6911 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6912 fd->loops[i + 1
6913 - fd->loops[i + 1].outer].v);
6914 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6915 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
6916 else
6917 {
6918 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6919 fd->loops[i + 1].n2);
6920 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6921 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6922 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6923 }
6924 expand_omp_build_assign (&gsi, n2v, t);
6925 }
6926 if (i + 2 == fd->collapse && n2var)
6927 {
6928 /* For composite simd, n2 is the first iteration the current
6929 task shouldn't already handle, so we effectively want to use
6930 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6931 as the vectorized loop. Except the vectorizer will not
6932 vectorize that, so instead compute N2VAR as
6933 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6934 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6935 as the loop to vectorize. */
6936 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6937 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6938 {
6939 tree itype = TREE_TYPE (fd->loops[i].v);
6940 if (POINTER_TYPE_P (itype))
6941 itype = signed_type_for (itype);
6942 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6943 == LT_EXPR ? -1 : 1));
6944 t = fold_build2 (PLUS_EXPR, itype,
6945 fold_convert (itype,
6946 fd->loops[i + 1].step), t);
6947 if (fd->loops[i + 1].m2 == NULL_TREE)
6948 t = fold_build2 (PLUS_EXPR, itype, t,
6949 fold_convert (itype,
6950 fd->loops[i + 1].n2));
6951 else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
6952 {
6953 t = fold_build_pointer_plus (n2v, t);
6954 t = fold_convert (itype, t);
6955 }
6956 else
6957 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6958 t = fold_build2 (MINUS_EXPR, itype, t,
6959 fold_convert (itype, fd->loops[i + 1].v));
6960 tree step = fold_convert (itype, fd->loops[i + 1].step);
6961 if (TYPE_UNSIGNED (itype)
6962 && fd->loops[i + 1].cond_code == GT_EXPR)
6963 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6964 fold_build1 (NEGATE_EXPR, itype, t),
6965 fold_build1 (NEGATE_EXPR, itype, step));
6966 else
6967 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6968 t = fold_convert (type, t);
6969 }
6970 else
6971 t = counts[i + 1];
6972 expand_omp_build_assign (&gsi, min_arg1, t2);
6973 expand_omp_build_assign (&gsi, min_arg2, t);
6974 e = split_block (init_bb, last_stmt (init_bb));
6975 gsi = gsi_after_labels (e->dest);
6976 init_bb = e->dest;
6977 remove_edge (FALLTHRU_EDGE (entry_bb));
6978 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6979 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6980 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6981 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6982 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6983 expand_omp_build_assign (&gsi, n2var, t);
6984 }
6985 if (i + 2 == fd->collapse && altv)
6986 {
6987 /* The vectorizer currently punts on loops with non-constant
6988 steps for the main IV (can't compute number of iterations
6989 and gives up because of that). Since for OpenMP loops it is
6990 always possible to compute the number of iterations upfront,
6991 use an alternate IV as the loop iterator. */
6992 expand_omp_build_assign (&gsi, altv,
6993 build_zero_cst (TREE_TYPE (altv)));
6994 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6995 if (POINTER_TYPE_P (itype))
6996 itype = signed_type_for (itype);
6997 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6998 ? -1 : 1));
6999 t = fold_build2 (PLUS_EXPR, itype,
7000 fold_convert (itype, fd->loops[i + 1].step), t);
7001 t = fold_build2 (PLUS_EXPR, itype, t,
7002 fold_convert (itype,
7003 fd->loops[i + 1].m2
7004 ? n2v : fd->loops[i + 1].n2));
7005 t = fold_build2 (MINUS_EXPR, itype, t,
7006 fold_convert (itype, fd->loops[i + 1].v));
7007 tree step = fold_convert (itype, fd->loops[i + 1].step);
7008 if (TYPE_UNSIGNED (itype)
7009 && fd->loops[i + 1].cond_code == GT_EXPR)
7010 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7011 fold_build1 (NEGATE_EXPR, itype, t),
7012 fold_build1 (NEGATE_EXPR, itype, step));
7013 else
7014 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7015 t = fold_convert (TREE_TYPE (altv), t);
7016 expand_omp_build_assign (&gsi, altn2, t);
7017 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7018 fd->loops[i + 1].m2
7019 ? n2v : fd->loops[i + 1].n2);
7020 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7021 true, GSI_SAME_STMT);
7022 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7023 fd->loops[i + 1].v, t2);
7024 gassign *g
7025 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7026 build_zero_cst (TREE_TYPE (altv)));
7027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7028 }
7029 n2v = nextn2v;
7030
7031 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7032 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7033 {
7034 e = find_edge (entry_bb, last_bb);
7035 redirect_edge_succ (e, bb);
7036 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7037 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7038 }
7039
7040 last_bb = bb;
7041 }
7042 }
7043 if (!broken_loop)
7044 {
7045 class loop *loop = alloc_loop ();
7046 loop->header = l1_bb;
7047 loop->latch = cont_bb;
7048 add_loop (loop, l1_bb->loop_father);
7049 loop->safelen = safelen_int;
7050 if (simduid)
7051 {
7052 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7053 cfun->has_simduid_loops = true;
7054 }
7055 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7056 the loop. */
7057 if ((flag_tree_loop_vectorize
7058 || !OPTION_SET_P (flag_tree_loop_vectorize))
7059 && flag_tree_loop_optimize
7060 && loop->safelen > 1)
7061 {
7062 loop->force_vectorize = true;
7063 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7064 {
7065 unsigned HOST_WIDE_INT v
7066 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7067 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7068 loop->simdlen = v;
7069 }
7070 cfun->has_force_vectorize_loops = true;
7071 }
7072 else if (dont_vectorize)
7073 loop->dont_vectorize = true;
7074 }
7075 else if (simduid)
7076 cfun->has_simduid_loops = true;
7077 }
7078
7079 /* Taskloop construct is represented after gimplification with
7080 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7081 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7082 which should just compute all the needed loop temporaries
7083 for GIMPLE_OMP_TASK. */
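/* Roughly, after gimplification a taskloop looks like

     GIMPLE_OMP_FOR      <- outer loop, expanded here
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR  <- inner loop, expanded by
           BODY             expand_omp_taskloop_for_inner below

   The outer loop only fills in the _looptemp_ variables (start, end and,
   if needed, the total iteration count) that the task construct and the
   GOMP_taskloop{,_ull} runtime call consume.  */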
7084
7085 static void
7086 expand_omp_taskloop_for_outer (struct omp_region *region,
7087 struct omp_for_data *fd,
7088 gimple *inner_stmt)
7089 {
7090 tree type, bias = NULL_TREE;
7091 basic_block entry_bb, cont_bb, exit_bb;
7092 gimple_stmt_iterator gsi;
7093 gassign *assign_stmt;
7094 tree *counts = NULL;
7095 int i;
7096
7097 gcc_assert (inner_stmt);
7098 gcc_assert (region->cont);
7099 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7100 && gimple_omp_task_taskloop_p (inner_stmt));
7101 type = TREE_TYPE (fd->loop.v);
7102
7103 /* See if we need to bias by LLONG_MIN. */
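/* (The taskloop runtime interface iterates in fd->iter_type, here
   unsigned long long; when the original IV is signed and its range may
   cross zero, biasing both bounds by TYPE_MIN_VALUE keeps the unsigned
   comparisons done by the runtime consistent with the signed source
   ordering.)  */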
7104 if (fd->iter_type == long_long_unsigned_type_node
7105 && TREE_CODE (type) == INTEGER_TYPE
7106 && !TYPE_UNSIGNED (type))
7107 {
7108 tree n1, n2;
7109
7110 if (fd->loop.cond_code == LT_EXPR)
7111 {
7112 n1 = fd->loop.n1;
7113 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7114 }
7115 else
7116 {
7117 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7118 n2 = fd->loop.n1;
7119 }
7120 if (TREE_CODE (n1) != INTEGER_CST
7121 || TREE_CODE (n2) != INTEGER_CST
7122 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7123 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7124 }
7125
7126 entry_bb = region->entry;
7127 cont_bb = region->cont;
7128 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7129 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7130 exit_bb = region->exit;
7131
7132 gsi = gsi_last_nondebug_bb (entry_bb);
7133 gimple *for_stmt = gsi_stmt (gsi);
7134 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7135 if (fd->collapse > 1)
7136 {
7137 int first_zero_iter = -1, dummy = -1;
7138 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7139
7140 counts = XALLOCAVEC (tree, fd->collapse);
7141 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7142 zero_iter_bb, first_zero_iter,
7143 dummy_bb, dummy, l2_dom_bb);
7144
7145 if (zero_iter_bb)
7146 {
7147 /* Some counts[i] vars might be uninitialized if
7148 some loop has zero iterations. But the body shouldn't
7149 be executed in that case, so just avoid uninit warnings. */
7150 for (i = first_zero_iter; i < fd->collapse; i++)
7151 if (SSA_VAR_P (counts[i]))
7152 suppress_warning (counts[i], OPT_Wuninitialized);
7153 gsi_prev (&gsi);
7154 edge e = split_block (entry_bb, gsi_stmt (gsi));
7155 entry_bb = e->dest;
7156 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7157 gsi = gsi_last_bb (entry_bb);
7158 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7159 get_immediate_dominator (CDI_DOMINATORS,
7160 zero_iter_bb));
7161 }
7162 }
7163
7164 tree t0, t1;
7165 t1 = fd->loop.n2;
7166 t0 = fd->loop.n1;
7167 if (POINTER_TYPE_P (TREE_TYPE (t0))
7168 && TYPE_PRECISION (TREE_TYPE (t0))
7169 != TYPE_PRECISION (fd->iter_type))
7170 {
7171 /* Avoid casting pointers to integer of a different size. */
7172 tree itype = signed_type_for (type);
7173 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7174 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7175 }
7176 else
7177 {
7178 t1 = fold_convert (fd->iter_type, t1);
7179 t0 = fold_convert (fd->iter_type, t0);
7180 }
7181 if (bias)
7182 {
7183 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7184 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7185 }
7186
7187 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7188 OMP_CLAUSE__LOOPTEMP_);
7189 gcc_assert (innerc);
7190 tree startvar = OMP_CLAUSE_DECL (innerc);
7191 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7192 gcc_assert (innerc);
7193 tree endvar = OMP_CLAUSE_DECL (innerc);
7194 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7195 {
7196 innerc = find_lastprivate_looptemp (fd, innerc);
7197 if (innerc)
7198 {
7199 /* If needed (inner taskloop has lastprivate clause), propagate
7200 down the total number of iterations. */
7201 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7202 NULL_TREE, false,
7203 GSI_CONTINUE_LINKING);
7204 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7205 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7206 }
7207 }
7208
7209 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7210 GSI_CONTINUE_LINKING);
7211 assign_stmt = gimple_build_assign (startvar, t0);
7212 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7213
7214 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7215 GSI_CONTINUE_LINKING);
7216 assign_stmt = gimple_build_assign (endvar, t1);
7217 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7218 if (fd->collapse > 1)
7219 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7220
7221 /* Remove the GIMPLE_OMP_FOR statement. */
7222 gsi = gsi_for_stmt (for_stmt);
7223 gsi_remove (&gsi, true);
7224
7225 gsi = gsi_last_nondebug_bb (cont_bb);
7226 gsi_remove (&gsi, true);
7227
7228 gsi = gsi_last_nondebug_bb (exit_bb);
7229 gsi_remove (&gsi, true);
7230
7231 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7232 remove_edge (BRANCH_EDGE (entry_bb));
7233 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7234 remove_edge (BRANCH_EDGE (cont_bb));
7235 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7236 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7237 recompute_dominator (CDI_DOMINATORS, region->entry));
7238 }
7239
7240 /* Taskloop construct is represented after gimplification with
7241 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7242 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7243 GOMP_taskloop{,_ull} function arranges for each task to be given just
7244 a single range of iterations. */
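/* So, in a sketch, each task ends up running

     for (V = start_looptemp; V cond end_looptemp; V += STEP)
       BODY;

   a plain sequential loop over the iteration range assigned to it, with
   start/end taken from the _looptemp_ clauses.  */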
7245
7246 static void
7247 expand_omp_taskloop_for_inner (struct omp_region *region,
7248 struct omp_for_data *fd,
7249 gimple *inner_stmt)
7250 {
7251 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7252 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7253 basic_block fin_bb;
7254 gimple_stmt_iterator gsi;
7255 edge ep;
7256 bool broken_loop = region->cont == NULL;
7257 tree *counts = NULL;
7258 tree n1, n2, step;
7259
7260 itype = type = TREE_TYPE (fd->loop.v);
7261 if (POINTER_TYPE_P (type))
7262 itype = signed_type_for (type);
7263
7264 /* See if we need to bias by LLONG_MIN. */
7265 if (fd->iter_type == long_long_unsigned_type_node
7266 && TREE_CODE (type) == INTEGER_TYPE
7267 && !TYPE_UNSIGNED (type))
7268 {
7269 tree n1, n2;
7270
7271 if (fd->loop.cond_code == LT_EXPR)
7272 {
7273 n1 = fd->loop.n1;
7274 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7275 }
7276 else
7277 {
7278 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7279 n2 = fd->loop.n1;
7280 }
7281 if (TREE_CODE (n1) != INTEGER_CST
7282 || TREE_CODE (n2) != INTEGER_CST
7283 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7284 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7285 }
7286
7287 entry_bb = region->entry;
7288 cont_bb = region->cont;
7289 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7290 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7291 gcc_assert (broken_loop
7292 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7293 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7294 if (!broken_loop)
7295 {
7296 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7297 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7298 }
7299 exit_bb = region->exit;
7300
7301 /* Iteration space partitioning goes in ENTRY_BB. */
7302 gsi = gsi_last_nondebug_bb (entry_bb);
7303 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7304
7305 if (fd->collapse > 1)
7306 {
7307 int first_zero_iter = -1, dummy = -1;
7308 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7309
7310 counts = XALLOCAVEC (tree, fd->collapse);
7311 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7312 fin_bb, first_zero_iter,
7313 dummy_bb, dummy, l2_dom_bb);
7314 t = NULL_TREE;
7315 }
7316 else
7317 t = integer_one_node;
7318
7319 step = fd->loop.step;
7320 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7321 OMP_CLAUSE__LOOPTEMP_);
7322 gcc_assert (innerc);
7323 n1 = OMP_CLAUSE_DECL (innerc);
7324 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7325 gcc_assert (innerc);
7326 n2 = OMP_CLAUSE_DECL (innerc);
7327 if (bias)
7328 {
7329 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7330 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7331 }
7332 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7333 true, NULL_TREE, true, GSI_SAME_STMT);
7334 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7335 true, NULL_TREE, true, GSI_SAME_STMT);
7336 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7337 true, NULL_TREE, true, GSI_SAME_STMT);
7338
7339 tree startvar = fd->loop.v;
7340 tree endvar = NULL_TREE;
7341
7342 if (gimple_omp_for_combined_p (fd->for_stmt))
7343 {
7344 tree clauses = gimple_omp_for_clauses (inner_stmt);
7345 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7346 gcc_assert (innerc);
7347 startvar = OMP_CLAUSE_DECL (innerc);
7348 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7349 OMP_CLAUSE__LOOPTEMP_);
7350 gcc_assert (innerc);
7351 endvar = OMP_CLAUSE_DECL (innerc);
7352 }
7353 t = fold_convert (TREE_TYPE (startvar), n1);
7354 t = force_gimple_operand_gsi (&gsi, t,
7355 DECL_P (startvar)
7356 && TREE_ADDRESSABLE (startvar),
7357 NULL_TREE, false, GSI_CONTINUE_LINKING);
7358 gimple *assign_stmt = gimple_build_assign (startvar, t);
7359 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7360
7361 t = fold_convert (TREE_TYPE (startvar), n2);
7362 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7363 false, GSI_CONTINUE_LINKING);
7364 if (endvar)
7365 {
7366 assign_stmt = gimple_build_assign (endvar, e);
7367 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7368 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7369 assign_stmt = gimple_build_assign (fd->loop.v, e);
7370 else
7371 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7372 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7373 }
7374
7375 tree *nonrect_bounds = NULL;
7376 if (fd->collapse > 1)
7377 {
7378 if (fd->non_rect)
7379 {
7380 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7381 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7382 }
7383 gcc_assert (gsi_bb (gsi) == entry_bb);
7384 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7385 startvar);
7386 entry_bb = gsi_bb (gsi);
7387 }
7388
7389 if (!broken_loop)
7390 {
7391 /* The code controlling the sequential loop replaces the
7392 GIMPLE_OMP_CONTINUE. */
7393 gsi = gsi_last_nondebug_bb (cont_bb);
7394 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7395 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7396 vmain = gimple_omp_continue_control_use (cont_stmt);
7397 vback = gimple_omp_continue_control_def (cont_stmt);
7398
7399 if (!gimple_omp_for_combined_p (fd->for_stmt))
7400 {
7401 if (POINTER_TYPE_P (type))
7402 t = fold_build_pointer_plus (vmain, step);
7403 else
7404 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7405 t = force_gimple_operand_gsi (&gsi, t,
7406 DECL_P (vback)
7407 && TREE_ADDRESSABLE (vback),
7408 NULL_TREE, true, GSI_SAME_STMT);
7409 assign_stmt = gimple_build_assign (vback, t);
7410 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7411
7412 t = build2 (fd->loop.cond_code, boolean_type_node,
7413 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7414 ? t : vback, e);
7415 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7416 }
7417
7418 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7419 gsi_remove (&gsi, true);
7420
7421 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7422 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7423 cont_bb, body_bb);
7424 }
7425
7426 /* Remove the GIMPLE_OMP_FOR statement. */
7427 gsi = gsi_for_stmt (fd->for_stmt);
7428 gsi_remove (&gsi, true);
7429
7430 /* Remove the GIMPLE_OMP_RETURN statement. */
7431 gsi = gsi_last_nondebug_bb (exit_bb);
7432 gsi_remove (&gsi, true);
7433
7434 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7435 if (!broken_loop)
7436 remove_edge (BRANCH_EDGE (entry_bb));
7437 else
7438 {
7439 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7440 region->outer->cont = NULL;
7441 }
7442
7443 /* Connect all the blocks. */
7444 if (!broken_loop)
7445 {
7446 ep = find_edge (cont_bb, body_bb);
7447 if (gimple_omp_for_combined_p (fd->for_stmt))
7448 {
7449 remove_edge (ep);
7450 ep = NULL;
7451 }
7452 else if (fd->collapse > 1)
7453 {
7454 remove_edge (ep);
7455 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7456 }
7457 else
7458 ep->flags = EDGE_TRUE_VALUE;
7459 find_edge (cont_bb, fin_bb)->flags
7460 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7461 }
7462
7463 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7464 recompute_dominator (CDI_DOMINATORS, body_bb));
7465 if (!broken_loop)
7466 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7467 recompute_dominator (CDI_DOMINATORS, fin_bb));
7468
7469 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7470 {
7471 class loop *loop = alloc_loop ();
7472 loop->header = body_bb;
7473 if (collapse_bb == NULL)
7474 loop->latch = cont_bb;
7475 add_loop (loop, body_bb->loop_father);
7476 }
7477 }
7478
7479 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7480 partitioned loop. The lowering here is abstracted, in that the
7481 loop parameters are passed through internal functions, which are
7482 further lowered by oacc_device_lower, once we get to the target
7483 compiler. The loop is of the form:
7484
7485 for (V = B; V LTGT E; V += S) {BODY}
7486
7487 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7488 (constant 0 for no chunking) and we will have a GWV partitioning
7489 mask, specifying dimensions over which the loop is to be
7490 partitioned (see note below). We generate code that looks like
7491 (this ignores tiling):
7492
7493 <entry_bb> [incoming FALL->body, BRANCH->exit]
7494 typedef signedintify (typeof (V)) T; // underlying signed integral type
7495 T range = E - B;
7496 T chunk_no = 0;
7497 T DIR = LTGT == '<' ? +1 : -1;
7498 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7499 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7500
7501 <head_bb> [created by splitting end of entry_bb]
7502 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7503 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7504 if (!(offset LTGT bound)) goto bottom_bb;
7505
7506 <body_bb> [incoming]
7507 V = B + offset;
7508 {BODY}
7509
7510 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7511 offset += step;
7512 if (offset LTGT bound) goto body_bb; [*]
7513
7514 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7515 chunk_no++;
7516 if (chunk < chunk_max) goto head_bb;
7517
7518 <exit_bb> [incoming]
7519 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7520
7521 [*] Needed if V live at end of loop. */
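/* When tiling is requested the structure above gains one more level: the
   outer loop's step is scaled by the tile size, and an inner "element"
   loop, created further below, walks each tile using GOACC_LOOP_OFFSET /
   _BOUND / _STEP calls of its own (marked as element loops with
   GWV == -1).  */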
7522
7523 static void
7524 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7525 {
7526 bool is_oacc_kernels_parallelized
7527 = (lookup_attribute ("oacc kernels parallelized",
7528 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7529 {
7530 bool is_oacc_kernels
7531 = (lookup_attribute ("oacc kernels",
7532 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7533 if (is_oacc_kernels_parallelized)
7534 gcc_checking_assert (is_oacc_kernels);
7535 }
7536 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7537 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7538 for SSA specifics, and some are for 'parloops' OpenACC
7539 'kernels'-parallelized specifics. */
7540
7541 tree v = fd->loop.v;
7542 enum tree_code cond_code = fd->loop.cond_code;
7543 enum tree_code plus_code = PLUS_EXPR;
7544
7545 tree chunk_size = integer_minus_one_node;
7546 tree gwv = integer_zero_node;
7547 tree iter_type = TREE_TYPE (v);
7548 tree diff_type = iter_type;
7549 tree plus_type = iter_type;
7550 struct oacc_collapse *counts = NULL;
7551
7552 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7553 == GF_OMP_FOR_KIND_OACC_LOOP);
7554 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7555 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7556
7557 if (POINTER_TYPE_P (iter_type))
7558 {
7559 plus_code = POINTER_PLUS_EXPR;
7560 plus_type = sizetype;
7561 }
7562 for (int ix = fd->collapse; ix--;)
7563 {
7564 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7565 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7566 diff_type = diff_type2;
7567 }
7568 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7569 diff_type = signed_type_for (diff_type);
7570 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7571 diff_type = integer_type_node;
7572
7573 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7574 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7575 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7576 basic_block bottom_bb = NULL;
7577
7578 /* entry_bb has two successors; the branch edge is to the exit
7579 block, fallthrough edge to body. */
7580 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7581 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7582
7583 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7584 body_bb, or to a block whose only successor is the body_bb. Its
7585 fallthrough successor is the final block (same as the branch
7586 successor of the entry_bb). */
7587 if (cont_bb)
7588 {
7589 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7590 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7591
7592 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7593 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7594 }
7595 else
7596 gcc_assert (!gimple_in_ssa_p (cfun));
7597
7598 /* The exit block only has entry_bb and cont_bb as predecessors. */
7599 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7600
7601 tree chunk_no;
7602 tree chunk_max = NULL_TREE;
7603 tree bound, offset;
7604 tree step = create_tmp_var (diff_type, ".step");
7605 bool up = cond_code == LT_EXPR;
7606 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7607 bool chunking = !gimple_in_ssa_p (cfun);
7608 bool negating;
7609
7610 /* Tiling vars. */
7611 tree tile_size = NULL_TREE;
7612 tree element_s = NULL_TREE;
7613 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7614 basic_block elem_body_bb = NULL;
7615 basic_block elem_cont_bb = NULL;
7616
7617 /* SSA instances. */
7618 tree offset_incr = NULL_TREE;
7619 tree offset_init = NULL_TREE;
7620
7621 gimple_stmt_iterator gsi;
7622 gassign *ass;
7623 gcall *call;
7624 gimple *stmt;
7625 tree expr;
7626 location_t loc;
7627 edge split, be, fte;
7628
7629 /* Split the end of entry_bb to create head_bb. */
7630 split = split_block (entry_bb, last_stmt (entry_bb));
7631 basic_block head_bb = split->dest;
7632 entry_bb = split->src;
7633
7634 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7635 gsi = gsi_last_nondebug_bb (entry_bb);
7636 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7637 loc = gimple_location (for_stmt);
7638
7639 if (gimple_in_ssa_p (cfun))
7640 {
7641 offset_init = gimple_omp_for_index (for_stmt, 0);
7642 gcc_assert (integer_zerop (fd->loop.n1));
7643 /* The SSA parallelizer does gang parallelism. */
7644 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7645 }
7646
7647 if (fd->collapse > 1 || fd->tiling)
7648 {
7649 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7650 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7651 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7652 TREE_TYPE (fd->loop.n2), loc);
7653
7654 if (SSA_VAR_P (fd->loop.n2))
7655 {
7656 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7657 true, GSI_SAME_STMT);
7658 ass = gimple_build_assign (fd->loop.n2, total);
7659 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7660 }
7661 }
7662
7663 tree b = fd->loop.n1;
7664 tree e = fd->loop.n2;
7665 tree s = fd->loop.step;
7666
7667 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7668 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7669
7670 /* Convert the step, avoiding possible unsigned->signed overflow. */
7671 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7672 if (negating)
7673 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7674 s = fold_convert (diff_type, s);
7675 if (negating)
7676 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7677 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7678
7679 if (!chunking)
7680 chunk_size = integer_zero_node;
7681 expr = fold_convert (diff_type, chunk_size);
7682 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7683 NULL_TREE, true, GSI_SAME_STMT);
7684
7685 if (fd->tiling)
7686 {
7687 /* Determine the tile size and element step,
7688 modify the outer loop step size. */
7689 tile_size = create_tmp_var (diff_type, ".tile_size");
7690 expr = build_int_cst (diff_type, 1);
7691 for (int ix = 0; ix < fd->collapse; ix++)
7692 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7693 expr = force_gimple_operand_gsi (&gsi, expr, true,
7694 NULL_TREE, true, GSI_SAME_STMT);
7695 ass = gimple_build_assign (tile_size, expr);
7696 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7697
7698 element_s = create_tmp_var (diff_type, ".element_s");
7699 ass = gimple_build_assign (element_s, s);
7700 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7701
7702 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7703 s = force_gimple_operand_gsi (&gsi, expr, true,
7704 NULL_TREE, true, GSI_SAME_STMT);
7705 }
7706
7707 /* Determine the range, avoiding possible unsigned->signed overflow. */
7708 negating = !up && TYPE_UNSIGNED (iter_type);
7709 expr = fold_build2 (MINUS_EXPR, plus_type,
7710 fold_convert (plus_type, negating ? b : e),
7711 fold_convert (plus_type, negating ? e : b));
7712 expr = fold_convert (diff_type, expr);
7713 if (negating)
7714 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7715 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7716 NULL_TREE, true, GSI_SAME_STMT);
7717
7718 chunk_no = build_int_cst (diff_type, 0);
7719 if (chunking)
7720 {
7721 gcc_assert (!gimple_in_ssa_p (cfun));
7722
7723 expr = chunk_no;
7724 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7725 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7726
7727 ass = gimple_build_assign (chunk_no, expr);
7728 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7729
7730 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7731 build_int_cst (integer_type_node,
7732 IFN_GOACC_LOOP_CHUNKS),
7733 dir, range, s, chunk_size, gwv);
7734 gimple_call_set_lhs (call, chunk_max);
7735 gimple_set_location (call, loc);
7736 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7737 }
7738 else
7739 chunk_size = chunk_no;
7740
7741 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7742 build_int_cst (integer_type_node,
7743 IFN_GOACC_LOOP_STEP),
7744 dir, range, s, chunk_size, gwv);
7745 gimple_call_set_lhs (call, step);
7746 gimple_set_location (call, loc);
7747 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7748
7749 /* Remove the GIMPLE_OMP_FOR. */
7750 gsi_remove (&gsi, true);
7751
7752 /* Fixup edges from head_bb. */
7753 be = BRANCH_EDGE (head_bb);
7754 fte = FALLTHRU_EDGE (head_bb);
7755 be->flags |= EDGE_FALSE_VALUE;
7756 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7757
7758 basic_block body_bb = fte->dest;
7759
7760 if (gimple_in_ssa_p (cfun))
7761 {
7762 gsi = gsi_last_nondebug_bb (cont_bb);
7763 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7764
7765 offset = gimple_omp_continue_control_use (cont_stmt);
7766 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7767 }
7768 else
7769 {
7770 offset = create_tmp_var (diff_type, ".offset");
7771 offset_init = offset_incr = offset;
7772 }
7773 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7774
7775 /* Loop offset & bound go into head_bb. */
7776 gsi = gsi_start_bb (head_bb);
7777
7778 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7779 build_int_cst (integer_type_node,
7780 IFN_GOACC_LOOP_OFFSET),
7781 dir, range, s,
7782 chunk_size, gwv, chunk_no);
7783 gimple_call_set_lhs (call, offset_init);
7784 gimple_set_location (call, loc);
7785 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7786
7787 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7788 build_int_cst (integer_type_node,
7789 IFN_GOACC_LOOP_BOUND),
7790 dir, range, s,
7791 chunk_size, gwv, offset_init);
7792 gimple_call_set_lhs (call, bound);
7793 gimple_set_location (call, loc);
7794 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7795
7796 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7797 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7798 GSI_CONTINUE_LINKING);
7799
7800 /* V assignment goes into body_bb. */
7801 if (!gimple_in_ssa_p (cfun))
7802 {
7803 gsi = gsi_start_bb (body_bb);
7804
7805 expr = build2 (plus_code, iter_type, b,
7806 fold_convert (plus_type, offset));
7807 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7808 true, GSI_SAME_STMT);
7809 ass = gimple_build_assign (v, expr);
7810 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7811
7812 if (fd->collapse > 1 || fd->tiling)
7813 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7814
7815 if (fd->tiling)
7816 {
7817 /* Determine the range of the element loop -- usually simply
7818 the tile_size, but could be smaller if the final
7819 iteration of the outer loop is a partial tile. */
7820 tree e_range = create_tmp_var (diff_type, ".e_range");
7821
7822 expr = build2 (MIN_EXPR, diff_type,
7823 build2 (MINUS_EXPR, diff_type, bound, offset),
7824 build2 (MULT_EXPR, diff_type, tile_size,
7825 element_s));
7826 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7827 true, GSI_SAME_STMT);
7828 ass = gimple_build_assign (e_range, expr);
7829 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7830
7831 /* Determine bound, offset & step of inner loop. */
7832 e_bound = create_tmp_var (diff_type, ".e_bound");
7833 e_offset = create_tmp_var (diff_type, ".e_offset");
7834 e_step = create_tmp_var (diff_type, ".e_step");
7835
7836 /* Mark these as element loops. */
7837 tree t, e_gwv = integer_minus_one_node;
7838 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7839
7840 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7841 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7842 element_s, chunk, e_gwv, chunk);
7843 gimple_call_set_lhs (call, e_offset);
7844 gimple_set_location (call, loc);
7845 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7846
7847 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7848 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7849 element_s, chunk, e_gwv, e_offset);
7850 gimple_call_set_lhs (call, e_bound);
7851 gimple_set_location (call, loc);
7852 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7853
7854 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7855 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7856 element_s, chunk, e_gwv);
7857 gimple_call_set_lhs (call, e_step);
7858 gimple_set_location (call, loc);
7859 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7860
7861 /* Add test and split block. */
7862 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7863 stmt = gimple_build_cond_empty (expr);
7864 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7865 split = split_block (body_bb, stmt);
7866 elem_body_bb = split->dest;
7867 if (cont_bb == body_bb)
7868 cont_bb = elem_body_bb;
7869 body_bb = split->src;
7870
7871 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7872
7873 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7874 if (cont_bb == NULL)
7875 {
7876 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7877 e->probability = profile_probability::even ();
7878 split->probability = profile_probability::even ();
7879 }
7880
7881 /* Initialize the user's loop vars. */
7882 gsi = gsi_start_bb (elem_body_bb);
7883 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7884 diff_type);
7885 }
7886 }
7887
7888 /* Loop increment goes into cont_bb. If this is not a loop, we
7889 will have spawned threads as if it was, and each one will
7890 execute one iteration. The specification is not explicit about
7891 whether such constructs are ill-formed or not, and they can
7892 occur, especially when noreturn routines are involved. */
7893 if (cont_bb)
7894 {
7895 gsi = gsi_last_nondebug_bb (cont_bb);
7896 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7897 loc = gimple_location (cont_stmt);
7898
7899 if (fd->tiling)
7900 {
7901 /* Insert element loop increment and test. */
7902 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7903 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7904 true, GSI_SAME_STMT);
7905 ass = gimple_build_assign (e_offset, expr);
7906 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7907 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7908
7909 stmt = gimple_build_cond_empty (expr);
7910 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7911 split = split_block (cont_bb, stmt);
7912 elem_cont_bb = split->src;
7913 cont_bb = split->dest;
7914
7915 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7916 split->probability = profile_probability::unlikely ().guessed ();
7917 edge latch_edge
7918 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7919 latch_edge->probability = profile_probability::likely ().guessed ();
7920
7921 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7922 skip_edge->probability = profile_probability::unlikely ().guessed ();
7923 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7924 loop_entry_edge->probability
7925 = profile_probability::likely ().guessed ();
7926
7927 gsi = gsi_for_stmt (cont_stmt);
7928 }
7929
7930 /* Increment offset. */
7931 if (gimple_in_ssa_p (cfun))
7932 expr = build2 (plus_code, iter_type, offset,
7933 fold_convert (plus_type, step));
7934 else
7935 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7936 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7937 true, GSI_SAME_STMT);
7938 ass = gimple_build_assign (offset_incr, expr);
7939 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7940 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7941 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7942
7943 /* Remove the GIMPLE_OMP_CONTINUE. */
7944 gsi_remove (&gsi, true);
7945
7946 /* Fixup edges from cont_bb. */
7947 be = BRANCH_EDGE (cont_bb);
7948 fte = FALLTHRU_EDGE (cont_bb);
7949 be->flags |= EDGE_TRUE_VALUE;
7950 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7951
7952 if (chunking)
7953 {
7954 /* Split the beginning of exit_bb to make bottom_bb. We
7955 need to insert a nop at the start, because splitting is
7956 after a stmt, not before. */
7957 gsi = gsi_start_bb (exit_bb);
7958 stmt = gimple_build_nop ();
7959 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7960 split = split_block (exit_bb, stmt);
7961 bottom_bb = split->src;
7962 exit_bb = split->dest;
7963 gsi = gsi_last_bb (bottom_bb);
7964
7965 /* Chunk increment and test goes into bottom_bb. */
7966 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7967 build_int_cst (diff_type, 1));
7968 ass = gimple_build_assign (chunk_no, expr);
7969 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7970
7971 /* Chunk test at end of bottom_bb. */
7972 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7973 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7974 GSI_CONTINUE_LINKING);
7975
7976 /* Fixup edges from bottom_bb. */
7977 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7978 split->probability = profile_probability::unlikely ().guessed ();
7979 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7980 latch_edge->probability = profile_probability::likely ().guessed ();
7981 }
7982 }
7983
7984 gsi = gsi_last_nondebug_bb (exit_bb);
7985 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7986 loc = gimple_location (gsi_stmt (gsi));
7987
7988 if (!gimple_in_ssa_p (cfun))
7989 {
7990 /* Insert the final value of V, in case it is live. This is the
7991 value for the only thread that survives past the join. */
7992 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7993 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7994 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7995 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7996 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7997 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7998 true, GSI_SAME_STMT);
7999 ass = gimple_build_assign (v, expr);
8000 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8001 }
8002
8003 /* Remove the OMP_RETURN. */
8004 gsi_remove (&gsi, true);
8005
8006 if (cont_bb)
8007 {
8008 /* We now have one, two or three nested loops. Update the loop
8009 structures. */
8010 class loop *parent = entry_bb->loop_father;
8011 class loop *body = body_bb->loop_father;
8012
8013 if (chunking)
8014 {
8015 class loop *chunk_loop = alloc_loop ();
8016 chunk_loop->header = head_bb;
8017 chunk_loop->latch = bottom_bb;
8018 add_loop (chunk_loop, parent);
8019 parent = chunk_loop;
8020 }
8021 else if (parent != body)
8022 {
8023 gcc_assert (body->header == body_bb);
8024 gcc_assert (body->latch == cont_bb
8025 || single_pred (body->latch) == cont_bb);
8026 parent = NULL;
8027 }
8028
8029 if (parent)
8030 {
8031 class loop *body_loop = alloc_loop ();
8032 body_loop->header = body_bb;
8033 body_loop->latch = cont_bb;
8034 add_loop (body_loop, parent);
8035
8036 if (fd->tiling)
8037 {
8038 /* Insert tiling's element loop. */
8039 class loop *inner_loop = alloc_loop ();
8040 inner_loop->header = elem_body_bb;
8041 inner_loop->latch = elem_cont_bb;
8042 add_loop (inner_loop, body_loop);
8043 }
8044 }
8045 }
8046 }
8047
8048 /* Expand the OMP loop defined by REGION. */
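/* A rough sketch of the dispatch below: SIMD loops go to expand_omp_simd,
   OpenACC loops to expand_oacc_for, taskloops to the two taskloop helpers,
   and unordered schedule(static) loops are open-coded by
   expand_omp_for_static_{nochunk,chunk}.  Everything else is turned into
   calls to one of the GOMP_loop_*_start / GOMP_loop_*_next libgomp entry
   points selected via start_ix/next_ix further down.  */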
8049
8050 static void
8051 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8052 {
8053 struct omp_for_data fd;
8054 struct omp_for_data_loop *loops;
8055
8056 loops = XALLOCAVEC (struct omp_for_data_loop,
8057 gimple_omp_for_collapse (last_stmt (region->entry)));
8058 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8059 &fd, loops);
8060 region->sched_kind = fd.sched_kind;
8061 region->sched_modifiers = fd.sched_modifiers;
8062 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
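/* A hypothetical example of the diagnostic below: in

     for (i = 0; i < n; i++)
       for (j = 2 * i; j < 3 * i; j += 4)

   we have m1 == 2, m2 == 3 and an outer step of 1, so (3 - 2) * 1 == 1 is
   not a multiple of the inner step 4 and the error is reported (the check
   is skipped when the loop nest was combined into an outer construct).  */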
8063 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8064 {
8065 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8066 if ((loops[i].m1 || loops[i].m2)
8067 && (loops[i].m1 == NULL_TREE
8068 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8069 && (loops[i].m2 == NULL_TREE
8070 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8071 && TREE_CODE (loops[i].step) == INTEGER_CST
8072 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8073 {
8074 tree t;
8075 tree itype = TREE_TYPE (loops[i].v);
8076 if (loops[i].m1 && loops[i].m2)
8077 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8078 else if (loops[i].m1)
8079 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8080 else
8081 t = loops[i].m2;
8082 t = fold_build2 (MULT_EXPR, itype, t,
8083 fold_convert (itype,
8084 loops[i - loops[i].outer].step));
8085 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8086 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8087 fold_build1 (NEGATE_EXPR, itype, t),
8088 fold_build1 (NEGATE_EXPR, itype,
8089 fold_convert (itype,
8090 loops[i].step)));
8091 else
8092 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8093 fold_convert (itype, loops[i].step));
8094 if (integer_nonzerop (t))
8095 error_at (gimple_location (fd.for_stmt),
8096 "invalid OpenMP non-rectangular loop step; "
8097 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8098 "step %qE",
8099 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8100 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8101 loops[i - loops[i].outer].step, i + 1,
8102 loops[i].step);
8103 }
8104 }
8105
8106 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8107 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8108 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8109 if (region->cont)
8110 {
8111 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8112 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8113 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8114 }
8115 else
8116 /* If there isn't a continue then this is a degenerate case where
8117 the introduction of abnormal edges during lowering will prevent
8118 original loops from being detected. Fix that up. */
8119 loops_state_set (LOOPS_NEED_FIXUP);
8120
8121 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8122 expand_omp_simd (region, &fd);
8123 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8124 {
8125 gcc_assert (!inner_stmt && !fd.non_rect);
8126 expand_oacc_for (region, &fd);
8127 }
8128 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8129 {
8130 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8131 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8132 else
8133 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8134 }
8135 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8136 && !fd.have_ordered)
8137 {
8138 if (fd.chunk_size == NULL)
8139 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8140 else
8141 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8142 }
8143 else
8144 {
8145 int fn_index, start_ix, next_ix;
8146 unsigned HOST_WIDE_INT sched = 0;
8147 tree sched_arg = NULL_TREE;
8148
8149 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8150 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8151 if (fd.chunk_size == NULL
8152 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8153 fd.chunk_size = integer_zero_node;
8154 switch (fd.sched_kind)
8155 {
8156 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8157 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8158 && fd.lastprivate_conditional == 0)
8159 {
8160 gcc_assert (!fd.have_ordered);
8161 fn_index = 6;
8162 sched = 4;
8163 }
8164 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8165 && !fd.have_ordered
8166 && fd.lastprivate_conditional == 0)
8167 fn_index = 7;
8168 else
8169 {
8170 fn_index = 3;
8171 sched = (HOST_WIDE_INT_1U << 31);
8172 }
8173 break;
8174 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8175 case OMP_CLAUSE_SCHEDULE_GUIDED:
8176 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8177 && !fd.have_ordered
8178 && fd.lastprivate_conditional == 0)
8179 {
8180 fn_index = 3 + fd.sched_kind;
8181 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8182 break;
8183 }
8184 fn_index = fd.sched_kind;
8185 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8186 sched += (HOST_WIDE_INT_1U << 31);
8187 break;
8188 case OMP_CLAUSE_SCHEDULE_STATIC:
8189 gcc_assert (fd.have_ordered);
8190 fn_index = 0;
8191 sched = (HOST_WIDE_INT_1U << 31) + 1;
8192 break;
8193 default:
8194 gcc_unreachable ();
8195 }
8196 if (!fd.ordered)
8197 fn_index += fd.have_ordered * 8;
8198 if (fd.ordered)
8199 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8200 else
8201 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8202 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8203 if (fd.have_reductemp || fd.have_pointer_condtemp)
8204 {
8205 if (fd.ordered)
8206 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8207 else if (fd.have_ordered)
8208 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8209 else
8210 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8211 sched_arg = build_int_cstu (long_integer_type_node, sched);
8212 if (!fd.chunk_size)
8213 fd.chunk_size = integer_zero_node;
8214 }
8215 if (fd.iter_type == long_long_unsigned_type_node)
8216 {
8217 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8218 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8219 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8220 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8221 }
8222 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8223 (enum built_in_function) next_ix, sched_arg,
8224 inner_stmt);
8225 }
8226
8227 if (gimple_in_ssa_p (cfun))
8228 update_ssa (TODO_update_ssa_only_virtuals);
8229 }
8230
8231 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8232
8233 v = GOMP_sections_start (n);
8234 L0:
8235 switch (v)
8236 {
8237 case 0:
8238 goto L2;
8239 case 1:
8240 section 1;
8241 goto L1;
8242 case 2:
8243 ...
8244 case n:
8245 ...
8246 default:
8247 abort ();
8248 }
8249 L1:
8250 v = GOMP_sections_next ();
8251 goto L0;
8252 L2:
8253 reduction;
8254
8255 If this is a combined parallel+sections region, replace the call to
8256 GOMP_sections_start with a call to GOMP_sections_next. */
8257
8258 static void
8259 expand_omp_sections (struct omp_region *region)
8260 {
8261 tree t, u, vin = NULL, vmain, vnext, l2;
8262 unsigned len;
8263 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8264 gimple_stmt_iterator si, switch_si;
8265 gomp_sections *sections_stmt;
8266 gimple *stmt;
8267 gomp_continue *cont;
8268 edge_iterator ei;
8269 edge e;
8270 struct omp_region *inner;
8271 unsigned i, casei;
8272 bool exit_reachable = region->cont != NULL;
8273
8274 gcc_assert (region->exit != NULL);
8275 entry_bb = region->entry;
8276 l0_bb = single_succ (entry_bb);
8277 l1_bb = region->cont;
8278 l2_bb = region->exit;
8279 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8280 l2 = gimple_block_label (l2_bb);
8281 else
8282 {
8283 /* This can happen if there are reductions. */
8284 len = EDGE_COUNT (l0_bb->succs);
8285 gcc_assert (len > 0);
8286 e = EDGE_SUCC (l0_bb, len - 1);
8287 si = gsi_last_nondebug_bb (e->dest);
8288 l2 = NULL_TREE;
8289 if (gsi_end_p (si)
8290 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8291 l2 = gimple_block_label (e->dest);
8292 else
8293 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8294 {
8295 si = gsi_last_nondebug_bb (e->dest);
8296 if (gsi_end_p (si)
8297 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8298 {
8299 l2 = gimple_block_label (e->dest);
8300 break;
8301 }
8302 }
8303 }
8304 if (exit_reachable)
8305 default_bb = create_empty_bb (l1_bb->prev_bb);
8306 else
8307 default_bb = create_empty_bb (l0_bb);
8308
8309 /* We will build a switch() with enough cases for all the
8310 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8311 and a default case to abort if something goes wrong. */
8312 len = EDGE_COUNT (l0_bb->succs);
8313
8314 /* Use vec::quick_push on label_vec throughout, since we know the size
8315 in advance. */
8316 auto_vec<tree> label_vec (len);
8317
8318 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8319 GIMPLE_OMP_SECTIONS statement. */
8320 si = gsi_last_nondebug_bb (entry_bb);
8321 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8322 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8323 vin = gimple_omp_sections_control (sections_stmt);
8324 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8325 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8326 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8327 tree cond_var = NULL_TREE;
8328 if (reductmp || condtmp)
8329 {
8330 tree reductions = null_pointer_node, mem = null_pointer_node;
8331 tree memv = NULL_TREE, condtemp = NULL_TREE;
8332 gimple_stmt_iterator gsi = gsi_none ();
8333 gimple *g = NULL;
8334 if (reductmp)
8335 {
8336 reductions = OMP_CLAUSE_DECL (reductmp);
8337 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8338 g = SSA_NAME_DEF_STMT (reductions);
8339 reductions = gimple_assign_rhs1 (g);
8340 OMP_CLAUSE_DECL (reductmp) = reductions;
8341 gsi = gsi_for_stmt (g);
8342 }
8343 else
8344 gsi = si;
8345 if (condtmp)
8346 {
8347 condtemp = OMP_CLAUSE_DECL (condtmp);
8348 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8349 OMP_CLAUSE__CONDTEMP_);
8350 cond_var = OMP_CLAUSE_DECL (c);
8351 tree type = TREE_TYPE (condtemp);
8352 memv = create_tmp_var (type);
8353 TREE_ADDRESSABLE (memv) = 1;
8354 unsigned cnt = 0;
8355 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8356 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8357 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8358 ++cnt;
8359 unsigned HOST_WIDE_INT sz
8360 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8361 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8362 false);
8363 mem = build_fold_addr_expr (memv);
8364 }
8365 t = build_int_cst (unsigned_type_node, len - 1);
8366 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8367 stmt = gimple_build_call (u, 3, t, reductions, mem);
8368 gimple_call_set_lhs (stmt, vin);
8369 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8370 if (condtmp)
8371 {
8372 expand_omp_build_assign (&gsi, condtemp, memv, false);
8373 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8374 vin, build_one_cst (TREE_TYPE (cond_var)));
8375 expand_omp_build_assign (&gsi, cond_var, t, false);
8376 }
8377 if (reductmp)
8378 {
8379 gsi_remove (&gsi, true);
8380 release_ssa_name (gimple_assign_lhs (g));
8381 }
8382 }
8383 else if (!is_combined_parallel (region))
8384 {
8385 /* If we are not inside a combined parallel+sections region,
8386 call GOMP_sections_start. */
8387 t = build_int_cst (unsigned_type_node, len - 1);
8388 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8389 stmt = gimple_build_call (u, 1, t);
8390 }
8391 else
8392 {
8393 /* Otherwise, call GOMP_sections_next. */
8394 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8395 stmt = gimple_build_call (u, 0);
8396 }
8397 if (!reductmp && !condtmp)
8398 {
8399 gimple_call_set_lhs (stmt, vin);
8400 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8401 }
8402 gsi_remove (&si, true);
8403
8404 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8405 L0_BB. */
8406 switch_si = gsi_last_nondebug_bb (l0_bb);
8407 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8408 if (exit_reachable)
8409 {
8410 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8411 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8412 vmain = gimple_omp_continue_control_use (cont);
8413 vnext = gimple_omp_continue_control_def (cont);
8414 }
8415 else
8416 {
8417 vmain = vin;
8418 vnext = NULL_TREE;
8419 }
8420
8421 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8422 label_vec.quick_push (t);
8423 i = 1;
8424
8425 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8426 for (inner = region->inner, casei = 1;
8427 inner;
8428 inner = inner->next, i++, casei++)
8429 {
8430 basic_block s_entry_bb, s_exit_bb;
8431
8432 /* Skip optional reduction region. */
8433 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8434 {
8435 --i;
8436 --casei;
8437 continue;
8438 }
8439
8440 s_entry_bb = inner->entry;
8441 s_exit_bb = inner->exit;
8442
8443 t = gimple_block_label (s_entry_bb);
8444 u = build_int_cst (unsigned_type_node, casei);
8445 u = build_case_label (u, NULL, t);
8446 label_vec.quick_push (u);
8447
8448 si = gsi_last_nondebug_bb (s_entry_bb);
8449 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8450 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8451 gsi_remove (&si, true);
8452 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8453
8454 if (s_exit_bb == NULL)
8455 continue;
8456
8457 si = gsi_last_nondebug_bb (s_exit_bb);
8458 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8459 gsi_remove (&si, true);
8460
8461 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8462 }
8463
8464 /* Error handling code goes in DEFAULT_BB. */
8465 t = gimple_block_label (default_bb);
8466 u = build_case_label (NULL, NULL, t);
8467 make_edge (l0_bb, default_bb, 0);
8468 add_bb_to_loop (default_bb, current_loops->tree_root);
8469
8470 stmt = gimple_build_switch (vmain, u, label_vec);
8471 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8472 gsi_remove (&switch_si, true);
8473
8474 si = gsi_start_bb (default_bb);
8475 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8476 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8477
8478 if (exit_reachable)
8479 {
8480 tree bfn_decl;
8481
8482 /* Code to get the next section goes in L1_BB. */
8483 si = gsi_last_nondebug_bb (l1_bb);
8484 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8485
8486 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8487 stmt = gimple_build_call (bfn_decl, 0);
8488 gimple_call_set_lhs (stmt, vnext);
8489 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8490 if (cond_var)
8491 {
8492 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8493 vnext, build_one_cst (TREE_TYPE (cond_var)));
8494 expand_omp_build_assign (&si, cond_var, t, false);
8495 }
8496 gsi_remove (&si, true);
8497
8498 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8499 }
8500
8501 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8502 si = gsi_last_nondebug_bb (l2_bb);
8503 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8504 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8505 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8506 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8507 else
8508 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8509 stmt = gimple_build_call (t, 0);
8510 if (gimple_omp_return_lhs (gsi_stmt (si)))
8511 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8512 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8513 gsi_remove (&si, true);
8514
8515 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8516 }
8517
8518 /* Expand code for an OpenMP single or scope directive. We've already expanded
8519 much of the code, here we simply place the GOMP_barrier call. */
8520
8521 static void
8522 expand_omp_single (struct omp_region *region)
8523 {
8524 basic_block entry_bb, exit_bb;
8525 gimple_stmt_iterator si;
8526
8527 entry_bb = region->entry;
8528 exit_bb = region->exit;
8529
8530 si = gsi_last_nondebug_bb (entry_bb);
8531 enum gimple_code code = gimple_code (gsi_stmt (si));
8532 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8533 gsi_remove (&si, true);
8534 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8535
8536 if (exit_bb == NULL)
8537 {
8538 gcc_assert (code == GIMPLE_OMP_SCOPE);
8539 return;
8540 }
8541
8542 si = gsi_last_nondebug_bb (exit_bb);
8543 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8544 {
8545 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8546 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8547 }
8548 gsi_remove (&si, true);
8549 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8550 }
8551
8552 /* Generic expansion for OpenMP synchronization directives: master,
8553 masked, taskgroup, ordered and critical. All we need to do here is
8554 remove the entry and exit markers for REGION. */
8555
8556 static void
8557 expand_omp_synch (struct omp_region *region)
8558 {
8559 basic_block entry_bb, exit_bb;
8560 gimple_stmt_iterator si;
8561
8562 entry_bb = region->entry;
8563 exit_bb = region->exit;
8564
8565 si = gsi_last_nondebug_bb (entry_bb);
8566 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8567 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8568 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8569 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8570 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8571 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8572 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8573 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8574 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8575 {
8576 expand_omp_taskreg (region);
8577 return;
8578 }
8579 gsi_remove (&si, true);
8580 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8581
8582 if (exit_bb)
8583 {
8584 si = gsi_last_nondebug_bb (exit_bb);
8585 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8586 gsi_remove (&si, true);
8587 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8588 }
8589 }
8590
8591 /* Translate enum omp_memory_order to enum memmodel for the embedded
8592 fail clause in there. */
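/* For example, an 'acq_rel' atomic without an explicit fail clause uses
   MEMMODEL_ACQUIRE on the failure path and a 'release' one falls back to
   MEMMODEL_RELAXED, because the failure path of a compare-and-swap performs
   no store and therefore cannot be a release operation.  */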
8593
8594 static enum memmodel
8595 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8596 {
8597 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8598 {
8599 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8600 switch (mo & OMP_MEMORY_ORDER_MASK)
8601 {
8602 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8603 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8604 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8605 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8606 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8607 default: break;
8608 }
8609 gcc_unreachable ();
8610 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8611 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8612 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8613 default: gcc_unreachable ();
8614 }
8615 }
8616
8617 /* Translate enum omp_memory_order to enum memmodel. The two enums
8618 use different numberings so that OMP_MEMORY_ORDER_UNSPECIFIED can be 0
8619 and omp_memory_order can also encode the fail mode. */
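/* For example, a 'relaxed' success order combined with an 'acquire' fail
   clause comes back as MEMMODEL_ACQUIRE, so the model handed to the atomic
   builtins is never weaker than the failure model; see the
   -Winvalid-memory-model note below.  */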
8620
8621 static enum memmodel
8622 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8623 {
8624 enum memmodel ret, fail_ret;
8625 switch (mo & OMP_MEMORY_ORDER_MASK)
8626 {
8627 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8628 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8629 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8630 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8631 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8632 default: gcc_unreachable ();
8633 }
8634 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8635 we can just return ret here unconditionally. Otherwise, work around
8636 it here and make sure fail memmodel is not stronger. */
8637 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8638 return ret;
8639 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8640 if (fail_ret > ret)
8641 return fail_ret;
8642 return ret;
8643 }
8644
8645 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8646 operation as a normal volatile load. */
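/* Illustrative sketch only: for a 4-byte type (INDEX == 2) the

     GIMPLE_OMP_ATOMIC_LOAD (v, &x) ... GIMPLE_OMP_ATOMIC_STORE (v)

   pair left behind by an '#pragma omp atomic read' is replaced by roughly

     v = __atomic_load_4 (&x, mo);

   with MO derived from the directive's memory-order clause.  */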
8647
8648 static bool
8649 expand_omp_atomic_load (basic_block load_bb, tree addr,
8650 tree loaded_val, int index)
8651 {
8652 enum built_in_function tmpbase;
8653 gimple_stmt_iterator gsi;
8654 basic_block store_bb;
8655 location_t loc;
8656 gimple *stmt;
8657 tree decl, call, type, itype;
8658
8659 gsi = gsi_last_nondebug_bb (load_bb);
8660 stmt = gsi_stmt (gsi);
8661 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8662 loc = gimple_location (stmt);
8663
8664 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8665 is smaller than word size, then expand_atomic_load assumes that the load
8666 is atomic. We could avoid the builtin entirely in this case. */
8667
8668 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8669 decl = builtin_decl_explicit (tmpbase);
8670 if (decl == NULL_TREE)
8671 return false;
8672
8673 type = TREE_TYPE (loaded_val);
8674 itype = TREE_TYPE (TREE_TYPE (decl));
8675
8676 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8677 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8678 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8679 if (!useless_type_conversion_p (type, itype))
8680 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8681 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8682
8683 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8684 gsi_remove (&gsi, true);
8685
8686 store_bb = single_succ (load_bb);
8687 gsi = gsi_last_nondebug_bb (store_bb);
8688 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8689 gsi_remove (&gsi, true);
8690
8691 if (gimple_in_ssa_p (cfun))
8692 update_ssa (TODO_update_ssa_no_phi);
8693
8694 return true;
8695 }
8696
8697 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8698 operation as a normal volatile store. */
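/* Illustrative sketch only: for a 4-byte type (INDEX == 2) a plain
   '#pragma omp atomic write' becomes roughly

     __atomic_store_4 (&x, v, mo);

   and, when the old value is also needed (the 'exchange' case below),

     old = __atomic_exchange_4 (&x, v, mo);  */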
8699
8700 static bool
8701 expand_omp_atomic_store (basic_block load_bb, tree addr,
8702 tree loaded_val, tree stored_val, int index)
8703 {
8704 enum built_in_function tmpbase;
8705 gimple_stmt_iterator gsi;
8706 basic_block store_bb = single_succ (load_bb);
8707 location_t loc;
8708 gimple *stmt;
8709 tree decl, call, type, itype;
8710 machine_mode imode;
8711 bool exchange;
8712
8713 gsi = gsi_last_nondebug_bb (load_bb);
8714 stmt = gsi_stmt (gsi);
8715 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8716
8717 /* If the load value is needed, then this isn't a store but an exchange. */
8718 exchange = gimple_omp_atomic_need_value_p (stmt);
8719
8720 gsi = gsi_last_nondebug_bb (store_bb);
8721 stmt = gsi_stmt (gsi);
8722 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8723 loc = gimple_location (stmt);
8724
8725 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8726 is smaller than word size, then expand_atomic_store assumes that the store
8727 is atomic. We could avoid the builtin entirely in this case. */
8728
8729 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8730 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8731 decl = builtin_decl_explicit (tmpbase);
8732 if (decl == NULL_TREE)
8733 return false;
8734
8735 type = TREE_TYPE (stored_val);
8736
8737 /* Dig out the type of the function's second argument. */
8738 itype = TREE_TYPE (decl);
8739 itype = TYPE_ARG_TYPES (itype);
8740 itype = TREE_CHAIN (itype);
8741 itype = TREE_VALUE (itype);
8742 imode = TYPE_MODE (itype);
8743
8744 if (exchange && !can_atomic_exchange_p (imode, true))
8745 return false;
8746
8747 if (!useless_type_conversion_p (itype, type))
8748 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8749 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8750 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8751 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8752 if (exchange)
8753 {
8754 if (!useless_type_conversion_p (type, itype))
8755 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8756 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8757 }
8758
8759 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8760 gsi_remove (&gsi, true);
8761
8762 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8763 gsi = gsi_last_nondebug_bb (load_bb);
8764 gsi_remove (&gsi, true);
8765
8766 if (gimple_in_ssa_p (cfun))
8767 update_ssa (TODO_update_ssa_no_phi);
8768
8769 return true;
8770 }
8771
8772 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8773 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8774 size of the data type, and thus usable to find the index of the builtin
8775 decl. Returns false if the expression is not of the proper form. */
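/* Illustrative sketch only: for 'int x; #pragma omp atomic update x = x + n;'
   (INDEX == 2) the load/assign/store triple matched below collapses into

     __atomic_fetch_add_4 (&x, n, mo);

   or into the __atomic_add_fetch_4 form when the updated value is captured;
   MO is usually MEMMODEL_RELAXED, see the comment further down.  */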
8776
8777 static bool
8778 expand_omp_atomic_fetch_op (basic_block load_bb,
8779 tree addr, tree loaded_val,
8780 tree stored_val, int index)
8781 {
8782 enum built_in_function oldbase, newbase, tmpbase;
8783 tree decl, itype, call;
8784 tree lhs, rhs;
8785 basic_block store_bb = single_succ (load_bb);
8786 gimple_stmt_iterator gsi;
8787 gimple *stmt;
8788 location_t loc;
8789 enum tree_code code;
8790 bool need_old, need_new;
8791 machine_mode imode;
8792
8793 /* We expect to find the following sequences:
8794
8795 load_bb:
8796 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8797
8798 store_bb:
8799 val = tmp OP something; (or: something OP tmp)
8800 GIMPLE_OMP_STORE (val)
8801
8802 ???FIXME: Allow a more flexible sequence.
8803 Perhaps use data flow to pick the statements.
8804
8805 */
8806
8807 gsi = gsi_after_labels (store_bb);
8808 stmt = gsi_stmt (gsi);
8809 if (is_gimple_debug (stmt))
8810 {
8811 gsi_next_nondebug (&gsi);
8812 if (gsi_end_p (gsi))
8813 return false;
8814 stmt = gsi_stmt (gsi);
8815 }
8816 loc = gimple_location (stmt);
8817 if (!is_gimple_assign (stmt))
8818 return false;
8819 gsi_next_nondebug (&gsi);
8820 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8821 return false;
8822 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8823 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8824 enum omp_memory_order omo
8825 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8826 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8827 gcc_checking_assert (!need_old || !need_new);
8828
8829 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8830 return false;
8831
8832 /* Check for one of the supported fetch-op operations. */
8833 code = gimple_assign_rhs_code (stmt);
8834 switch (code)
8835 {
8836 case PLUS_EXPR:
8837 case POINTER_PLUS_EXPR:
8838 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8839 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8840 break;
8841 case MINUS_EXPR:
8842 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8843 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8844 break;
8845 case BIT_AND_EXPR:
8846 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8847 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8848 break;
8849 case BIT_IOR_EXPR:
8850 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8851 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8852 break;
8853 case BIT_XOR_EXPR:
8854 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8855 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8856 break;
8857 default:
8858 return false;
8859 }
8860
8861 /* Make sure the expression is of the proper form. */
8862 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8863 rhs = gimple_assign_rhs2 (stmt);
8864 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8865 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8866 rhs = gimple_assign_rhs1 (stmt);
8867 else
8868 return false;
8869
8870 tmpbase = ((enum built_in_function)
8871 ((need_new ? newbase : oldbase) + index + 1));
8872 decl = builtin_decl_explicit (tmpbase);
8873 if (decl == NULL_TREE)
8874 return false;
8875 itype = TREE_TYPE (TREE_TYPE (decl));
8876 imode = TYPE_MODE (itype);
8877
8878 /* We could test all of the various optabs involved, but the fact of the
8879 matter is that (with the exception of i486 vs i586 and xadd) all targets
8880 that support any atomic operation optab also implement compare-and-swap.
8881 Let optabs.cc take care of expanding any compare-and-swap loop. */
8882 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8883 return false;
8884
8885 gsi = gsi_last_nondebug_bb (load_bb);
8886 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8887
8888 /* OpenMP does not imply any barrier-like semantics on its atomic ops
8889 by default; it only requires that the operation happen atomically, so
8890 MO is RELAXED unless an explicit memory-order clause asked for more. */
8891 call = build_call_expr_loc (loc, decl, 3, addr,
8892 fold_convert_loc (loc, itype, rhs),
8893 build_int_cst (NULL, mo));
8894
8895 if (need_old || need_new)
8896 {
8897 lhs = need_old ? loaded_val : stored_val;
8898 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8899 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8900 }
8901 else
8902 call = fold_convert_loc (loc, void_type_node, call);
8903 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8904 gsi_remove (&gsi, true);
8905
8906 gsi = gsi_last_nondebug_bb (store_bb);
8907 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8908 gsi_remove (&gsi, true);
8909 gsi = gsi_last_nondebug_bb (store_bb);
8910 stmt = gsi_stmt (gsi);
8911 gsi_remove (&gsi, true);
8912
8913 if (gimple_in_ssa_p (cfun))
8914 {
8915 release_defs (stmt);
8916 update_ssa (TODO_update_ssa_no_phi);
8917 }
8918
8919 return true;
8920 }
8921
8922 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8923 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8924 Returns false if the expression is not of the proper form. */
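/* Illustrative sketch only: when one of the forms below is matched, it is
   collapsed into a single call to the internal function

     c = .ATOMIC_COMPARE_EXCHANGE (addr, e, d, flags, mo, fmo);

   whose real part is the old value of *ADDR and whose imaginary part is the
   success flag; those parts are then copied back into whatever variables the
   original sequence produced.  */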
8925
8926 static bool
8927 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8928 tree loaded_val, tree stored_val, int index)
8929 {
8930 /* We expect to find the following sequences:
8931
8932 load_bb:
8933 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8934
8935 store_bb:
8936 val = tmp == e ? d : tmp;
8937 GIMPLE_OMP_ATOMIC_STORE (val)
8938
8939 or in store_bb instead:
8940 tmp2 = tmp == e;
8941 val = tmp2 ? d : tmp;
8942 GIMPLE_OMP_ATOMIC_STORE (val)
8943
8944 or:
8945 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8946 val = e == tmp3 ? d : tmp;
8947 GIMPLE_OMP_ATOMIC_STORE (val)
8948
8949 etc. */
8950
8951
8952 basic_block store_bb = single_succ (load_bb);
8953 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8954 gimple *store_stmt = gsi_stmt (gsi);
8955 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8956 return false;
8957 gsi_prev_nondebug (&gsi);
8958 if (gsi_end_p (gsi))
8959 return false;
8960 gimple *condexpr_stmt = gsi_stmt (gsi);
8961 if (!is_gimple_assign (condexpr_stmt)
8962 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8963 return false;
8964 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8965 return false;
8966 gimple *cond_stmt = NULL;
8967 gimple *vce_stmt = NULL;
8968 gsi_prev_nondebug (&gsi);
8969 if (!gsi_end_p (gsi))
8970 {
8971 cond_stmt = gsi_stmt (gsi);
8972 if (!is_gimple_assign (cond_stmt))
8973 return false;
8974 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8975 {
8976 gsi_prev_nondebug (&gsi);
8977 if (!gsi_end_p (gsi))
8978 {
8979 vce_stmt = gsi_stmt (gsi);
8980 if (!is_gimple_assign (vce_stmt)
8981 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8982 return false;
8983 }
8984 }
8985 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8986 std::swap (vce_stmt, cond_stmt);
8987 else
8988 return false;
8989 if (vce_stmt)
8990 {
8991 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8992 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8993 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8994 return false;
8995 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8996 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8997 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8998 TYPE_SIZE (TREE_TYPE (loaded_val))))
8999 return false;
9000 gsi_prev_nondebug (&gsi);
9001 if (!gsi_end_p (gsi))
9002 return false;
9003 }
9004 }
9005 tree cond = gimple_assign_rhs1 (condexpr_stmt);
9006 tree cond_op1, cond_op2;
9007 if (cond_stmt)
9008 {
9009 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9010 return false;
9011 cond_op1 = gimple_assign_rhs1 (cond_stmt);
9012 cond_op2 = gimple_assign_rhs2 (cond_stmt);
9013 }
9014 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9015 return false;
9016 else
9017 {
9018 cond_op1 = TREE_OPERAND (cond, 0);
9019 cond_op2 = TREE_OPERAND (cond, 1);
9020 }
9021 tree d;
9022 if (TREE_CODE (cond) == NE_EXPR)
9023 {
9024 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9025 return false;
9026 d = gimple_assign_rhs3 (condexpr_stmt);
9027 }
9028 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9029 return false;
9030 else
9031 d = gimple_assign_rhs2 (condexpr_stmt);
9032 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9033 if (operand_equal_p (e, cond_op1))
9034 e = cond_op2;
9035 else if (operand_equal_p (e, cond_op2))
9036 e = cond_op1;
9037 else
9038 return false;
9039
9040 location_t loc = gimple_location (store_stmt);
9041 gimple *load_stmt = last_stmt (load_bb);
9042 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9043 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9044 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9045 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9046 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9047 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9048 gcc_checking_assert (!need_old || !need_new);
9049
9050 enum built_in_function fncode
9051 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9052 + index + 1);
9053 tree cmpxchg = builtin_decl_explicit (fncode);
9054 if (cmpxchg == NULL_TREE)
9055 return false;
9056 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9057
9058 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9059 || !can_atomic_load_p (TYPE_MODE (itype)))
9060 return false;
9061
9062 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9063 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9064 return false;
9065
9066 gsi = gsi_for_stmt (store_stmt);
9067 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9068 {
9069 tree ne = create_tmp_reg (itype);
9070 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9071 gimple_set_location (g, loc);
9072 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9073 e = ne;
9074 }
9075 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9076 {
9077 tree nd = create_tmp_reg (itype);
9078 enum tree_code code;
9079 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9080 {
9081 code = VIEW_CONVERT_EXPR;
9082 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9083 }
9084 else
9085 code = NOP_EXPR;
9086 gimple *g = gimple_build_assign (nd, code, d);
9087 gimple_set_location (g, loc);
9088 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9089 d = nd;
9090 }
9091
9092 tree ctype = build_complex_type (itype);
9093 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9094 gimple *g
9095 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9096 build_int_cst (integer_type_node, flag),
9097 mo, fmo);
9098 tree cres = create_tmp_reg (ctype);
9099 gimple_call_set_lhs (g, cres);
9100 gimple_set_location (g, loc);
9101 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9102
9103 if (cond_stmt || need_old || need_new)
9104 {
9105 tree im = create_tmp_reg (itype);
9106 g = gimple_build_assign (im, IMAGPART_EXPR,
9107 build1 (IMAGPART_EXPR, itype, cres));
9108 gimple_set_location (g, loc);
9109 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9110
9111 tree re = NULL_TREE;
9112 if (need_old || need_new)
9113 {
9114 re = create_tmp_reg (itype);
9115 g = gimple_build_assign (re, REALPART_EXPR,
9116 build1 (REALPART_EXPR, itype, cres));
9117 gimple_set_location (g, loc);
9118 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9119 }
9120
9121 if (cond_stmt)
9122 {
9123 g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9124 NOP_EXPR, im);
9125 gimple_set_location (g, loc);
9126 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9127 }
9128 else if (need_new)
9129 {
9130 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9131 build2 (NE_EXPR, boolean_type_node,
9132 im, build_zero_cst (itype)),
9133 d, re);
9134 gimple_set_location (g, loc);
9135 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9136 re = gimple_assign_lhs (g);
9137 }
9138
9139 if (need_old || need_new)
9140 {
9141 tree v = need_old ? loaded_val : stored_val;
9142 enum tree_code code;
9143 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9144 {
9145 code = VIEW_CONVERT_EXPR;
9146 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9147 }
9148 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9149 code = NOP_EXPR;
9150 else
9151 code = TREE_CODE (re);
9152 g = gimple_build_assign (v, code, re);
9153 gimple_set_location (g, loc);
9154 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9155 }
9156 }
9157
9158 gsi_remove (&gsi, true);
9159 gsi = gsi_for_stmt (load_stmt);
9160 gsi_remove (&gsi, true);
9161 gsi = gsi_for_stmt (condexpr_stmt);
9162 gsi_remove (&gsi, true);
9163 if (cond_stmt)
9164 {
9165 gsi = gsi_for_stmt (cond_stmt);
9166 gsi_remove (&gsi, true);
9167 }
9168 if (vce_stmt)
9169 {
9170 gsi = gsi_for_stmt (vce_stmt);
9171 gsi_remove (&gsi, true);
9172 }
9173
9174 return true;
9175 }
9176
9177 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9178
9179 oldval = *addr;
9180 repeat:
9181 newval = rhs; // with oldval replacing *addr in rhs
9182 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9183 if (oldval != newval)
9184 goto repeat;
9185
9186 INDEX is log2 of the size of the data type, and thus usable to find the
9187 index of the builtin decl. */
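/* Note for floating-point operands: the loop below works on a same-sized
   integer view of the value (see the VIEW_CONVERT_EXPRs further down), so the
   compare-and-swap compares bit patterns rather than values, which is what
   lets the loop terminate even in the presence of NaNs and -0.0.  */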
9188
9189 static bool
9190 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9191 tree addr, tree loaded_val, tree stored_val,
9192 int index)
9193 {
9194 tree loadedi, storedi, initial, new_storedi, old_vali;
9195 tree type, itype, cmpxchg, iaddr, atype;
9196 gimple_stmt_iterator si;
9197 basic_block loop_header = single_succ (load_bb);
9198 gimple *phi, *stmt;
9199 edge e;
9200 enum built_in_function fncode;
9201
9202 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9203 + index + 1);
9204 cmpxchg = builtin_decl_explicit (fncode);
9205 if (cmpxchg == NULL_TREE)
9206 return false;
9207 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9208 atype = type;
9209 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9210
9211 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9212 || !can_atomic_load_p (TYPE_MODE (itype)))
9213 return false;
9214
9215 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9216 si = gsi_last_nondebug_bb (load_bb);
9217 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9218 location_t loc = gimple_location (gsi_stmt (si));
9219 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9220 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9221 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9222
9223 /* For floating-point values, we'll need to view-convert them to integers
9224 so that we can perform the atomic compare and swap. Simplify the
9225 following code by always setting up the "i"ntegral variables. */
9226 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9227 {
9228 tree iaddr_val;
9229
9230 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9231 true));
9232 atype = itype;
9233 iaddr_val
9234 = force_gimple_operand_gsi (&si,
9235 fold_convert (TREE_TYPE (iaddr), addr),
9236 false, NULL_TREE, true, GSI_SAME_STMT);
9237 stmt = gimple_build_assign (iaddr, iaddr_val);
9238 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9239 loadedi = create_tmp_var (itype);
9240 if (gimple_in_ssa_p (cfun))
9241 loadedi = make_ssa_name (loadedi);
9242 }
9243 else
9244 {
9245 iaddr = addr;
9246 loadedi = loaded_val;
9247 }
9248
9249 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9250 tree loaddecl = builtin_decl_explicit (fncode);
9251 if (loaddecl)
9252 initial
9253 = fold_convert (atype,
9254 build_call_expr (loaddecl, 2, iaddr,
9255 build_int_cst (NULL_TREE,
9256 MEMMODEL_RELAXED)));
9257 else
9258 {
9259 tree off
9260 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9261 true), 0);
9262 initial = build2 (MEM_REF, atype, iaddr, off);
9263 }
9264
9265 initial
9266 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9267 GSI_SAME_STMT);
9268
9269 /* Move the value to the LOADEDI temporary. */
9270 if (gimple_in_ssa_p (cfun))
9271 {
9272 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9273 phi = create_phi_node (loadedi, loop_header);
9274 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9275 initial);
9276 }
9277 else
9278 gsi_insert_before (&si,
9279 gimple_build_assign (loadedi, initial),
9280 GSI_SAME_STMT);
9281 if (loadedi != loaded_val)
9282 {
9283 gimple_stmt_iterator gsi2;
9284 tree x;
9285
9286 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9287 gsi2 = gsi_start_bb (loop_header);
9288 if (gimple_in_ssa_p (cfun))
9289 {
9290 gassign *stmt;
9291 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9292 true, GSI_SAME_STMT);
9293 stmt = gimple_build_assign (loaded_val, x);
9294 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9295 }
9296 else
9297 {
9298 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9299 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9300 true, GSI_SAME_STMT);
9301 }
9302 }
9303 gsi_remove (&si, true);
9304
9305 si = gsi_last_nondebug_bb (store_bb);
9306 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9307
9308 if (iaddr == addr)
9309 storedi = stored_val;
9310 else
9311 storedi
9312 = force_gimple_operand_gsi (&si,
9313 build1 (VIEW_CONVERT_EXPR, itype,
9314 stored_val), true, NULL_TREE, true,
9315 GSI_SAME_STMT);
9316
9317 /* Build the compare&swap statement. */
9318 tree ctype = build_complex_type (itype);
9319 int flag = int_size_in_bytes (itype);
9320 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9321 ctype, 6, iaddr, loadedi,
9322 storedi,
9323 build_int_cst (integer_type_node,
9324 flag),
9325 mo, fmo);
9326 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9327 new_storedi = force_gimple_operand_gsi (&si,
9328 fold_convert (TREE_TYPE (loadedi),
9329 new_storedi),
9330 true, NULL_TREE,
9331 true, GSI_SAME_STMT);
9332
9333 if (gimple_in_ssa_p (cfun))
9334 old_vali = loadedi;
9335 else
9336 {
9337 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9338 stmt = gimple_build_assign (old_vali, loadedi);
9339 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9340
9341 stmt = gimple_build_assign (loadedi, new_storedi);
9342 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9343 }
9344
9345 /* Note that we always perform the comparison as an integer, even for
9346 floating point. This allows the atomic operation to properly
9347 succeed even with NaNs and -0.0. */
9348 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9349 stmt = gimple_build_cond_empty (ne);
9350 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9351
9352 /* Update cfg. */
9353 e = single_succ_edge (store_bb);
9354 e->flags &= ~EDGE_FALLTHRU;
9355 e->flags |= EDGE_FALSE_VALUE;
9356 /* Expect no looping. */
9357 e->probability = profile_probability::guessed_always ();
9358
9359 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9360 e->probability = profile_probability::guessed_never ();
9361
9362 /* Copy the new value to loadedi (we already did that before the condition
9363 if we are not in SSA). */
9364 if (gimple_in_ssa_p (cfun))
9365 {
9366 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9367 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9368 }
9369
9370 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9371 gsi_remove (&si, true);
9372
9373 class loop *loop = alloc_loop ();
9374 loop->header = loop_header;
9375 loop->latch = store_bb;
9376 add_loop (loop, loop_header->loop_father);
9377
9378 if (gimple_in_ssa_p (cfun))
9379 update_ssa (TODO_update_ssa_no_phi);
9380
9381 return true;
9382 }
9383
9384 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9385
9386 GOMP_atomic_start ();
9387 *addr = rhs;
9388 GOMP_atomic_end ();
9389
9390 The result is not globally atomic, but works so long as all parallel
9391 references are within #pragma omp atomic directives. According to
9392 responses received from omp@openmp.org, this appears to be within spec.
9393 That makes sense, since that's how several other compilers handle
9394 this situation as well.
9395 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9396 expanding. STORED_VAL is the operand of the matching
9397 GIMPLE_OMP_ATOMIC_STORE.
9398
9399 We replace
9400 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9401 loaded_val = *addr;
9402
9403 and replace
9404 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9405 *addr = stored_val;
9406 */
9407
9408 static bool
9409 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9410 tree addr, tree loaded_val, tree stored_val)
9411 {
9412 gimple_stmt_iterator si;
9413 gassign *stmt;
9414 tree t;
9415
9416 si = gsi_last_nondebug_bb (load_bb);
9417 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9418
9419 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9420 t = build_call_expr (t, 0);
9421 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9422
9423 tree mem = build_simple_mem_ref (addr);
9424 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9425 TREE_OPERAND (mem, 1)
9426 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9427 true),
9428 TREE_OPERAND (mem, 1));
9429 stmt = gimple_build_assign (loaded_val, mem);
9430 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9431 gsi_remove (&si, true);
9432
9433 si = gsi_last_nondebug_bb (store_bb);
9434 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9435
9436 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9437 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9438
9439 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9440 t = build_call_expr (t, 0);
9441 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9442 gsi_remove (&si, true);
9443
9444 if (gimple_in_ssa_p (cfun))
9445 update_ssa (TODO_update_ssa_no_phi);
9446 return true;
9447 }
9448
9449 /* Expand a GIMPLE_OMP_ATOMIC statement. We try, in order: a plain
9450 atomic load or store, expand_omp_atomic_fetch_op, expand_omp_atomic_cas,
9451 a compare-and-swap loop via expand_omp_atomic_pipeline, and if all of
9452 these fail, the ultimate fallback is wrapping the operation in a mutex
9453 (expand_omp_atomic_mutex). REGION is the atomic region built
9454 by build_omp_regions_1(). */
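/* For instance, a 4-byte int has index == exact_log2 (4) == 2 below, which
   selects the *_4 variants of the __atomic builtins; types whose size is not
   a power of two, or larger than 16 bytes, skip all the specialized paths
   and take the mutex fallback directly.  */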
9455
9456 static void
9457 expand_omp_atomic (struct omp_region *region)
9458 {
9459 basic_block load_bb = region->entry, store_bb = region->exit;
9460 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9461 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9462 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9463 tree addr = gimple_omp_atomic_load_rhs (load);
9464 tree stored_val = gimple_omp_atomic_store_val (store);
9465 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9466 HOST_WIDE_INT index;
9467
9468 /* Make sure the type is one of the supported sizes. */
9469 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9470 index = exact_log2 (index);
9471 if (index >= 0 && index <= 4)
9472 {
9473 unsigned int align = TYPE_ALIGN_UNIT (type);
9474
9475 /* __sync builtins require strict data alignment. */
9476 if (exact_log2 (align) >= index)
9477 {
9478 /* Atomic load. */
9479 scalar_mode smode;
9480 if (loaded_val == stored_val
9481 && (is_int_mode (TYPE_MODE (type), &smode)
9482 || is_float_mode (TYPE_MODE (type), &smode))
9483 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9484 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9485 return;
9486
9487 /* Atomic store. */
9488 if ((is_int_mode (TYPE_MODE (type), &smode)
9489 || is_float_mode (TYPE_MODE (type), &smode))
9490 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9491 && store_bb == single_succ (load_bb)
9492 && first_stmt (store_bb) == store
9493 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9494 stored_val, index))
9495 return;
9496
9497 /* When possible, use specialized atomic update functions. */
9498 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9499 && store_bb == single_succ (load_bb)
9500 && expand_omp_atomic_fetch_op (load_bb, addr,
9501 loaded_val, stored_val, index))
9502 return;
9503
9504 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9505 if (store_bb == single_succ (load_bb)
9506 && !gimple_in_ssa_p (cfun)
9507 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9508 index))
9509 return;
9510
9511 /* If we don't have specialized __sync builtins, try and implement
9512 as a compare and swap loop. */
9513 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9514 loaded_val, stored_val, index))
9515 return;
9516 }
9517 }
9518
9519 /* The ultimate fallback is wrapping the operation in a mutex. */
9520 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9521 }
9522
9523 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9524 at REGION_EXIT. */
9525
9526 static void
9527 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9528 basic_block region_exit)
9529 {
9530 class loop *outer = region_entry->loop_father;
9531 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9532
9533 /* Don't parallelize the kernels region if it contains more than one outer
9534 loop. */
9535 unsigned int nr_outer_loops = 0;
9536 class loop *single_outer = NULL;
9537 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9538 {
9539 gcc_assert (loop_outer (loop) == outer);
9540
9541 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9542 continue;
9543
9544 if (region_exit != NULL
9545 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9546 continue;
9547
9548 nr_outer_loops++;
9549 single_outer = loop;
9550 }
9551 if (nr_outer_loops != 1)
9552 return;
9553
9554 for (class loop *loop = single_outer->inner;
9555 loop != NULL;
9556 loop = loop->inner)
9557 if (loop->next)
9558 return;
9559
9560 /* Mark the loops in the region. */
9561 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9562 loop->in_oacc_kernels_region = true;
9563 }
9564
9565 /* Build target argument identifier from the DEVICE identifier, value
9566 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9567
9568 static tree
9569 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9570 {
9571 tree t = build_int_cst (integer_type_node, device);
9572 if (subseqent_param)
9573 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9574 build_int_cst (integer_type_node,
9575 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9576 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9577 build_int_cst (integer_type_node, id));
9578 return t;
9579 }
9580
9581 /* Like above but return it in type that can be directly stored as an element
9582 of the argument array. */
9583
9584 static tree
9585 get_target_argument_identifier (int device, bool subseqent_param, int id)
9586 {
9587 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9588 return fold_convert (ptr_type_node, t);
9589 }
9590
9591 /* Return a target argument consisting of DEVICE identifier, value identifier
9592 ID, and the actual VALUE. */
9593
9594 static tree
9595 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9596 tree value)
9597 {
9598 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9599 fold_convert (integer_type_node, value),
9600 build_int_cst (unsigned_type_node,
9601 GOMP_TARGET_ARG_VALUE_SHIFT));
9602 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9603 get_target_argument_identifier_1 (device, false, id));
9604 t = fold_convert (ptr_type_node, t);
9605 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9606 }
9607
9608 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9609 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9610 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9611 arguments. */
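/* A hypothetical example of the two shapes: a literal 'num_teams (4)' clause
   fits the +/-2^15 range checked below and is pushed as the single word

     device | id | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)

   whereas a runtime expression is pushed as two words: the identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM or'd in, followed by the value itself
   converted to a pointer.  */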
9612
9613 static void
9614 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9615 int id, tree value, vec <tree> *args)
9616 {
9617 if (tree_fits_shwi_p (value)
9618 && tree_to_shwi (value) > -(1 << 15)
9619 && tree_to_shwi (value) < (1 << 15))
9620 args->quick_push (get_target_argument_value (gsi, device, id, value));
9621 else
9622 {
9623 args->quick_push (get_target_argument_identifier (device, true, id));
9624 value = fold_convert (ptr_type_node, value);
9625 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9626 GSI_SAME_STMT);
9627 args->quick_push (value);
9628 }
9629 }
9630
9631 /* Create an array of arguments that is then passed to GOMP_target. */
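/* A sketch of the layout, assuming only the two standard arguments below are
   produced: { <num_teams entry>, <thread_limit entry>, NULL }, where each
   entry is one or two pointer-sized words as described above, and the
   address of the whole array is what the caller passes on to the runtime.  */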
9632
9633 static tree
9634 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9635 {
9636 auto_vec <tree, 6> args;
9637 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9638 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9639 if (c)
9640 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9641 else
9642 t = integer_minus_one_node;
9643 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9644 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9645
9646 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9647 if (c)
9648 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9649 else
9650 t = integer_minus_one_node;
9651 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9652 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9653 &args);
9654
9655 /* Produce more, perhaps device specific, arguments here. */
9656
9657 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9658 args.length () + 1),
9659 ".omp_target_args");
9660 for (unsigned i = 0; i < args.length (); i++)
9661 {
9662 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9663 build_int_cst (integer_type_node, i),
9664 NULL_TREE, NULL_TREE);
9665 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9666 GSI_SAME_STMT);
9667 }
9668 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9669 build_int_cst (integer_type_node, args.length ()),
9670 NULL_TREE, NULL_TREE);
9671 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9672 GSI_SAME_STMT);
9673 TREE_ADDRESSABLE (argarray) = 1;
9674 return build_fold_addr_expr (argarray);
9675 }
9676
9677 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
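/* E.g. (an illustrative input, compiled with -fopenmp):
     #pragma omp target map(tofrom: x)
       x++;
   is expanded into an outlined child function holding the region body plus
   a host-side call into libgomp that either launches it on the selected
   device or falls back to host execution. Data-only kinds ('target data',
   'target update', the OpenACC data constructs, ...) only emit the
   corresponding runtime call, without outlining. */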
9678
9679 static void
9680 expand_omp_target (struct omp_region *region)
9681 {
9682 basic_block entry_bb, exit_bb, new_bb;
9683 struct function *child_cfun;
9684 tree child_fn, block, t;
9685 gimple_stmt_iterator gsi;
9686 gomp_target *entry_stmt;
9687 gimple *stmt;
9688 edge e;
9689 bool offloaded;
9690 int target_kind;
9691
9692 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9693 target_kind = gimple_omp_target_kind (entry_stmt);
9694 new_bb = region->entry;
9695
9696 offloaded = is_gimple_omp_offloaded (entry_stmt);
9697 switch (target_kind)
9698 {
9699 case GF_OMP_TARGET_KIND_REGION:
9700 case GF_OMP_TARGET_KIND_UPDATE:
9701 case GF_OMP_TARGET_KIND_ENTER_DATA:
9702 case GF_OMP_TARGET_KIND_EXIT_DATA:
9703 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9704 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9705 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9706 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9707 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9708 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9709 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9710 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9711 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9712 case GF_OMP_TARGET_KIND_DATA:
9713 case GF_OMP_TARGET_KIND_OACC_DATA:
9714 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9715 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9716 break;
9717 default:
9718 gcc_unreachable ();
9719 }
9720
9721 child_fn = NULL_TREE;
9722 child_cfun = NULL;
9723 if (offloaded)
9724 {
9725 child_fn = gimple_omp_target_child_fn (entry_stmt);
9726 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9727 }
9728
9729 /* A child function that already has a CFG is supported by expand_omp_taskreg, but not here. */
9730 if (child_cfun != NULL)
9731 gcc_checking_assert (!child_cfun->cfg);
9732 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9733
9734 entry_bb = region->entry;
9735 exit_bb = region->exit;
9736
9737 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9738 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9739
9740 /* From here on, all OpenACC compute constructs are mapped to
9741 'BUILT_IN_GOACC_PARALLEL' and have their compute regions outlined.
9742 To distinguish between them, we attach attributes. */
9743 switch (target_kind)
9744 {
9745 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9746 DECL_ATTRIBUTES (child_fn)
9747 = tree_cons (get_identifier ("oacc parallel"),
9748 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9749 break;
9750 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9751 DECL_ATTRIBUTES (child_fn)
9752 = tree_cons (get_identifier ("oacc kernels"),
9753 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9754 break;
9755 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9756 DECL_ATTRIBUTES (child_fn)
9757 = tree_cons (get_identifier ("oacc serial"),
9758 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9759 break;
9760 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9761 DECL_ATTRIBUTES (child_fn)
9762 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9763 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9764 break;
9765 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9766 DECL_ATTRIBUTES (child_fn)
9767 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9768 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9769 break;
9770 default:
9771 /* Make sure we don't miss any. */
9772 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9773 && is_gimple_omp_offloaded (entry_stmt)));
9774 break;
9775 }
9776
9777 if (offloaded)
9778 {
9779 unsigned srcidx, dstidx, num;
9780
9781 /* If the offloading region needs data sent from the parent
9782 function, then the very first statement (except possible
9783 tree profile counter updates) of the offloading body
9784 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9785 &.OMP_DATA_O is passed as an argument to the child function,
9786 we need to replace it with the argument as seen by the child
9787 function.
9788
9789 In most cases, this will end up being the identity assignment
9790 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9791 a function call that has been inlined, the original PARM_DECL
9792 .OMP_DATA_I may have been converted into a different local
9793 variable, in which case we need to keep the assignment. */
9794 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9795 if (data_arg)
9796 {
9797 basic_block entry_succ_bb = single_succ (entry_bb);
9798 gimple_stmt_iterator gsi;
9799 tree arg;
9800 gimple *tgtcopy_stmt = NULL;
9801 tree sender = TREE_VEC_ELT (data_arg, 0);
9802
9803 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9804 {
9805 gcc_assert (!gsi_end_p (gsi));
9806 stmt = gsi_stmt (gsi);
9807 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9808 continue;
9809
9810 if (gimple_num_ops (stmt) == 2)
9811 {
9812 tree arg = gimple_assign_rhs1 (stmt);
9813
9814 /* We're ignoring the subcode because we're
9815 effectively doing a STRIP_NOPS. */
9816
9817 if (TREE_CODE (arg) == ADDR_EXPR
9818 && TREE_OPERAND (arg, 0) == sender)
9819 {
9820 tgtcopy_stmt = stmt;
9821 break;
9822 }
9823 }
9824 }
9825
9826 gcc_assert (tgtcopy_stmt != NULL);
9827 arg = DECL_ARGUMENTS (child_fn);
9828
9829 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9830 gsi_remove (&gsi, true);
9831 }
9832
9833 /* Declare local variables needed in CHILD_CFUN. */
9834 block = DECL_INITIAL (child_fn);
9835 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9836 /* The gimplifier could record temporaries in the offloading block
9837 rather than in the containing function's local_decls chain,
9838 which would mean cgraph missed finalizing them. Do it now. */
9839 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9840 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9841 varpool_node::finalize_decl (t);
9842 DECL_SAVED_TREE (child_fn) = NULL;
9843 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9844 gimple_set_body (child_fn, NULL);
9845 TREE_USED (block) = 1;
9846
9847 /* Reset DECL_CONTEXT on function arguments. */
9848 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9849 DECL_CONTEXT (t) = child_fn;
9850
9851 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9852 so that the offloading region can be moved to the child function. */
9853 gsi = gsi_last_nondebug_bb (entry_bb);
9854 stmt = gsi_stmt (gsi);
9855 gcc_assert (stmt
9856 && gimple_code (stmt) == gimple_code (entry_stmt));
9857 e = split_block (entry_bb, stmt);
9858 gsi_remove (&gsi, true);
9859 entry_bb = e->dest;
9860 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9861
9862 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9863 if (exit_bb)
9864 {
9865 gsi = gsi_last_nondebug_bb (exit_bb);
9866 gcc_assert (!gsi_end_p (gsi)
9867 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9868 stmt = gimple_build_return (NULL);
9869 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9870 gsi_remove (&gsi, true);
9871 }
9872
9873 /* Move the offloading region into CHILD_CFUN. */
9874
9875 block = gimple_block (entry_stmt);
9876
9877 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9878 if (exit_bb)
9879 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9880 /* When the OMP expansion process cannot guarantee an up-to-date
9881 loop tree, arrange for the child function to fix up its loops. */
9882 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9883 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9884
9885 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9886 num = vec_safe_length (child_cfun->local_decls);
9887 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9888 {
9889 t = (*child_cfun->local_decls)[srcidx];
9890 if (DECL_CONTEXT (t) == cfun->decl)
9891 continue;
9892 if (srcidx != dstidx)
9893 (*child_cfun->local_decls)[dstidx] = t;
9894 dstidx++;
9895 }
9896 if (dstidx != num)
9897 vec_safe_truncate (child_cfun->local_decls, dstidx);
9898
9899 /* Inform the callgraph about the new function. */
9900 child_cfun->curr_properties = cfun->curr_properties;
9901 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9902 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9903 cgraph_node *node = cgraph_node::get_create (child_fn);
9904 node->parallelized_function = 1;
9905 cgraph_node::add_new_function (child_fn, true);
9906
9907 /* Add the new function to the offload table. */
9908 if (ENABLE_OFFLOADING)
9909 {
9910 if (in_lto_p)
9911 DECL_PRESERVE_P (child_fn) = 1;
9912 vec_safe_push (offload_funcs, child_fn);
9913 }
9914
9915 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9916 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9917
9918 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9919 fixed in a following pass. */
9920 push_cfun (child_cfun);
9921 if (need_asm)
9922 assign_assembler_name_if_needed (child_fn);
9923 cgraph_edge::rebuild_edges ();
9924
9925 /* Some EH regions might become dead, see PR34608. If
9926 pass_cleanup_cfg isn't the first pass to happen with the
9927 new child, these dead EH edges might cause problems.
9928 Clean them up now. */
9929 if (flag_exceptions)
9930 {
9931 basic_block bb;
9932 bool changed = false;
9933
9934 FOR_EACH_BB_FN (bb, cfun)
9935 changed |= gimple_purge_dead_eh_edges (bb);
9936 if (changed)
9937 cleanup_tree_cfg ();
9938 }
9939 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9940 verify_loop_structure ();
9941 pop_cfun ();
9942
9943 if (dump_file && !gimple_in_ssa_p (cfun))
9944 {
9945 omp_any_child_fn_dumped = true;
9946 dump_function_header (dump_file, child_fn, dump_flags);
9947 dump_function_to_file (child_fn, dump_file, dump_flags);
9948 }
9949
9950 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9951 }
9952
9953 /* Emit a library call to launch the offloading region, or do data
9954 transfers. */
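/* For a plain 'target' region the call emitted here looks roughly like
   (a sketch; the exact libgomp entry point and prototype come from
   BUILT_IN_GOMP_TARGET):
     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);
   the data/update directives and the OpenACC constructs instead use the
   other GOMP_/GOACC_ builtins selected just below. */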
9955 tree t1, t2, t3, t4, depend, c, clauses;
9956 enum built_in_function start_ix;
9957 unsigned int flags_i = 0;
9958
9959 switch (gimple_omp_target_kind (entry_stmt))
9960 {
9961 case GF_OMP_TARGET_KIND_REGION:
9962 start_ix = BUILT_IN_GOMP_TARGET;
9963 break;
9964 case GF_OMP_TARGET_KIND_DATA:
9965 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9966 break;
9967 case GF_OMP_TARGET_KIND_UPDATE:
9968 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9969 break;
9970 case GF_OMP_TARGET_KIND_ENTER_DATA:
9971 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9972 break;
9973 case GF_OMP_TARGET_KIND_EXIT_DATA:
9974 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9975 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9976 break;
9977 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9978 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9979 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9980 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9981 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9982 start_ix = BUILT_IN_GOACC_PARALLEL;
9983 break;
9984 case GF_OMP_TARGET_KIND_OACC_DATA:
9985 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9986 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9987 start_ix = BUILT_IN_GOACC_DATA_START;
9988 break;
9989 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9990 start_ix = BUILT_IN_GOACC_UPDATE;
9991 break;
9992 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9993 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9994 break;
9995 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9996 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9997 break;
9998 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9999 start_ix = BUILT_IN_GOACC_DECLARE;
10000 break;
10001 default:
10002 gcc_unreachable ();
10003 }
10004
10005 clauses = gimple_omp_target_clauses (entry_stmt);
10006
10007 tree device = NULL_TREE;
10008 location_t device_loc = UNKNOWN_LOCATION;
10009 tree goacc_flags = NULL_TREE;
10010 if (is_gimple_omp_oacc (entry_stmt))
10011 {
10012 /* By default, no GOACC_FLAGs are set. */
10013 goacc_flags = integer_zero_node;
10014 }
10015 else
10016 {
10017 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10018 if (c)
10019 {
10020 device = OMP_CLAUSE_DEVICE_ID (c);
10021 device_loc = OMP_CLAUSE_LOCATION (c);
10022 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10023 sorry_at (device_loc, "%<ancestor%> not yet supported");
10024 }
10025 else
10026 {
10027 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
10028 library choose). */
10029 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10030 device_loc = gimple_location (entry_stmt);
10031 }
10032
10033 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10034 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
10035 nowait doesn't appear. */
10036 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10037 c = NULL;
10038 if (c)
10039 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10040 }
10041
10042 /* By default, there is no conditional. */
10043 tree cond = NULL_TREE;
10044 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10045 if (c)
10046 cond = OMP_CLAUSE_IF_EXPR (c);
10047 /* If we found the clause 'if (cond)', build:
10048 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10049 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
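/* The conditional is materialized as a small diamond (a sketch):
     cond_bb: if (cond) goto then_bb; else goto else_bb;
     then_bb: tmp_var = <device or goacc_flags>;
     else_bb: tmp_var = <host-fallback value>;
     new_bb:  ... the library call below uses tmp_var ...  */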
10050 if (cond)
10051 {
10052 tree *tp;
10053 if (is_gimple_omp_oacc (entry_stmt))
10054 tp = &goacc_flags;
10055 else
10056 {
10057 /* Ensure 'device' is of the correct type. */
10058 device = fold_convert_loc (device_loc, integer_type_node, device);
10059
10060 tp = &device;
10061 }
10062
10063 cond = gimple_boolify (cond);
10064
10065 basic_block cond_bb, then_bb, else_bb;
10066 edge e;
10067 tree tmp_var;
10068
10069 tmp_var = create_tmp_var (TREE_TYPE (*tp));
10070 if (offloaded)
10071 e = split_block_after_labels (new_bb);
10072 else
10073 {
10074 gsi = gsi_last_nondebug_bb (new_bb);
10075 gsi_prev (&gsi);
10076 e = split_block (new_bb, gsi_stmt (gsi));
10077 }
10078 cond_bb = e->src;
10079 new_bb = e->dest;
10080 remove_edge (e);
10081
10082 then_bb = create_empty_bb (cond_bb);
10083 else_bb = create_empty_bb (then_bb);
10084 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10085 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10086
10087 stmt = gimple_build_cond_empty (cond);
10088 gsi = gsi_last_bb (cond_bb);
10089 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10090
10091 gsi = gsi_start_bb (then_bb);
10092 stmt = gimple_build_assign (tmp_var, *tp);
10093 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10094
10095 gsi = gsi_start_bb (else_bb);
10096 if (is_gimple_omp_oacc (entry_stmt))
10097 stmt = gimple_build_assign (tmp_var,
10098 BIT_IOR_EXPR,
10099 *tp,
10100 build_int_cst (integer_type_node,
10101 GOACC_FLAG_HOST_FALLBACK));
10102 else
10103 stmt = gimple_build_assign (tmp_var,
10104 build_int_cst (integer_type_node,
10105 GOMP_DEVICE_HOST_FALLBACK));
10106 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10107
10108 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10109 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10110 add_bb_to_loop (then_bb, cond_bb->loop_father);
10111 add_bb_to_loop (else_bb, cond_bb->loop_father);
10112 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10113 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10114
10115 *tp = tmp_var;
10116
10117 gsi = gsi_last_nondebug_bb (new_bb);
10118 }
10119 else
10120 {
10121 gsi = gsi_last_nondebug_bb (new_bb);
10122
10123 if (device != NULL_TREE)
10124 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10125 true, GSI_SAME_STMT);
10126 }
10127
10128 t = gimple_omp_target_data_arg (entry_stmt);
10129 if (t == NULL)
10130 {
10131 t1 = size_zero_node;
10132 t2 = build_zero_cst (ptr_type_node);
10133 t3 = t2;
10134 t4 = t2;
10135 }
10136 else
10137 {
10138 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10139 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10140 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10141 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10142 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10143 }
10144
10145 gimple *g;
10146 bool tagging = false;
10147 /* The maximum number of arguments used by any start_ix, without varargs. */
10148 auto_vec<tree, 11> args;
10149 if (is_gimple_omp_oacc (entry_stmt))
10150 {
10151 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10152 TREE_TYPE (goacc_flags), goacc_flags);
10153 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10154 NULL_TREE, true,
10155 GSI_SAME_STMT);
10156 args.quick_push (goacc_flags_m);
10157 }
10158 else
10159 args.quick_push (device);
10160 if (offloaded)
10161 args.quick_push (build_fold_addr_expr (child_fn));
10162 args.quick_push (t1);
10163 args.quick_push (t2);
10164 args.quick_push (t3);
10165 args.quick_push (t4);
10166 switch (start_ix)
10167 {
10168 case BUILT_IN_GOACC_DATA_START:
10169 case BUILT_IN_GOACC_DECLARE:
10170 case BUILT_IN_GOMP_TARGET_DATA:
10171 break;
10172 case BUILT_IN_GOMP_TARGET:
10173 case BUILT_IN_GOMP_TARGET_UPDATE:
10174 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10175 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10176 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10177 if (c)
10178 depend = OMP_CLAUSE_DECL (c);
10179 else
10180 depend = build_int_cst (ptr_type_node, 0);
10181 args.quick_push (depend);
10182 if (start_ix == BUILT_IN_GOMP_TARGET)
10183 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10184 break;
10185 case BUILT_IN_GOACC_PARALLEL:
10186 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10187 {
10188 tree dims = NULL_TREE;
10189 unsigned int ix;
10190
10191 /* For serial constructs we set all dimensions to 1. */
10192 for (ix = GOMP_DIM_MAX; ix--;)
10193 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10194 oacc_replace_fn_attrib (child_fn, dims);
10195 }
10196 else
10197 oacc_set_fn_attrib (child_fn, clauses, &args);
10198 tagging = true;
10199 /* FALLTHRU */
10200 case BUILT_IN_GOACC_ENTER_DATA:
10201 case BUILT_IN_GOACC_EXIT_DATA:
10202 case BUILT_IN_GOACC_UPDATE:
10203 {
10204 tree t_async = NULL_TREE;
10205
10206 /* If present, use the value specified by the respective
10207 clause, making sure it is of the correct type. */
10208 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10209 if (c)
10210 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10211 integer_type_node,
10212 OMP_CLAUSE_ASYNC_EXPR (c));
10213 else if (!tagging)
10214 /* Default values for t_async. */
10215 t_async = fold_convert_loc (gimple_location (entry_stmt),
10216 integer_type_node,
10217 build_int_cst (integer_type_node,
10218 GOMP_ASYNC_SYNC));
10219 if (tagging && t_async)
10220 {
10221 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10222
10223 if (TREE_CODE (t_async) == INTEGER_CST)
10224 {
10225 /* See if we can pack the async arg in to the tag's
10226 operand. */
10227 i_async = TREE_INT_CST_LOW (t_async);
10228 if (i_async < GOMP_LAUNCH_OP_MAX)
10229 t_async = NULL_TREE;
10230 else
10231 i_async = GOMP_LAUNCH_OP_MAX;
10232 }
10233 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10234 i_async));
10235 }
10236 if (t_async)
10237 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10238 NULL_TREE, true,
10239 GSI_SAME_STMT));
10240
10241 /* Save the argument index, and ... */
10242 unsigned t_wait_idx = args.length ();
10243 unsigned num_waits = 0;
10244 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10245 if (!tagging || c)
10246 /* ... push a placeholder. */
10247 args.safe_push (integer_zero_node);
10248
10249 for (; c; c = OMP_CLAUSE_CHAIN (c))
10250 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10251 {
10252 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10253 integer_type_node,
10254 OMP_CLAUSE_WAIT_EXPR (c));
10255 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10256 GSI_SAME_STMT);
10257 args.safe_push (arg);
10258 num_waits++;
10259 }
10260
10261 if (!tagging || num_waits)
10262 {
10263 tree len;
10264
10265 /* Now that we know the number, update the placeholder. */
10266 if (tagging)
10267 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10268 else
10269 len = build_int_cst (integer_type_node, num_waits);
10270 len = fold_convert_loc (gimple_location (entry_stmt),
10271 unsigned_type_node, len);
10272 args[t_wait_idx] = len;
10273 }
10274 }
10275 break;
10276 default:
10277 gcc_unreachable ();
10278 }
10279 if (tagging)
10280 /* Push terminal marker - zero. */
10281 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10282
10283 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10284 gimple_set_location (g, gimple_location (entry_stmt));
10285 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10286 if (!offloaded)
10287 {
10288 g = gsi_stmt (gsi);
10289 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10290 gsi_remove (&gsi, true);
10291 }
10292 }
10293
10294 /* Expand the parallel region tree rooted at REGION. Expansion
10295 proceeds in depth-first order. Innermost regions are expanded
10296 first. This way, parallel regions that require a new function to
10297 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10298 internal dependencies in their body. */
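/* E.g. for an illustrative nest
     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...
     }
   the inner GIMPLE_OMP_FOR region is expanded first, and only then is the
   enclosing GIMPLE_OMP_PARALLEL region outlined into its own function by
   expand_omp_taskreg. */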
10299
10300 static void
10301 expand_omp (struct omp_region *region)
10302 {
10303 omp_any_child_fn_dumped = false;
10304 while (region)
10305 {
10306 location_t saved_location;
10307 gimple *inner_stmt = NULL;
10308
10309 /* First, determine whether this is a combined parallel+workshare
10310 region. */
10311 if (region->type == GIMPLE_OMP_PARALLEL)
10312 determine_parallel_type (region);
10313
10314 if (region->type == GIMPLE_OMP_FOR
10315 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10316 inner_stmt = last_stmt (region->inner->entry);
10317
10318 if (region->inner)
10319 expand_omp (region->inner);
10320
10321 saved_location = input_location;
10322 if (gimple_has_location (last_stmt (region->entry)))
10323 input_location = gimple_location (last_stmt (region->entry));
10324
10325 switch (region->type)
10326 {
10327 case GIMPLE_OMP_PARALLEL:
10328 case GIMPLE_OMP_TASK:
10329 expand_omp_taskreg (region);
10330 break;
10331
10332 case GIMPLE_OMP_FOR:
10333 expand_omp_for (region, inner_stmt);
10334 break;
10335
10336 case GIMPLE_OMP_SECTIONS:
10337 expand_omp_sections (region);
10338 break;
10339
10340 case GIMPLE_OMP_SECTION:
10341 /* Individual omp sections are handled together with their
10342 parent GIMPLE_OMP_SECTIONS region. */
10343 break;
10344
10345 case GIMPLE_OMP_SINGLE:
10346 case GIMPLE_OMP_SCOPE:
10347 expand_omp_single (region);
10348 break;
10349
10350 case GIMPLE_OMP_ORDERED:
10351 {
10352 gomp_ordered *ord_stmt
10353 = as_a <gomp_ordered *> (last_stmt (region->entry));
10354 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10355 OMP_CLAUSE_DEPEND))
10356 {
10357 /* We'll expand these when expanding corresponding
10358 worksharing region with ordered(n) clause. */
10359 gcc_assert (region->outer
10360 && region->outer->type == GIMPLE_OMP_FOR);
10361 region->ord_stmt = ord_stmt;
10362 break;
10363 }
10364 }
10365 /* FALLTHRU */
10366 case GIMPLE_OMP_MASTER:
10367 case GIMPLE_OMP_MASKED:
10368 case GIMPLE_OMP_TASKGROUP:
10369 case GIMPLE_OMP_CRITICAL:
10370 case GIMPLE_OMP_TEAMS:
10371 expand_omp_synch (region);
10372 break;
10373
10374 case GIMPLE_OMP_ATOMIC_LOAD:
10375 expand_omp_atomic (region);
10376 break;
10377
10378 case GIMPLE_OMP_TARGET:
10379 expand_omp_target (region);
10380 break;
10381
10382 default:
10383 gcc_unreachable ();
10384 }
10385
10386 input_location = saved_location;
10387 region = region->next;
10388 }
10389 if (omp_any_child_fn_dumped)
10390 {
10391 if (dump_file)
10392 dump_function_header (dump_file, current_function_decl, dump_flags);
10393 omp_any_child_fn_dumped = false;
10394 }
10395 }
10396
10397 /* Helper for build_omp_regions. Scan the dominator tree starting at
10398 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10399 true, the function ends once a single tree is built (otherwise, a whole
10400 forest of OMP constructs may be built). */
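/* E.g. a 'parallel' construct containing a 'for' construct yields a
   parallel region whose 'inner' child is the for region; each
   GIMPLE_OMP_RETURN encountered closes the innermost open region (setting
   its 'exit' block) and pops PARENT back to its 'outer' region. */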
10401
10402 static void
10403 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10404 bool single_tree)
10405 {
10406 gimple_stmt_iterator gsi;
10407 gimple *stmt;
10408 basic_block son;
10409
10410 gsi = gsi_last_nondebug_bb (bb);
10411 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10412 {
10413 struct omp_region *region;
10414 enum gimple_code code;
10415
10416 stmt = gsi_stmt (gsi);
10417 code = gimple_code (stmt);
10418 if (code == GIMPLE_OMP_RETURN)
10419 {
10420 /* STMT is the return point out of region PARENT. Mark it
10421 as the exit point and make PARENT the immediately
10422 enclosing region. */
10423 gcc_assert (parent);
10424 region = parent;
10425 region->exit = bb;
10426 parent = parent->outer;
10427 }
10428 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10429 {
10430 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10431 GIMPLE_OMP_RETURN, but matches with
10432 GIMPLE_OMP_ATOMIC_LOAD. */
10433 gcc_assert (parent);
10434 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10435 region = parent;
10436 region->exit = bb;
10437 parent = parent->outer;
10438 }
10439 else if (code == GIMPLE_OMP_CONTINUE)
10440 {
10441 gcc_assert (parent);
10442 parent->cont = bb;
10443 }
10444 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10445 {
10446 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10447 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10448 }
10449 else
10450 {
10451 region = new_omp_region (bb, code, parent);
10452 /* Otherwise... */
10453 if (code == GIMPLE_OMP_TARGET)
10454 {
10455 switch (gimple_omp_target_kind (stmt))
10456 {
10457 case GF_OMP_TARGET_KIND_REGION:
10458 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10459 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10460 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10461 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10462 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10463 break;
10464 case GF_OMP_TARGET_KIND_UPDATE:
10465 case GF_OMP_TARGET_KIND_ENTER_DATA:
10466 case GF_OMP_TARGET_KIND_EXIT_DATA:
10467 case GF_OMP_TARGET_KIND_DATA:
10468 case GF_OMP_TARGET_KIND_OACC_DATA:
10469 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10470 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10471 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10472 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10473 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10474 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10475 /* ..., other than for those stand-alone directives...
10476 To be precise, target data isn't stand-alone, but the
10477 gimplifier puts the end API call into a try/finally block
10478 for it, so omp expansion can treat it as such. */
10479 region = NULL;
10480 break;
10481 default:
10482 gcc_unreachable ();
10483 }
10484 }
10485 else if (code == GIMPLE_OMP_ORDERED
10486 && omp_find_clause (gimple_omp_ordered_clauses
10487 (as_a <gomp_ordered *> (stmt)),
10488 OMP_CLAUSE_DEPEND))
10489 /* #pragma omp ordered depend is also just a stand-alone
10490 directive. */
10491 region = NULL;
10492 else if (code == GIMPLE_OMP_TASK
10493 && gimple_omp_task_taskwait_p (stmt))
10494 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10495 region = NULL;
10496 else if (code == GIMPLE_OMP_TASKGROUP)
10497 /* #pragma omp taskgroup isn't a stand-alone directive, but the
10498 gimplifier puts the end API call into a try/finally block
10499 for it, so omp expansion can treat it as such. */
10500 region = NULL;
10501 /* ..., this directive becomes the parent for a new region. */
10502 if (region)
10503 parent = region;
10504 }
10505 }
10506
10507 if (single_tree && !parent)
10508 return;
10509
10510 for (son = first_dom_son (CDI_DOMINATORS, bb);
10511 son;
10512 son = next_dom_son (CDI_DOMINATORS, son))
10513 build_omp_regions_1 (son, parent, single_tree);
10514 }
10515
10516 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10517 root_omp_region. */
10518
10519 static void
10520 build_omp_regions_root (basic_block root)
10521 {
10522 gcc_assert (root_omp_region == NULL);
10523 build_omp_regions_1 (root, NULL, true);
10524 gcc_assert (root_omp_region != NULL);
10525 }
10526
10527 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10528
10529 void
10530 omp_expand_local (basic_block head)
10531 {
10532 build_omp_regions_root (head);
10533 if (dump_file && (dump_flags & TDF_DETAILS))
10534 {
10535 fprintf (dump_file, "\nOMP region tree\n\n");
10536 dump_omp_region (dump_file, root_omp_region, 0);
10537 fprintf (dump_file, "\n");
10538 }
10539
10540 remove_exit_barriers (root_omp_region);
10541 expand_omp (root_omp_region);
10542
10543 omp_free_regions ();
10544 }
10545
10546 /* Scan the CFG and build a tree of OMP regions, storing it in
10547 root_omp_region. */
10548
10549 static void
10550 build_omp_regions (void)
10551 {
10552 gcc_assert (root_omp_region == NULL);
10553 calculate_dominance_info (CDI_DOMINATORS);
10554 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10555 }
10556
10557 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10558
10559 static unsigned int
10560 execute_expand_omp (void)
10561 {
10562 build_omp_regions ();
10563
10564 if (!root_omp_region)
10565 return 0;
10566
10567 if (dump_file)
10568 {
10569 fprintf (dump_file, "\nOMP region tree\n\n");
10570 dump_omp_region (dump_file, root_omp_region, 0);
10571 fprintf (dump_file, "\n");
10572 }
10573
10574 remove_exit_barriers (root_omp_region);
10575
10576 expand_omp (root_omp_region);
10577
10578 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10579 verify_loop_structure ();
10580 cleanup_tree_cfg ();
10581
10582 omp_free_regions ();
10583
10584 return 0;
10585 }
10586
10587 /* OMP expansion -- the default pass, run before creation of SSA form. */
10588
10589 namespace {
10590
10591 const pass_data pass_data_expand_omp =
10592 {
10593 GIMPLE_PASS, /* type */
10594 "ompexp", /* name */
10595 OPTGROUP_OMP, /* optinfo_flags */
10596 TV_NONE, /* tv_id */
10597 PROP_gimple_any, /* properties_required */
10598 PROP_gimple_eomp, /* properties_provided */
10599 0, /* properties_destroyed */
10600 0, /* todo_flags_start */
10601 0, /* todo_flags_finish */
10602 };
10603
10604 class pass_expand_omp : public gimple_opt_pass
10605 {
10606 public:
10607 pass_expand_omp (gcc::context *ctxt)
10608 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10609 {}
10610
10611 /* opt_pass methods: */
10612 virtual unsigned int execute (function *)
10613 {
10614 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10615 || flag_openmp_simd != 0)
10616 && !seen_error ());
10617
10618 /* This pass always runs, to provide PROP_gimple_eomp.
10619 But often, there is nothing to do. */
10620 if (!gate)
10621 return 0;
10622
10623 return execute_expand_omp ();
10624 }
10625
10626 }; // class pass_expand_omp
10627
10628 } // anon namespace
10629
10630 gimple_opt_pass *
10631 make_pass_expand_omp (gcc::context *ctxt)
10632 {
10633 return new pass_expand_omp (ctxt);
10634 }
10635
10636 namespace {
10637
10638 const pass_data pass_data_expand_omp_ssa =
10639 {
10640 GIMPLE_PASS, /* type */
10641 "ompexpssa", /* name */
10642 OPTGROUP_OMP, /* optinfo_flags */
10643 TV_NONE, /* tv_id */
10644 PROP_cfg | PROP_ssa, /* properties_required */
10645 PROP_gimple_eomp, /* properties_provided */
10646 0, /* properties_destroyed */
10647 0, /* todo_flags_start */
10648 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10649 };
10650
10651 class pass_expand_omp_ssa : public gimple_opt_pass
10652 {
10653 public:
10654 pass_expand_omp_ssa (gcc::context *ctxt)
10655 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10656 {}
10657
10658 /* opt_pass methods: */
10659 virtual bool gate (function *fun)
10660 {
10661 return !(fun->curr_properties & PROP_gimple_eomp);
10662 }
10663 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10664 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10665
10666 }; // class pass_expand_omp_ssa
10667
10668 } // anon namespace
10669
10670 gimple_opt_pass *
10671 make_pass_expand_omp_ssa (gcc::context *ctxt)
10672 {
10673 return new pass_expand_omp_ssa (ctxt);
10674 }
10675
10676 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10677 GIMPLE_* codes. */
10678
10679 bool
10680 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10681 int *region_idx)
10682 {
10683 gimple *last = last_stmt (bb);
10684 enum gimple_code code = gimple_code (last);
10685 struct omp_region *cur_region = *region;
10686 bool fallthru = false;
10687
10688 switch (code)
10689 {
10690 case GIMPLE_OMP_PARALLEL:
10691 case GIMPLE_OMP_FOR:
10692 case GIMPLE_OMP_SINGLE:
10693 case GIMPLE_OMP_TEAMS:
10694 case GIMPLE_OMP_MASTER:
10695 case GIMPLE_OMP_MASKED:
10696 case GIMPLE_OMP_SCOPE:
10697 case GIMPLE_OMP_CRITICAL:
10698 case GIMPLE_OMP_SECTION:
10699 cur_region = new_omp_region (bb, code, cur_region);
10700 fallthru = true;
10701 break;
10702
10703 case GIMPLE_OMP_TASKGROUP:
10704 cur_region = new_omp_region (bb, code, cur_region);
10705 fallthru = true;
10706 cur_region = cur_region->outer;
10707 break;
10708
10709 case GIMPLE_OMP_TASK:
10710 cur_region = new_omp_region (bb, code, cur_region);
10711 fallthru = true;
10712 if (gimple_omp_task_taskwait_p (last))
10713 cur_region = cur_region->outer;
10714 break;
10715
10716 case GIMPLE_OMP_ORDERED:
10717 cur_region = new_omp_region (bb, code, cur_region);
10718 fallthru = true;
10719 if (omp_find_clause (gimple_omp_ordered_clauses
10720 (as_a <gomp_ordered *> (last)),
10721 OMP_CLAUSE_DEPEND))
10722 cur_region = cur_region->outer;
10723 break;
10724
10725 case GIMPLE_OMP_TARGET:
10726 cur_region = new_omp_region (bb, code, cur_region);
10727 fallthru = true;
10728 switch (gimple_omp_target_kind (last))
10729 {
10730 case GF_OMP_TARGET_KIND_REGION:
10731 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10732 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10733 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10734 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10735 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10736 break;
10737 case GF_OMP_TARGET_KIND_UPDATE:
10738 case GF_OMP_TARGET_KIND_ENTER_DATA:
10739 case GF_OMP_TARGET_KIND_EXIT_DATA:
10740 case GF_OMP_TARGET_KIND_DATA:
10741 case GF_OMP_TARGET_KIND_OACC_DATA:
10742 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10743 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10744 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10745 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10746 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10747 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10748 cur_region = cur_region->outer;
10749 break;
10750 default:
10751 gcc_unreachable ();
10752 }
10753 break;
10754
10755 case GIMPLE_OMP_SECTIONS:
10756 cur_region = new_omp_region (bb, code, cur_region);
10757 fallthru = true;
10758 break;
10759
10760 case GIMPLE_OMP_SECTIONS_SWITCH:
10761 fallthru = false;
10762 break;
10763
10764 case GIMPLE_OMP_ATOMIC_LOAD:
10765 case GIMPLE_OMP_ATOMIC_STORE:
10766 fallthru = true;
10767 break;
10768
10769 case GIMPLE_OMP_RETURN:
10770 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10771 somewhere other than the next block. This will be
10772 created later. */
10773 cur_region->exit = bb;
10774 if (cur_region->type == GIMPLE_OMP_TASK)
10775 /* Add an edge corresponding to not scheduling the task
10776 immediately. */
10777 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10778 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10779 cur_region = cur_region->outer;
10780 break;
10781
10782 case GIMPLE_OMP_CONTINUE:
10783 cur_region->cont = bb;
10784 switch (cur_region->type)
10785 {
10786 case GIMPLE_OMP_FOR:
10787 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10788 successor edges as abnormal to prevent splitting
10789 them. */
10790 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10791 /* Make the loopback edge. */
10792 make_edge (bb, single_succ (cur_region->entry),
10793 EDGE_ABNORMAL);
10794
10795 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10796 corresponds to the case that the body of the loop
10797 is not executed at all. */
10798 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10799 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10800 fallthru = false;
10801 break;
10802
10803 case GIMPLE_OMP_SECTIONS:
10804 /* Wire up the edges into and out of the nested sections. */
10805 {
10806 basic_block switch_bb = single_succ (cur_region->entry);
10807
10808 struct omp_region *i;
10809 for (i = cur_region->inner; i ; i = i->next)
10810 {
10811 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10812 make_edge (switch_bb, i->entry, 0);
10813 make_edge (i->exit, bb, EDGE_FALLTHRU);
10814 }
10815
10816 /* Make the loopback edge to the block with
10817 GIMPLE_OMP_SECTIONS_SWITCH. */
10818 make_edge (bb, switch_bb, 0);
10819
10820 /* Make the edge from the switch to exit. */
10821 make_edge (switch_bb, bb->next_bb, 0);
10822 fallthru = false;
10823 }
10824 break;
10825
10826 case GIMPLE_OMP_TASK:
10827 fallthru = true;
10828 break;
10829
10830 default:
10831 gcc_unreachable ();
10832 }
10833 break;
10834
10835 default:
10836 gcc_unreachable ();
10837 }
10838
10839 if (*region != cur_region)
10840 {
10841 *region = cur_region;
10842 if (cur_region)
10843 *region_idx = cur_region->entry->index;
10844 else
10845 *region_idx = 0;
10846 }
10847
10848 return fallthru;
10849 }
10850