/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
                            build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
                      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				   OMP_CLAUSE__REDUCTEMP_))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
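  /* Roughly speaking, for

       #pragma omp parallel if (c) num_threads (n)

     the code below arranges for the GOMP_parallel call to receive
     'c ? n : 1' as its thread-count argument, with zero meaning the
     runtime picks the number of threads itself.  */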
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  ENTRY_STMT is the GIMPLE_OMP_TEAMS
   statement of the teams region being expanded.  BB is the block where to
   insert the code.  */
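
/* In essence, the call emitted below has the shape

     GOMP_teams_reg (child_fn, &.omp_data_o, num_teams, thread_limit, 0);

   where a zero NUM_TEAMS or THREAD_LIMIT lets the runtime choose, and the
   trailing zero flags argument is reserved for future extensions.  */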

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in V by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */
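
/* For instance, a call to 'omp_get_num_threads ()' inside the outlined body
   of a parallel is redirected to '__builtin_omp_get_num_threads ()'; being
   declared const, repeated calls can then be CSEd by later passes.  */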

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */
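
/* In rough terms, the expansion below takes a region such as

     #pragma omp parallel shared (x)
       body;

   moves the statements of the region out of the current CFG into the
   child function recorded on the directive (created during OMP lowering),
   and replaces the region with a runtime call of the form

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   (or GOMP_task / GOMP_teams_reg for task and host teams regions).  */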

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We ignore the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
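  /* For each member loop handled below, the iteration count is in essence
     computed as (range - dir + step) / step, i.e. a ceiling division of the
     range by the step, and the per-loop counts are multiplied into TOTAL to
     form the bound of the single collapsed loop.  */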
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
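
/* In essence, working from the innermost member loop outwards, each loop's
   iteration value is recovered from IVAR as

     v[ix] = base[ix] + (T % iters[ix]) * step[ix];  T = T / iters[ix];

   starting with T = IVAR, mirroring how expand_oacc_collapse_init multiplied
   the per-loop counts together.  */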

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

     bool zero3, zero2, zero1, zero;

     zero3 = N32 c3 N31;
     count3 = (N32 - N31) /[cl] STEP3;
     zero2 = N22 c2 N21;
     count2 = (N22 - N21) /[cl] STEP2;
     zero1 = N12 c1 N11;
     count1 = (N12 - N11) /[cl] STEP1;
     zero = zero3 || zero2 || zero1;
     count = count1 * count2 * count3;
     if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.
1747 1748 It seems like putting this all together would create much better 1749 scheduling opportunities, and less pressure on the chip's branch 1750 predictor. */ 1751 1752 static void 1753 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1754 basic_block &entry_bb, tree *counts, 1755 basic_block &zero_iter1_bb, int &first_zero_iter1, 1756 basic_block &zero_iter2_bb, int &first_zero_iter2, 1757 basic_block &l2_dom_bb) 1758 { 1759 tree t, type = TREE_TYPE (fd->loop.v); 1760 edge e, ne; 1761 int i; 1762 1763 /* Collapsed loops need work for expansion into SSA form. */ 1764 gcc_assert (!gimple_in_ssa_p (cfun)); 1765 1766 if (gimple_omp_for_combined_into_p (fd->for_stmt) 1767 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 1768 { 1769 gcc_assert (fd->ordered == 0); 1770 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1771 isn't supposed to be handled, as the inner loop doesn't 1772 use it. */ 1773 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 1774 OMP_CLAUSE__LOOPTEMP_); 1775 gcc_assert (innerc); 1776 for (i = 0; i < fd->collapse; i++) 1777 { 1778 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1779 OMP_CLAUSE__LOOPTEMP_); 1780 gcc_assert (innerc); 1781 if (i) 1782 counts[i] = OMP_CLAUSE_DECL (innerc); 1783 else 1784 counts[0] = NULL_TREE; 1785 } 1786 return; 1787 } 1788 1789 for (i = fd->collapse; i < fd->ordered; i++) 1790 { 1791 tree itype = TREE_TYPE (fd->loops[i].v); 1792 counts[i] = NULL_TREE; 1793 t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1794 fold_convert (itype, fd->loops[i].n1), 1795 fold_convert (itype, fd->loops[i].n2)); 1796 if (t && integer_zerop (t)) 1797 { 1798 for (i = fd->collapse; i < fd->ordered; i++) 1799 counts[i] = build_int_cst (type, 0); 1800 break; 1801 } 1802 } 1803 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) 1804 { 1805 tree itype = TREE_TYPE (fd->loops[i].v); 1806 1807 if (i >= fd->collapse && counts[i]) 1808 continue; 1809 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) 1810 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1811 fold_convert (itype, fd->loops[i].n1), 1812 fold_convert (itype, fd->loops[i].n2))) 1813 == NULL_TREE || !integer_onep (t))) 1814 { 1815 gcond *cond_stmt; 1816 tree n1, n2; 1817 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 1818 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, 1819 true, GSI_SAME_STMT); 1820 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 1821 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, 1822 true, GSI_SAME_STMT); 1823 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, 1824 NULL_TREE, NULL_TREE); 1825 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); 1826 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 1827 expand_omp_regimplify_p, NULL, NULL) 1828 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 1829 expand_omp_regimplify_p, NULL, NULL)) 1830 { 1831 *gsi = gsi_for_stmt (cond_stmt); 1832 gimple_regimplify_operands (cond_stmt, gsi); 1833 } 1834 e = split_block (entry_bb, cond_stmt); 1835 basic_block &zero_iter_bb 1836 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; 1837 int &first_zero_iter 1838 = i < fd->collapse ? 
first_zero_iter1 : first_zero_iter2; 1839 if (zero_iter_bb == NULL) 1840 { 1841 gassign *assign_stmt; 1842 first_zero_iter = i; 1843 zero_iter_bb = create_empty_bb (entry_bb); 1844 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); 1845 *gsi = gsi_after_labels (zero_iter_bb); 1846 if (i < fd->collapse) 1847 assign_stmt = gimple_build_assign (fd->loop.n2, 1848 build_zero_cst (type)); 1849 else 1850 { 1851 counts[i] = create_tmp_reg (type, ".count"); 1852 assign_stmt 1853 = gimple_build_assign (counts[i], build_zero_cst (type)); 1854 } 1855 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); 1856 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, 1857 entry_bb); 1858 } 1859 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); 1860 ne->probability = profile_probability::very_unlikely (); 1861 e->flags = EDGE_TRUE_VALUE; 1862 e->probability = ne->probability.invert (); 1863 if (l2_dom_bb == NULL) 1864 l2_dom_bb = entry_bb; 1865 entry_bb = e->dest; 1866 *gsi = gsi_last_nondebug_bb (entry_bb); 1867 } 1868 1869 if (POINTER_TYPE_P (itype)) 1870 itype = signed_type_for (itype); 1871 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR 1872 ? -1 : 1)); 1873 t = fold_build2 (PLUS_EXPR, itype, 1874 fold_convert (itype, fd->loops[i].step), t); 1875 t = fold_build2 (PLUS_EXPR, itype, t, 1876 fold_convert (itype, fd->loops[i].n2)); 1877 t = fold_build2 (MINUS_EXPR, itype, t, 1878 fold_convert (itype, fd->loops[i].n1)); 1879 /* ?? We could probably use CEIL_DIV_EXPR instead of 1880 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't 1881 generate the same code in the end because generically we 1882 don't know that the values involved must be negative for 1883 GT?? */ 1884 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 1885 t = fold_build2 (TRUNC_DIV_EXPR, itype, 1886 fold_build1 (NEGATE_EXPR, itype, t), 1887 fold_build1 (NEGATE_EXPR, itype, 1888 fold_convert (itype, 1889 fd->loops[i].step))); 1890 else 1891 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, 1892 fold_convert (itype, fd->loops[i].step)); 1893 t = fold_convert (type, t); 1894 if (TREE_CODE (t) == INTEGER_CST) 1895 counts[i] = t; 1896 else 1897 { 1898 if (i < fd->collapse || i != first_zero_iter2) 1899 counts[i] = create_tmp_reg (type, ".count"); 1900 expand_omp_build_assign (gsi, counts[i], t); 1901 } 1902 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) 1903 { 1904 if (i == 0) 1905 t = counts[0]; 1906 else 1907 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); 1908 expand_omp_build_assign (gsi, fd->loop.n2, t); 1909 } 1910 } 1911 } 1912 1913 /* Helper function for expand_omp_{for_*,simd}. Generate code like: 1914 T = V; 1915 V3 = N31 + (T % count3) * STEP3; 1916 T = T / count3; 1917 V2 = N21 + (T % count2) * STEP2; 1918 T = T / count2; 1919 V1 = N11 + T * STEP1; 1920 if this loop doesn't have an inner loop construct combined with it. 1921 If it does have an inner loop construct combined with it and the 1922 iteration count isn't known constant, store values from counts array 1923 into its _looptemp_ temporaries instead. */ 1924 1925 static void 1926 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1927 tree *counts, gimple *inner_stmt, tree startvar) 1928 { 1929 int i; 1930 if (gimple_omp_for_combined_p (fd->for_stmt)) 1931 { 1932 /* If fd->loop.n2 is constant, then no propagation of the counts 1933 is needed, they are constant. */ 1934 if (TREE_CODE (fd->loop.n2) == INTEGER_CST) 1935 return; 1936 1937 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR 1938 ? 
gimple_omp_taskreg_clauses (inner_stmt) 1939 : gimple_omp_for_clauses (inner_stmt); 1940 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1941 isn't supposed to be handled, as the inner loop doesn't 1942 use it. */ 1943 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 1944 gcc_assert (innerc); 1945 for (i = 0; i < fd->collapse; i++) 1946 { 1947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1948 OMP_CLAUSE__LOOPTEMP_); 1949 gcc_assert (innerc); 1950 if (i) 1951 { 1952 tree tem = OMP_CLAUSE_DECL (innerc); 1953 tree t = fold_convert (TREE_TYPE (tem), counts[i]); 1954 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 1955 false, GSI_CONTINUE_LINKING); 1956 gassign *stmt = gimple_build_assign (tem, t); 1957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1958 } 1959 } 1960 return; 1961 } 1962 1963 tree type = TREE_TYPE (fd->loop.v); 1964 tree tem = create_tmp_reg (type, ".tem"); 1965 gassign *stmt = gimple_build_assign (tem, startvar); 1966 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1967 1968 for (i = fd->collapse - 1; i >= 0; i--) 1969 { 1970 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; 1971 itype = vtype; 1972 if (POINTER_TYPE_P (vtype)) 1973 itype = signed_type_for (vtype); 1974 if (i != 0) 1975 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); 1976 else 1977 t = tem; 1978 t = fold_convert (itype, t); 1979 t = fold_build2 (MULT_EXPR, itype, t, 1980 fold_convert (itype, fd->loops[i].step)); 1981 if (POINTER_TYPE_P (vtype)) 1982 t = fold_build_pointer_plus (fd->loops[i].n1, t); 1983 else 1984 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); 1985 t = force_gimple_operand_gsi (gsi, t, 1986 DECL_P (fd->loops[i].v) 1987 && TREE_ADDRESSABLE (fd->loops[i].v), 1988 NULL_TREE, false, 1989 GSI_CONTINUE_LINKING); 1990 stmt = gimple_build_assign (fd->loops[i].v, t); 1991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1992 if (i != 0) 1993 { 1994 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); 1995 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 1996 false, GSI_CONTINUE_LINKING); 1997 stmt = gimple_build_assign (tem, t); 1998 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1999 } 2000 } 2001 } 2002 2003 /* Helper function for expand_omp_for_*. 
Generate code like: 2004 L10: 2005 V3 += STEP3; 2006 if (V3 cond3 N32) goto BODY_BB; else goto L11; 2007 L11: 2008 V3 = N31; 2009 V2 += STEP2; 2010 if (V2 cond2 N22) goto BODY_BB; else goto L12; 2011 L12: 2012 V2 = N21; 2013 V1 += STEP1; 2014 goto BODY_BB; */ 2015 2016 static basic_block 2017 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, 2018 basic_block body_bb) 2019 { 2020 basic_block last_bb, bb, collapse_bb = NULL; 2021 int i; 2022 gimple_stmt_iterator gsi; 2023 edge e; 2024 tree t; 2025 gimple *stmt; 2026 2027 last_bb = cont_bb; 2028 for (i = fd->collapse - 1; i >= 0; i--) 2029 { 2030 tree vtype = TREE_TYPE (fd->loops[i].v); 2031 2032 bb = create_empty_bb (last_bb); 2033 add_bb_to_loop (bb, last_bb->loop_father); 2034 gsi = gsi_start_bb (bb); 2035 2036 if (i < fd->collapse - 1) 2037 { 2038 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); 2039 e->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2040 2041 t = fd->loops[i + 1].n1; 2042 t = force_gimple_operand_gsi (&gsi, t, 2043 DECL_P (fd->loops[i + 1].v) 2044 && TREE_ADDRESSABLE (fd->loops[i 2045 + 1].v), 2046 NULL_TREE, false, 2047 GSI_CONTINUE_LINKING); 2048 stmt = gimple_build_assign (fd->loops[i + 1].v, t); 2049 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2050 } 2051 else 2052 collapse_bb = bb; 2053 2054 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); 2055 2056 if (POINTER_TYPE_P (vtype)) 2057 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); 2058 else 2059 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); 2060 t = force_gimple_operand_gsi (&gsi, t, 2061 DECL_P (fd->loops[i].v) 2062 && TREE_ADDRESSABLE (fd->loops[i].v), 2063 NULL_TREE, false, GSI_CONTINUE_LINKING); 2064 stmt = gimple_build_assign (fd->loops[i].v, t); 2065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2066 2067 if (i > 0) 2068 { 2069 t = fd->loops[i].n2; 2070 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2071 false, GSI_CONTINUE_LINKING); 2072 tree v = fd->loops[i].v; 2073 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 2074 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 2075 false, GSI_CONTINUE_LINKING); 2076 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 2077 stmt = gimple_build_cond_empty (t); 2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2079 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)), 2080 expand_omp_regimplify_p, NULL, NULL) 2081 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)), 2082 expand_omp_regimplify_p, NULL, NULL)) 2083 gimple_regimplify_operands (stmt, &gsi); 2084 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); 2085 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 2086 } 2087 else 2088 make_edge (bb, body_bb, EDGE_FALLTHRU); 2089 last_bb = bb; 2090 } 2091 2092 return collapse_bb; 2093 } 2094 2095 /* Expand #pragma omp ordered depend(source). */ 2096 2097 static void 2098 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 2099 tree *counts, location_t loc) 2100 { 2101 enum built_in_function source_ix 2102 = fd->iter_type == long_integer_type_node 2103 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; 2104 gimple *g 2105 = gimple_build_call (builtin_decl_explicit (source_ix), 1, 2106 build_fold_addr_expr (counts[fd->ordered])); 2107 gimple_set_location (g, loc); 2108 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2109 } 2110 2111 /* Expand a single depend from #pragma omp ordered depend(sink:...). 
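   As an illustrative sketch only, for

     #pragma omp for ordered(2)
     for (i = lb1; i < ub1; i++)
       for (j = lb2; j < ub2; j++)
         {
           #pragma omp ordered depend(sink: i - 1, j + 1)
           ...
         }

   the clause is expanded to approximately

     if (i - 1 >= lb1 && j + 1 < ub2)
       GOMP_doacross_wait (i - 1 - lb1, j + 1 - lb2);

   i.e. a call to the GOMP_doacross_wait (or _ull_) entry point with the
   referenced iteration rewritten into logical iteration numbers, guarded
   by a check that such an iteration exists at all.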
*/ 2112 2113 static void 2114 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 2115 tree *counts, tree c, location_t loc) 2116 { 2117 auto_vec<tree, 10> args; 2118 enum built_in_function sink_ix 2119 = fd->iter_type == long_integer_type_node 2120 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; 2121 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; 2122 int i; 2123 gimple_stmt_iterator gsi2 = *gsi; 2124 bool warned_step = false; 2125 2126 for (i = 0; i < fd->ordered; i++) 2127 { 2128 tree step = NULL_TREE; 2129 off = TREE_PURPOSE (deps); 2130 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2131 { 2132 step = TREE_OPERAND (off, 1); 2133 off = TREE_OPERAND (off, 0); 2134 } 2135 if (!integer_zerop (off)) 2136 { 2137 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2138 || fd->loops[i].cond_code == GT_EXPR); 2139 bool forward = fd->loops[i].cond_code == LT_EXPR; 2140 if (step) 2141 { 2142 /* Non-simple Fortran DO loops. If step is variable, 2143 we don't know at compile even the direction, so can't 2144 warn. */ 2145 if (TREE_CODE (step) != INTEGER_CST) 2146 break; 2147 forward = tree_int_cst_sgn (step) != -1; 2148 } 2149 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2150 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 2151 "waiting for lexically later iteration"); 2152 break; 2153 } 2154 deps = TREE_CHAIN (deps); 2155 } 2156 /* If all offsets corresponding to the collapsed loops are zero, 2157 this depend clause can be ignored. FIXME: but there is still a 2158 flush needed. We need to emit one __sync_synchronize () for it 2159 though (perhaps conditionally)? Solve this together with the 2160 conservative dependence folding optimization. 2161 if (i >= fd->collapse) 2162 return; */ 2163 2164 deps = OMP_CLAUSE_DECL (c); 2165 gsi_prev (&gsi2); 2166 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); 2167 edge e2 = split_block_after_labels (e1->dest); 2168 2169 gsi2 = gsi_after_labels (e1->dest); 2170 *gsi = gsi_last_bb (e1->src); 2171 for (i = 0; i < fd->ordered; i++) 2172 { 2173 tree itype = TREE_TYPE (fd->loops[i].v); 2174 tree step = NULL_TREE; 2175 tree orig_off = NULL_TREE; 2176 if (POINTER_TYPE_P (itype)) 2177 itype = sizetype; 2178 if (i) 2179 deps = TREE_CHAIN (deps); 2180 off = TREE_PURPOSE (deps); 2181 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2182 { 2183 step = TREE_OPERAND (off, 1); 2184 off = TREE_OPERAND (off, 0); 2185 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2186 && integer_onep (fd->loops[i].step) 2187 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); 2188 } 2189 tree s = fold_convert_loc (loc, itype, step ? 
step : fd->loops[i].step); 2190 if (step) 2191 { 2192 off = fold_convert_loc (loc, itype, off); 2193 orig_off = off; 2194 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2195 } 2196 2197 if (integer_zerop (off)) 2198 t = boolean_true_node; 2199 else 2200 { 2201 tree a; 2202 tree co = fold_convert_loc (loc, itype, off); 2203 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 2204 { 2205 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2206 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); 2207 a = fold_build2_loc (loc, POINTER_PLUS_EXPR, 2208 TREE_TYPE (fd->loops[i].v), fd->loops[i].v, 2209 co); 2210 } 2211 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2212 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2213 fd->loops[i].v, co); 2214 else 2215 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 2216 fd->loops[i].v, co); 2217 if (step) 2218 { 2219 tree t1, t2; 2220 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2221 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2222 fd->loops[i].n1); 2223 else 2224 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2225 fd->loops[i].n2); 2226 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2227 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2228 fd->loops[i].n2); 2229 else 2230 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2231 fd->loops[i].n1); 2232 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, 2233 step, build_int_cst (TREE_TYPE (step), 0)); 2234 if (TREE_CODE (step) != INTEGER_CST) 2235 { 2236 t1 = unshare_expr (t1); 2237 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, 2238 false, GSI_CONTINUE_LINKING); 2239 t2 = unshare_expr (t2); 2240 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, 2241 false, GSI_CONTINUE_LINKING); 2242 } 2243 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, 2244 t, t2, t1); 2245 } 2246 else if (fd->loops[i].cond_code == LT_EXPR) 2247 { 2248 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2249 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2250 fd->loops[i].n1); 2251 else 2252 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2253 fd->loops[i].n2); 2254 } 2255 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2256 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, 2257 fd->loops[i].n2); 2258 else 2259 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, 2260 fd->loops[i].n1); 2261 } 2262 if (cond) 2263 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); 2264 else 2265 cond = t; 2266 2267 off = fold_convert_loc (loc, itype, off); 2268 2269 if (step 2270 || (fd->loops[i].cond_code == LT_EXPR 2271 ? !integer_onep (fd->loops[i].step) 2272 : !integer_minus_onep (fd->loops[i].step))) 2273 { 2274 if (step == NULL_TREE 2275 && TYPE_UNSIGNED (itype) 2276 && fd->loops[i].cond_code == GT_EXPR) 2277 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, 2278 fold_build1_loc (loc, NEGATE_EXPR, itype, 2279 s)); 2280 else 2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, 2282 orig_off ? 
orig_off : off, s); 2283 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, 2284 build_int_cst (itype, 0)); 2285 if (integer_zerop (t) && !warned_step) 2286 { 2287 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 2288 "refers to iteration never in the iteration " 2289 "space"); 2290 warned_step = true; 2291 } 2292 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, 2293 cond, t); 2294 } 2295 2296 if (i <= fd->collapse - 1 && fd->collapse > 1) 2297 t = fd->loop.v; 2298 else if (counts[i]) 2299 t = counts[i]; 2300 else 2301 { 2302 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2303 fd->loops[i].v, fd->loops[i].n1); 2304 t = fold_convert_loc (loc, fd->iter_type, t); 2305 } 2306 if (step) 2307 /* We have divided off by step already earlier. */; 2308 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 2309 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, 2310 fold_build1_loc (loc, NEGATE_EXPR, itype, 2311 s)); 2312 else 2313 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2314 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2315 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); 2316 off = fold_convert_loc (loc, fd->iter_type, off); 2317 if (i <= fd->collapse - 1 && fd->collapse > 1) 2318 { 2319 if (i) 2320 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, 2321 off); 2322 if (i < fd->collapse - 1) 2323 { 2324 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, 2325 counts[i]); 2326 continue; 2327 } 2328 } 2329 off = unshare_expr (off); 2330 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); 2331 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2332 true, GSI_SAME_STMT); 2333 args.safe_push (t); 2334 } 2335 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); 2336 gimple_set_location (g, loc); 2337 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 2338 2339 cond = unshare_expr (cond); 2340 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, 2341 GSI_CONTINUE_LINKING); 2342 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 2343 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 2344 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2345 e1->probability = e3->probability.invert (); 2346 e1->flags = EDGE_TRUE_VALUE; 2347 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 2348 2349 *gsi = gsi_after_labels (e2->dest); 2350 } 2351 2352 /* Expand all #pragma omp ordered depend(source) and 2353 #pragma omp ordered depend(sink:...) constructs in the current 2354 #pragma omp for ordered(n) region. 
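   A minimal sketch of the intended result: in

     #pragma omp for ordered(1)
     for (i = 0; i < n; i++)
       {
         #pragma omp ordered depend(sink: i - 1)
         ... consume data produced by iteration i - 1 ...
         #pragma omp ordered depend(source)
       }

   the sink directive becomes (roughly) a guarded GOMP_doacross_wait call
   and the source directive a GOMP_doacross_post call, both working on the
   current iteration counters kept in the .orditera array created below.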
*/ 2355 2356 static void 2357 expand_omp_ordered_source_sink (struct omp_region *region, 2358 struct omp_for_data *fd, tree *counts, 2359 basic_block cont_bb) 2360 { 2361 struct omp_region *inner; 2362 int i; 2363 for (i = fd->collapse - 1; i < fd->ordered; i++) 2364 if (i == fd->collapse - 1 && fd->collapse > 1) 2365 counts[i] = NULL_TREE; 2366 else if (i >= fd->collapse && !cont_bb) 2367 counts[i] = build_zero_cst (fd->iter_type); 2368 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 2369 && integer_onep (fd->loops[i].step)) 2370 counts[i] = NULL_TREE; 2371 else 2372 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 2373 tree atype 2374 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 2375 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 2376 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 2377 2378 for (inner = region->inner; inner; inner = inner->next) 2379 if (inner->type == GIMPLE_OMP_ORDERED) 2380 { 2381 gomp_ordered *ord_stmt = inner->ord_stmt; 2382 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 2383 location_t loc = gimple_location (ord_stmt); 2384 tree c; 2385 for (c = gimple_omp_ordered_clauses (ord_stmt); 2386 c; c = OMP_CLAUSE_CHAIN (c)) 2387 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 2388 break; 2389 if (c) 2390 expand_omp_ordered_source (&gsi, fd, counts, loc); 2391 for (c = gimple_omp_ordered_clauses (ord_stmt); 2392 c; c = OMP_CLAUSE_CHAIN (c)) 2393 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 2394 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 2395 gsi_remove (&gsi, true); 2396 } 2397 } 2398 2399 /* Wrap the body into fd->ordered - fd->collapse loops that aren't 2400 collapsed. */ 2401 2402 static basic_block 2403 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 2404 basic_block cont_bb, basic_block body_bb, 2405 bool ordered_lastprivate) 2406 { 2407 if (fd->ordered == fd->collapse) 2408 return cont_bb; 2409 2410 if (!cont_bb) 2411 { 2412 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2413 for (int i = fd->collapse; i < fd->ordered; i++) 2414 { 2415 tree type = TREE_TYPE (fd->loops[i].v); 2416 tree n1 = fold_convert (type, fd->loops[i].n1); 2417 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 2418 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2419 size_int (i - fd->collapse + 1), 2420 NULL_TREE, NULL_TREE); 2421 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2422 } 2423 return NULL; 2424 } 2425 2426 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 2427 { 2428 tree t, type = TREE_TYPE (fd->loops[i].v); 2429 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2430 expand_omp_build_assign (&gsi, fd->loops[i].v, 2431 fold_convert (type, fd->loops[i].n1)); 2432 if (counts[i]) 2433 expand_omp_build_assign (&gsi, counts[i], 2434 build_zero_cst (fd->iter_type)); 2435 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2436 size_int (i - fd->collapse + 1), 2437 NULL_TREE, NULL_TREE); 2438 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2439 if (!gsi_end_p (gsi)) 2440 gsi_prev (&gsi); 2441 else 2442 gsi = gsi_last_bb (body_bb); 2443 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 2444 basic_block new_body = e1->dest; 2445 if (body_bb == cont_bb) 2446 cont_bb = new_body; 2447 edge e2 = NULL; 2448 basic_block new_header; 2449 if (EDGE_COUNT (cont_bb->preds) > 0) 2450 { 2451 gsi = gsi_last_bb (cont_bb); 2452 if (POINTER_TYPE_P (type)) 2453 t = fold_build_pointer_plus (fd->loops[i].v, 
2454 fold_convert (sizetype, 2455 fd->loops[i].step)); 2456 else 2457 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 2458 fold_convert (type, fd->loops[i].step)); 2459 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 2460 if (counts[i]) 2461 { 2462 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 2463 build_int_cst (fd->iter_type, 1)); 2464 expand_omp_build_assign (&gsi, counts[i], t); 2465 t = counts[i]; 2466 } 2467 else 2468 { 2469 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2470 fd->loops[i].v, fd->loops[i].n1); 2471 t = fold_convert (fd->iter_type, t); 2472 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2473 true, GSI_SAME_STMT); 2474 } 2475 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2476 size_int (i - fd->collapse + 1), 2477 NULL_TREE, NULL_TREE); 2478 expand_omp_build_assign (&gsi, aref, t); 2479 gsi_prev (&gsi); 2480 e2 = split_block (cont_bb, gsi_stmt (gsi)); 2481 new_header = e2->dest; 2482 } 2483 else 2484 new_header = cont_bb; 2485 gsi = gsi_after_labels (new_header); 2486 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 2487 true, GSI_SAME_STMT); 2488 tree n2 2489 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 2490 true, NULL_TREE, true, GSI_SAME_STMT); 2491 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 2492 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 2493 edge e3 = split_block (new_header, gsi_stmt (gsi)); 2494 cont_bb = e3->dest; 2495 remove_edge (e1); 2496 make_edge (body_bb, new_header, EDGE_FALLTHRU); 2497 e3->flags = EDGE_FALSE_VALUE; 2498 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2499 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 2500 e1->probability = e3->probability.invert (); 2501 2502 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); 2503 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 2504 2505 if (e2) 2506 { 2507 struct loop *loop = alloc_loop (); 2508 loop->header = new_header; 2509 loop->latch = e2->src; 2510 add_loop (loop, body_bb->loop_father); 2511 } 2512 } 2513 2514 /* If there are any lastprivate clauses and it is possible some loops 2515 might have zero iterations, ensure all the decls are initialized, 2516 otherwise we could crash evaluating C++ class iterators with lastprivate 2517 clauses. */ 2518 bool need_inits = false; 2519 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 2520 if (need_inits) 2521 { 2522 tree type = TREE_TYPE (fd->loops[i].v); 2523 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2524 expand_omp_build_assign (&gsi, fd->loops[i].v, 2525 fold_convert (type, fd->loops[i].n1)); 2526 } 2527 else 2528 { 2529 tree type = TREE_TYPE (fd->loops[i].v); 2530 tree this_cond = fold_build2 (fd->loops[i].cond_code, 2531 boolean_type_node, 2532 fold_convert (type, fd->loops[i].n1), 2533 fold_convert (type, fd->loops[i].n2)); 2534 if (!integer_onep (this_cond)) 2535 need_inits = true; 2536 } 2537 2538 return cont_bb; 2539 } 2540 2541 /* A subroutine of expand_omp_for. Generate code for a parallel 2542 loop with any schedule. 
Given parameters: 2543 2544 for (V = N1; V cond N2; V += STEP) BODY; 2545 2546 where COND is "<" or ">", we generate pseudocode 2547 2548 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 2549 if (more) goto L0; else goto L3; 2550 L0: 2551 V = istart0; 2552 iend = iend0; 2553 L1: 2554 BODY; 2555 V += STEP; 2556 if (V cond iend) goto L1; else goto L2; 2557 L2: 2558 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2559 L3: 2560 2561 If this is a combined omp parallel loop, instead of the call to 2562 GOMP_loop_foo_start, we call GOMP_loop_foo_next. 2563 If this is gimple_omp_for_combined_p loop, then instead of assigning 2564 V and iend in L0 we assign the first two _looptemp_ clause decls of the 2565 inner GIMPLE_OMP_FOR and V += STEP; and 2566 if (V cond iend) goto L1; else goto L2; are removed. 2567 2568 For collapsed loops, given parameters: 2569 collapse(3) 2570 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 2571 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 2572 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 2573 BODY; 2574 2575 we generate pseudocode 2576 2577 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 2578 if (cond3 is <) 2579 adj = STEP3 - 1; 2580 else 2581 adj = STEP3 + 1; 2582 count3 = (adj + N32 - N31) / STEP3; 2583 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 2584 if (cond2 is <) 2585 adj = STEP2 - 1; 2586 else 2587 adj = STEP2 + 1; 2588 count2 = (adj + N22 - N21) / STEP2; 2589 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 2590 if (cond1 is <) 2591 adj = STEP1 - 1; 2592 else 2593 adj = STEP1 + 1; 2594 count1 = (adj + N12 - N11) / STEP1; 2595 count = count1 * count2 * count3; 2596 goto Z1; 2597 Z0: 2598 count = 0; 2599 Z1: 2600 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 2601 if (more) goto L0; else goto L3; 2602 L0: 2603 V = istart0; 2604 T = V; 2605 V3 = N31 + (T % count3) * STEP3; 2606 T = T / count3; 2607 V2 = N21 + (T % count2) * STEP2; 2608 T = T / count2; 2609 V1 = N11 + T * STEP1; 2610 iend = iend0; 2611 L1: 2612 BODY; 2613 V += 1; 2614 if (V < iend) goto L10; else goto L2; 2615 L10: 2616 V3 += STEP3; 2617 if (V3 cond3 N32) goto L1; else goto L11; 2618 L11: 2619 V3 = N31; 2620 V2 += STEP2; 2621 if (V2 cond2 N22) goto L1; else goto L12; 2622 L12: 2623 V2 = N21; 2624 V1 += STEP1; 2625 goto L1; 2626 L2: 2627 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2628 L3: 2629 2630 */ 2631 2632 static void 2633 expand_omp_for_generic (struct omp_region *region, 2634 struct omp_for_data *fd, 2635 enum built_in_function start_fn, 2636 enum built_in_function next_fn, 2637 tree sched_arg, 2638 gimple *inner_stmt) 2639 { 2640 tree type, istart0, iend0, iend; 2641 tree t, vmain, vback, bias = NULL_TREE; 2642 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; 2643 basic_block l2_bb = NULL, l3_bb = NULL; 2644 gimple_stmt_iterator gsi; 2645 gassign *assign_stmt; 2646 bool in_combined_parallel = is_combined_parallel (region); 2647 bool broken_loop = region->cont == NULL; 2648 edge e, ne; 2649 tree *counts = NULL; 2650 int i; 2651 bool ordered_lastprivate = false; 2652 2653 gcc_assert (!broken_loop || !in_combined_parallel); 2654 gcc_assert (fd->iter_type == long_integer_type_node 2655 || !in_combined_parallel); 2656 2657 entry_bb = region->entry; 2658 cont_bb = region->cont; 2659 collapse_bb = NULL; 2660 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 2661 gcc_assert (broken_loop 2662 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 2663 l0_bb = split_edge (FALLTHRU_EDGE 
(entry_bb)); 2664 l1_bb = single_succ (l0_bb); 2665 if (!broken_loop) 2666 { 2667 l2_bb = create_empty_bb (cont_bb); 2668 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb 2669 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest 2670 == l1_bb)); 2671 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 2672 } 2673 else 2674 l2_bb = NULL; 2675 l3_bb = BRANCH_EDGE (entry_bb)->dest; 2676 exit_bb = region->exit; 2677 2678 gsi = gsi_last_nondebug_bb (entry_bb); 2679 2680 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 2681 if (fd->ordered 2682 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), 2683 OMP_CLAUSE_LASTPRIVATE)) 2684 ordered_lastprivate = true; 2685 tree reductions = NULL_TREE; 2686 tree mem = NULL_TREE; 2687 if (sched_arg) 2688 { 2689 if (fd->have_reductemp) 2690 { 2691 tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), 2692 OMP_CLAUSE__REDUCTEMP_); 2693 reductions = OMP_CLAUSE_DECL (c); 2694 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 2695 gimple *g = SSA_NAME_DEF_STMT (reductions); 2696 reductions = gimple_assign_rhs1 (g); 2697 OMP_CLAUSE_DECL (c) = reductions; 2698 entry_bb = gimple_bb (g); 2699 edge e = split_block (entry_bb, g); 2700 if (region->entry == entry_bb) 2701 region->entry = e->dest; 2702 gsi = gsi_last_bb (entry_bb); 2703 } 2704 else 2705 reductions = null_pointer_node; 2706 /* For now. */ 2707 mem = null_pointer_node; 2708 } 2709 if (fd->collapse > 1 || fd->ordered) 2710 { 2711 int first_zero_iter1 = -1, first_zero_iter2 = -1; 2712 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; 2713 2714 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); 2715 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 2716 zero_iter1_bb, first_zero_iter1, 2717 zero_iter2_bb, first_zero_iter2, l2_dom_bb); 2718 2719 if (zero_iter1_bb) 2720 { 2721 /* Some counts[i] vars might be uninitialized if 2722 some loop has zero iterations. But the body shouldn't 2723 be executed in that case, so just avoid uninit warnings. */ 2724 for (i = first_zero_iter1; 2725 i < (fd->ordered ? fd->ordered : fd->collapse); i++) 2726 if (SSA_VAR_P (counts[i])) 2727 TREE_NO_WARNING (counts[i]) = 1; 2728 gsi_prev (&gsi); 2729 e = split_block (entry_bb, gsi_stmt (gsi)); 2730 entry_bb = e->dest; 2731 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); 2732 gsi = gsi_last_nondebug_bb (entry_bb); 2733 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2734 get_immediate_dominator (CDI_DOMINATORS, 2735 zero_iter1_bb)); 2736 } 2737 if (zero_iter2_bb) 2738 { 2739 /* Some counts[i] vars might be uninitialized if 2740 some loop has zero iterations. But the body shouldn't 2741 be executed in that case, so just avoid uninit warnings. 
*/ 2742 for (i = first_zero_iter2; i < fd->ordered; i++) 2743 if (SSA_VAR_P (counts[i])) 2744 TREE_NO_WARNING (counts[i]) = 1; 2745 if (zero_iter1_bb) 2746 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2747 else 2748 { 2749 gsi_prev (&gsi); 2750 e = split_block (entry_bb, gsi_stmt (gsi)); 2751 entry_bb = e->dest; 2752 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2753 gsi = gsi_last_nondebug_bb (entry_bb); 2754 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2755 get_immediate_dominator 2756 (CDI_DOMINATORS, zero_iter2_bb)); 2757 } 2758 } 2759 if (fd->collapse == 1) 2760 { 2761 counts[0] = fd->loop.n2; 2762 fd->loop = fd->loops[0]; 2763 } 2764 } 2765 2766 type = TREE_TYPE (fd->loop.v); 2767 istart0 = create_tmp_var (fd->iter_type, ".istart0"); 2768 iend0 = create_tmp_var (fd->iter_type, ".iend0"); 2769 TREE_ADDRESSABLE (istart0) = 1; 2770 TREE_ADDRESSABLE (iend0) = 1; 2771 2772 /* See if we need to bias by LLONG_MIN. */ 2773 if (fd->iter_type == long_long_unsigned_type_node 2774 && TREE_CODE (type) == INTEGER_TYPE 2775 && !TYPE_UNSIGNED (type) 2776 && fd->ordered == 0) 2777 { 2778 tree n1, n2; 2779 2780 if (fd->loop.cond_code == LT_EXPR) 2781 { 2782 n1 = fd->loop.n1; 2783 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 2784 } 2785 else 2786 { 2787 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 2788 n2 = fd->loop.n1; 2789 } 2790 if (TREE_CODE (n1) != INTEGER_CST 2791 || TREE_CODE (n2) != INTEGER_CST 2792 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 2793 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 2794 } 2795 2796 gimple_stmt_iterator gsif = gsi; 2797 gsi_prev (&gsif); 2798 2799 tree arr = NULL_TREE; 2800 if (in_combined_parallel) 2801 { 2802 gcc_assert (fd->ordered == 0); 2803 /* In a combined parallel loop, emit a call to 2804 GOMP_loop_foo_next. */ 2805 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 2806 build_fold_addr_expr (istart0), 2807 build_fold_addr_expr (iend0)); 2808 } 2809 else 2810 { 2811 tree t0, t1, t2, t3, t4; 2812 /* If this is not a combined parallel loop, emit a call to 2813 GOMP_loop_foo_start in ENTRY_BB. 
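   For instance, for schedule(dynamic, 4) with long iterators this is
   roughly

     more = GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0);

   (an illustration only; the exact start_fn and its argument list depend
   on the schedule kind, on ordered/reduction clauses and on the iterator
   width, as set up below).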
*/ 2814 t4 = build_fold_addr_expr (iend0); 2815 t3 = build_fold_addr_expr (istart0); 2816 if (fd->ordered) 2817 { 2818 t0 = build_int_cst (unsigned_type_node, 2819 fd->ordered - fd->collapse + 1); 2820 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 2821 fd->ordered 2822 - fd->collapse + 1), 2823 ".omp_counts"); 2824 DECL_NAMELESS (arr) = 1; 2825 TREE_ADDRESSABLE (arr) = 1; 2826 TREE_STATIC (arr) = 1; 2827 vec<constructor_elt, va_gc> *v; 2828 vec_alloc (v, fd->ordered - fd->collapse + 1); 2829 int idx; 2830 2831 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 2832 { 2833 tree c; 2834 if (idx == 0 && fd->collapse > 1) 2835 c = fd->loop.n2; 2836 else 2837 c = counts[idx + fd->collapse - 1]; 2838 tree purpose = size_int (idx); 2839 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 2840 if (TREE_CODE (c) != INTEGER_CST) 2841 TREE_STATIC (arr) = 0; 2842 } 2843 2844 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 2845 if (!TREE_STATIC (arr)) 2846 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 2847 void_type_node, arr), 2848 true, NULL_TREE, true, GSI_SAME_STMT); 2849 t1 = build_fold_addr_expr (arr); 2850 t2 = NULL_TREE; 2851 } 2852 else 2853 { 2854 t2 = fold_convert (fd->iter_type, fd->loop.step); 2855 t1 = fd->loop.n2; 2856 t0 = fd->loop.n1; 2857 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 2858 { 2859 tree innerc 2860 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2861 OMP_CLAUSE__LOOPTEMP_); 2862 gcc_assert (innerc); 2863 t0 = OMP_CLAUSE_DECL (innerc); 2864 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2865 OMP_CLAUSE__LOOPTEMP_); 2866 gcc_assert (innerc); 2867 t1 = OMP_CLAUSE_DECL (innerc); 2868 } 2869 if (POINTER_TYPE_P (TREE_TYPE (t0)) 2870 && TYPE_PRECISION (TREE_TYPE (t0)) 2871 != TYPE_PRECISION (fd->iter_type)) 2872 { 2873 /* Avoid casting pointers to integer of a different size. */ 2874 tree itype = signed_type_for (type); 2875 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 2876 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 2877 } 2878 else 2879 { 2880 t1 = fold_convert (fd->iter_type, t1); 2881 t0 = fold_convert (fd->iter_type, t0); 2882 } 2883 if (bias) 2884 { 2885 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 2886 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 2887 } 2888 } 2889 if (fd->iter_type == long_integer_type_node || fd->ordered) 2890 { 2891 if (fd->chunk_size) 2892 { 2893 t = fold_convert (fd->iter_type, fd->chunk_size); 2894 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2895 if (sched_arg) 2896 { 2897 if (fd->ordered) 2898 t = build_call_expr (builtin_decl_explicit (start_fn), 2899 8, t0, t1, sched_arg, t, t3, t4, 2900 reductions, mem); 2901 else 2902 t = build_call_expr (builtin_decl_explicit (start_fn), 2903 9, t0, t1, t2, sched_arg, t, t3, t4, 2904 reductions, mem); 2905 } 2906 else if (fd->ordered) 2907 t = build_call_expr (builtin_decl_explicit (start_fn), 2908 5, t0, t1, t, t3, t4); 2909 else 2910 t = build_call_expr (builtin_decl_explicit (start_fn), 2911 6, t0, t1, t2, t, t3, t4); 2912 } 2913 else if (fd->ordered) 2914 t = build_call_expr (builtin_decl_explicit (start_fn), 2915 4, t0, t1, t3, t4); 2916 else 2917 t = build_call_expr (builtin_decl_explicit (start_fn), 2918 5, t0, t1, t2, t3, t4); 2919 } 2920 else 2921 { 2922 tree t5; 2923 tree c_bool_type; 2924 tree bfn_decl; 2925 2926 /* The GOMP_loop_ull_*start functions have additional boolean 2927 argument, true for < loops and false for > loops. 
2928 In Fortran, the C bool type can be different from 2929 boolean_type_node. */ 2930 bfn_decl = builtin_decl_explicit (start_fn); 2931 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 2932 t5 = build_int_cst (c_bool_type, 2933 fd->loop.cond_code == LT_EXPR ? 1 : 0); 2934 if (fd->chunk_size) 2935 { 2936 tree bfn_decl = builtin_decl_explicit (start_fn); 2937 t = fold_convert (fd->iter_type, fd->chunk_size); 2938 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2939 if (sched_arg) 2940 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg, 2941 t, t3, t4, reductions, mem); 2942 else 2943 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 2944 } 2945 else 2946 t = build_call_expr (builtin_decl_explicit (start_fn), 2947 6, t5, t0, t1, t2, t3, t4); 2948 } 2949 } 2950 if (TREE_TYPE (t) != boolean_type_node) 2951 t = fold_build2 (NE_EXPR, boolean_type_node, 2952 t, build_int_cst (TREE_TYPE (t), 0)); 2953 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2954 true, GSI_SAME_STMT); 2955 if (arr && !TREE_STATIC (arr)) 2956 { 2957 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 2958 TREE_THIS_VOLATILE (clobber) = 1; 2959 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 2960 GSI_SAME_STMT); 2961 } 2962 if (fd->have_reductemp) 2963 { 2964 gimple *g = gsi_stmt (gsi); 2965 gsi_remove (&gsi, true); 2966 release_ssa_name (gimple_assign_lhs (g)); 2967 2968 entry_bb = region->entry; 2969 gsi = gsi_last_nondebug_bb (entry_bb); 2970 2971 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 2972 } 2973 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 2974 2975 /* Remove the GIMPLE_OMP_FOR statement. */ 2976 gsi_remove (&gsi, true); 2977 2978 if (gsi_end_p (gsif)) 2979 gsif = gsi_after_labels (gsi_bb (gsif)); 2980 gsi_next (&gsif); 2981 2982 /* Iteration setup for sequential loop goes in L0_BB. 
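   In the simplest case (no collapse, no ordered clause) this amounts to
   roughly

     V = (__typeof (V)) istart0;
     iend = (__typeof (V)) iend0;

   with the LLONG_MIN bias from above subtracted again first if one was
   applied.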
*/ 2983 tree startvar = fd->loop.v; 2984 tree endvar = NULL_TREE; 2985 2986 if (gimple_omp_for_combined_p (fd->for_stmt)) 2987 { 2988 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 2989 && gimple_omp_for_kind (inner_stmt) 2990 == GF_OMP_FOR_KIND_SIMD); 2991 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 2992 OMP_CLAUSE__LOOPTEMP_); 2993 gcc_assert (innerc); 2994 startvar = OMP_CLAUSE_DECL (innerc); 2995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2996 OMP_CLAUSE__LOOPTEMP_); 2997 gcc_assert (innerc); 2998 endvar = OMP_CLAUSE_DECL (innerc); 2999 } 3000 3001 gsi = gsi_start_bb (l0_bb); 3002 t = istart0; 3003 if (fd->ordered && fd->collapse == 1) 3004 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 3005 fold_convert (fd->iter_type, fd->loop.step)); 3006 else if (bias) 3007 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 3008 if (fd->ordered && fd->collapse == 1) 3009 { 3010 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3011 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 3012 fd->loop.n1, fold_convert (sizetype, t)); 3013 else 3014 { 3015 t = fold_convert (TREE_TYPE (startvar), t); 3016 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 3017 fd->loop.n1, t); 3018 } 3019 } 3020 else 3021 { 3022 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3023 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 3024 t = fold_convert (TREE_TYPE (startvar), t); 3025 } 3026 t = force_gimple_operand_gsi (&gsi, t, 3027 DECL_P (startvar) 3028 && TREE_ADDRESSABLE (startvar), 3029 NULL_TREE, false, GSI_CONTINUE_LINKING); 3030 assign_stmt = gimple_build_assign (startvar, t); 3031 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3032 3033 t = iend0; 3034 if (fd->ordered && fd->collapse == 1) 3035 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 3036 fold_convert (fd->iter_type, fd->loop.step)); 3037 else if (bias) 3038 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 3039 if (fd->ordered && fd->collapse == 1) 3040 { 3041 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3042 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 3043 fd->loop.n1, fold_convert (sizetype, t)); 3044 else 3045 { 3046 t = fold_convert (TREE_TYPE (startvar), t); 3047 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 3048 fd->loop.n1, t); 3049 } 3050 } 3051 else 3052 { 3053 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3054 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 3055 t = fold_convert (TREE_TYPE (startvar), t); 3056 } 3057 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3058 false, GSI_CONTINUE_LINKING); 3059 if (endvar) 3060 { 3061 assign_stmt = gimple_build_assign (endvar, iend); 3062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3063 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 3064 assign_stmt = gimple_build_assign (fd->loop.v, iend); 3065 else 3066 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3068 } 3069 /* Handle linear clause adjustments. 
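   E.g. for a clause like linear(x:2) on this worksharing loop, the
   privatized x is seeded here with approximately

     x = x_orig + ((startvar - N1) / STEP) * 2;

   where x_orig denotes the value captured before the loop (a sketch; the
   real trees also handle pointer and reference types).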
*/ 3070 tree itercnt = NULL_TREE; 3071 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 3072 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 3073 c; c = OMP_CLAUSE_CHAIN (c)) 3074 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 3075 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 3076 { 3077 tree d = OMP_CLAUSE_DECL (c); 3078 bool is_ref = omp_is_reference (d); 3079 tree t = d, a, dest; 3080 if (is_ref) 3081 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 3082 tree type = TREE_TYPE (t); 3083 if (POINTER_TYPE_P (type)) 3084 type = sizetype; 3085 dest = unshare_expr (t); 3086 tree v = create_tmp_var (TREE_TYPE (t), NULL); 3087 expand_omp_build_assign (&gsif, v, t); 3088 if (itercnt == NULL_TREE) 3089 { 3090 itercnt = startvar; 3091 tree n1 = fd->loop.n1; 3092 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 3093 { 3094 itercnt 3095 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 3096 itercnt); 3097 n1 = fold_convert (TREE_TYPE (itercnt), n1); 3098 } 3099 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 3100 itercnt, n1); 3101 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 3102 itercnt, fd->loop.step); 3103 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 3104 NULL_TREE, false, 3105 GSI_CONTINUE_LINKING); 3106 } 3107 a = fold_build2 (MULT_EXPR, type, 3108 fold_convert (type, itercnt), 3109 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 3110 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 3111 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 3112 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3113 false, GSI_CONTINUE_LINKING); 3114 assign_stmt = gimple_build_assign (dest, t); 3115 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3116 } 3117 if (fd->collapse > 1) 3118 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 3119 3120 if (fd->ordered) 3121 { 3122 /* Until now, counts array contained number of iterations or 3123 variable containing it for ith loop. From now on, we need 3124 those counts only for collapsed loops, and only for the 2nd 3125 till the last collapsed one. Move those one element earlier, 3126 we'll use counts[fd->collapse - 1] for the first source/sink 3127 iteration counter and so on and counts[fd->ordered] 3128 as the array holding the current counter values for 3129 depend(source). 
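   Roughly speaking, with collapse(2) ordered(3): after the move counts[0]
   holds the iteration count of the second collapsed loop (the only count
   still needed), counts[1] serves as the source/sink counter of the
   combined collapsed dimension, counts[2] as the counter of the remaining
   ordered loop, and counts[3] as the array passed to the doacross runtime
   calls.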
*/ 3130 if (fd->collapse > 1) 3131 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 3132 if (broken_loop) 3133 { 3134 int i; 3135 for (i = fd->collapse; i < fd->ordered; i++) 3136 { 3137 tree type = TREE_TYPE (fd->loops[i].v); 3138 tree this_cond 3139 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 3140 fold_convert (type, fd->loops[i].n1), 3141 fold_convert (type, fd->loops[i].n2)); 3142 if (!integer_onep (this_cond)) 3143 break; 3144 } 3145 if (i < fd->ordered) 3146 { 3147 cont_bb 3148 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 3149 add_bb_to_loop (cont_bb, l1_bb->loop_father); 3150 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 3151 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 3152 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3153 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 3154 make_edge (cont_bb, l1_bb, 0); 3155 l2_bb = create_empty_bb (cont_bb); 3156 broken_loop = false; 3157 } 3158 } 3159 expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 3160 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 3161 ordered_lastprivate); 3162 if (counts[fd->collapse - 1]) 3163 { 3164 gcc_assert (fd->collapse == 1); 3165 gsi = gsi_last_bb (l0_bb); 3166 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 3167 istart0, true); 3168 gsi = gsi_last_bb (cont_bb); 3169 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], 3170 build_int_cst (fd->iter_type, 1)); 3171 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 3172 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3173 size_zero_node, NULL_TREE, NULL_TREE); 3174 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 3175 t = counts[fd->collapse - 1]; 3176 } 3177 else if (fd->collapse > 1) 3178 t = fd->loop.v; 3179 else 3180 { 3181 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3182 fd->loops[0].v, fd->loops[0].n1); 3183 t = fold_convert (fd->iter_type, t); 3184 } 3185 gsi = gsi_last_bb (l0_bb); 3186 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3187 size_zero_node, NULL_TREE, NULL_TREE); 3188 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3189 false, GSI_CONTINUE_LINKING); 3190 expand_omp_build_assign (&gsi, aref, t, true); 3191 } 3192 3193 if (!broken_loop) 3194 { 3195 /* Code to control the increment and predicate for the sequential 3196 loop goes in the CONT_BB. 
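   For the non-combined case this is essentially the

     V += STEP;
     if (V cond iend) goto L1; else goto L2;

   portion of the pseudocode given before this function.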
*/ 3197 gsi = gsi_last_nondebug_bb (cont_bb); 3198 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3199 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3200 vmain = gimple_omp_continue_control_use (cont_stmt); 3201 vback = gimple_omp_continue_control_def (cont_stmt); 3202 3203 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3204 { 3205 if (POINTER_TYPE_P (type)) 3206 t = fold_build_pointer_plus (vmain, fd->loop.step); 3207 else 3208 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 3209 t = force_gimple_operand_gsi (&gsi, t, 3210 DECL_P (vback) 3211 && TREE_ADDRESSABLE (vback), 3212 NULL_TREE, true, GSI_SAME_STMT); 3213 assign_stmt = gimple_build_assign (vback, t); 3214 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3215 3216 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 3217 { 3218 tree tem; 3219 if (fd->collapse > 1) 3220 tem = fd->loop.v; 3221 else 3222 { 3223 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3224 fd->loops[0].v, fd->loops[0].n1); 3225 tem = fold_convert (fd->iter_type, tem); 3226 } 3227 tree aref = build4 (ARRAY_REF, fd->iter_type, 3228 counts[fd->ordered], size_zero_node, 3229 NULL_TREE, NULL_TREE); 3230 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, 3231 true, GSI_SAME_STMT); 3232 expand_omp_build_assign (&gsi, aref, tem); 3233 } 3234 3235 t = build2 (fd->loop.cond_code, boolean_type_node, 3236 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 3237 iend); 3238 gcond *cond_stmt = gimple_build_cond_empty (t); 3239 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3240 } 3241 3242 /* Remove GIMPLE_OMP_CONTINUE. */ 3243 gsi_remove (&gsi, true); 3244 3245 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3246 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); 3247 3248 /* Emit code to get the next parallel iteration in L2_BB. */ 3249 gsi = gsi_start_bb (l2_bb); 3250 3251 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 3252 build_fold_addr_expr (istart0), 3253 build_fold_addr_expr (iend0)); 3254 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3255 false, GSI_CONTINUE_LINKING); 3256 if (TREE_TYPE (t) != boolean_type_node) 3257 t = fold_build2 (NE_EXPR, boolean_type_node, 3258 t, build_int_cst (TREE_TYPE (t), 0)); 3259 gcond *cond_stmt = gimple_build_cond_empty (t); 3260 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 3261 } 3262 3263 /* Add the loop cleanup function. 
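   Depending on the GIMPLE_OMP_RETURN in EXIT_BB this is a call to
   GOMP_loop_end (), GOMP_loop_end_nowait () or GOMP_loop_end_cancel ();
   e.g. a plain #pragma omp for ends with GOMP_loop_end () while a nowait
   loop ends with GOMP_loop_end_nowait ().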
*/ 3264 gsi = gsi_last_nondebug_bb (exit_bb); 3265 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3266 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 3267 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3268 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3269 else 3270 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3271 gcall *call_stmt = gimple_build_call (t, 0); 3272 if (fd->ordered) 3273 { 3274 tree arr = counts[fd->ordered]; 3275 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 3276 TREE_THIS_VOLATILE (clobber) = 1; 3277 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 3278 GSI_SAME_STMT); 3279 } 3280 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3281 { 3282 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 3283 if (fd->have_reductemp) 3284 { 3285 gimple *g = gimple_build_assign (reductions, NOP_EXPR, 3286 gimple_call_lhs (call_stmt)); 3287 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 3288 } 3289 } 3290 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 3291 gsi_remove (&gsi, true); 3292 3293 /* Connect the new blocks. */ 3294 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 3295 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 3296 3297 if (!broken_loop) 3298 { 3299 gimple_seq phis; 3300 3301 e = find_edge (cont_bb, l3_bb); 3302 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 3303 3304 phis = phi_nodes (l3_bb); 3305 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 3306 { 3307 gimple *phi = gsi_stmt (gsi); 3308 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 3309 PHI_ARG_DEF_FROM_EDGE (phi, e)); 3310 } 3311 remove_edge (e); 3312 3313 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 3314 e = find_edge (cont_bb, l1_bb); 3315 if (e == NULL) 3316 { 3317 e = BRANCH_EDGE (cont_bb); 3318 gcc_assert (single_succ (e->dest) == l1_bb); 3319 } 3320 if (gimple_omp_for_combined_p (fd->for_stmt)) 3321 { 3322 remove_edge (e); 3323 e = NULL; 3324 } 3325 else if (fd->collapse > 1) 3326 { 3327 remove_edge (e); 3328 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3329 } 3330 else 3331 e->flags = EDGE_TRUE_VALUE; 3332 if (e) 3333 { 3334 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 3335 find_edge (cont_bb, l2_bb)->probability = e->probability.invert (); 3336 } 3337 else 3338 { 3339 e = find_edge (cont_bb, l2_bb); 3340 e->flags = EDGE_FALLTHRU; 3341 } 3342 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 3343 3344 if (gimple_in_ssa_p (cfun)) 3345 { 3346 /* Add phis to the outer loop that connect to the phis in the inner, 3347 original loop, and move the loop entry value of the inner phi to 3348 the loop entry value of the outer phi. 
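   For example, if the original loop had

     x_1 = PHI <x_0 (preheader), x_2 (latch)>

   we create in L0_BB something along the lines of

     x_3 = PHI <x_0 (ENTRY_BB), x_exit (L2_BB)>

   and rewrite the inner phi's loop-entry argument to x_3 (a sketch;
   x_exit stands for the value that flows out over the L2_BB -> L3_BB
   edge).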
*/ 3349 gphi_iterator psi; 3350 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 3351 { 3352 location_t locus; 3353 gphi *nphi; 3354 gphi *exit_phi = psi.phi (); 3355 3356 if (virtual_operand_p (gimple_phi_result (exit_phi))) 3357 continue; 3358 3359 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 3360 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 3361 3362 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 3363 edge latch_to_l1 = find_edge (latch, l1_bb); 3364 gphi *inner_phi 3365 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 3366 3367 tree t = gimple_phi_result (exit_phi); 3368 tree new_res = copy_ssa_name (t, NULL); 3369 nphi = create_phi_node (new_res, l0_bb); 3370 3371 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 3372 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 3373 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 3374 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 3375 add_phi_arg (nphi, t, entry_to_l0, locus); 3376 3377 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 3378 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 3379 3380 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 3381 } 3382 } 3383 3384 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 3385 recompute_dominator (CDI_DOMINATORS, l2_bb)); 3386 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 3387 recompute_dominator (CDI_DOMINATORS, l3_bb)); 3388 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 3389 recompute_dominator (CDI_DOMINATORS, l0_bb)); 3390 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 3391 recompute_dominator (CDI_DOMINATORS, l1_bb)); 3392 3393 /* We enter expand_omp_for_generic with a loop. This original loop may 3394 have its own loop struct, or it may be part of an outer loop struct 3395 (which may be the fake loop). */ 3396 struct loop *outer_loop = entry_bb->loop_father; 3397 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 3398 3399 add_bb_to_loop (l2_bb, outer_loop); 3400 3401 /* We've added a new loop around the original loop. Allocate the 3402 corresponding loop struct. */ 3403 struct loop *new_loop = alloc_loop (); 3404 new_loop->header = l0_bb; 3405 new_loop->latch = l2_bb; 3406 add_loop (new_loop, outer_loop); 3407 3408 /* Allocate a loop structure for the original loop unless we already 3409 had one. */ 3410 if (!orig_loop_has_loop_struct 3411 && !gimple_omp_for_combined_p (fd->for_stmt)) 3412 { 3413 struct loop *orig_loop = alloc_loop (); 3414 orig_loop->header = l1_bb; 3415 /* The loop may have multiple latches. */ 3416 add_loop (orig_loop, new_loop); 3417 } 3418 } 3419 } 3420 3421 /* A subroutine of expand_omp_for. Generate code for a parallel 3422 loop with static schedule and no specified chunk size. 
Given 3423 parameters: 3424 3425 for (V = N1; V cond N2; V += STEP) BODY; 3426 3427 where COND is "<" or ">", we generate pseudocode 3428 3429 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3430 if (cond is <) 3431 adj = STEP - 1; 3432 else 3433 adj = STEP + 1; 3434 if ((__typeof (V)) -1 > 0 && cond is >) 3435 n = -(adj + N2 - N1) / -STEP; 3436 else 3437 n = (adj + N2 - N1) / STEP; 3438 q = n / nthreads; 3439 tt = n % nthreads; 3440 if (threadid < tt) goto L3; else goto L4; 3441 L3: 3442 tt = 0; 3443 q = q + 1; 3444 L4: 3445 s0 = q * threadid + tt; 3446 e0 = s0 + q; 3447 V = s0 * STEP + N1; 3448 if (s0 >= e0) goto L2; else goto L0; 3449 L0: 3450 e = e0 * STEP + N1; 3451 L1: 3452 BODY; 3453 V += STEP; 3454 if (V cond e) goto L1; 3455 L2: 3456 */ 3457 3458 static void 3459 expand_omp_for_static_nochunk (struct omp_region *region, 3460 struct omp_for_data *fd, 3461 gimple *inner_stmt) 3462 { 3463 tree n, q, s0, e0, e, t, tt, nthreads, threadid; 3464 tree type, itype, vmain, vback; 3465 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; 3466 basic_block body_bb, cont_bb, collapse_bb = NULL; 3467 basic_block fin_bb; 3468 gimple_stmt_iterator gsi; 3469 edge ep; 3470 bool broken_loop = region->cont == NULL; 3471 tree *counts = NULL; 3472 tree n1, n2, step; 3473 tree reductions = NULL_TREE; 3474 3475 itype = type = TREE_TYPE (fd->loop.v); 3476 if (POINTER_TYPE_P (type)) 3477 itype = signed_type_for (type); 3478 3479 entry_bb = region->entry; 3480 cont_bb = region->cont; 3481 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 3482 fin_bb = BRANCH_EDGE (entry_bb)->dest; 3483 gcc_assert (broken_loop 3484 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 3485 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 3486 body_bb = single_succ (seq_start_bb); 3487 if (!broken_loop) 3488 { 3489 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3490 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3491 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3492 } 3493 exit_bb = region->exit; 3494 3495 /* Iteration space partitioning goes in ENTRY_BB. 
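Each thread is handed one contiguous block of iterations: q = n / nthreads of them, plus one extra for the first tt = n % nthreads threads.  For example, with n = 10 and nthreads = 4 the threads get the ranges [0, 3), [3, 6), [6, 8) and [8, 10).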
*/ 3496 gsi = gsi_last_nondebug_bb (entry_bb); 3497 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3498 3499 if (fd->collapse > 1) 3500 { 3501 int first_zero_iter = -1, dummy = -1; 3502 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3503 3504 counts = XALLOCAVEC (tree, fd->collapse); 3505 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3506 fin_bb, first_zero_iter, 3507 dummy_bb, dummy, l2_dom_bb); 3508 t = NULL_TREE; 3509 } 3510 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3511 t = integer_one_node; 3512 else 3513 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3514 fold_convert (type, fd->loop.n1), 3515 fold_convert (type, fd->loop.n2)); 3516 if (fd->collapse == 1 3517 && TYPE_UNSIGNED (type) 3518 && (t == NULL_TREE || !integer_onep (t))) 3519 { 3520 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3521 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3522 true, GSI_SAME_STMT); 3523 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3524 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3525 true, GSI_SAME_STMT); 3526 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 3527 NULL_TREE, NULL_TREE); 3528 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3529 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3530 expand_omp_regimplify_p, NULL, NULL) 3531 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3532 expand_omp_regimplify_p, NULL, NULL)) 3533 { 3534 gsi = gsi_for_stmt (cond_stmt); 3535 gimple_regimplify_operands (cond_stmt, &gsi); 3536 } 3537 ep = split_block (entry_bb, cond_stmt); 3538 ep->flags = EDGE_TRUE_VALUE; 3539 entry_bb = ep->dest; 3540 ep->probability = profile_probability::very_likely (); 3541 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); 3542 ep->probability = profile_probability::very_unlikely (); 3543 if (gimple_in_ssa_p (cfun)) 3544 { 3545 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; 3546 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3547 !gsi_end_p (gpi); gsi_next (&gpi)) 3548 { 3549 gphi *phi = gpi.phi (); 3550 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3551 ep, UNKNOWN_LOCATION); 3552 } 3553 } 3554 gsi = gsi_last_bb (entry_bb); 3555 } 3556 3557 if (fd->have_reductemp) 3558 { 3559 tree t1 = build_int_cst (long_integer_type_node, 0); 3560 tree t2 = build_int_cst (long_integer_type_node, 1); 3561 tree t3 = build_int_cstu (long_integer_type_node, 3562 (HOST_WIDE_INT_1U << 31) + 1); 3563 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3564 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 3565 reductions = OMP_CLAUSE_DECL (clauses); 3566 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 3567 gimple *g = SSA_NAME_DEF_STMT (reductions); 3568 reductions = gimple_assign_rhs1 (g); 3569 OMP_CLAUSE_DECL (clauses) = reductions; 3570 gimple_stmt_iterator gsi2 = gsi_for_stmt (g); 3571 tree t 3572 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 3573 9, t1, t2, t2, t3, t1, null_pointer_node, 3574 null_pointer_node, reductions, null_pointer_node); 3575 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 3576 true, GSI_SAME_STMT); 3577 gsi_remove (&gsi2, true); 3578 release_ssa_name (gimple_assign_lhs (g)); 3579 } 3580 switch (gimple_omp_for_kind (fd->for_stmt)) 3581 { 3582 case GF_OMP_FOR_KIND_FOR: 3583 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3584 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3585 break; 3586 case GF_OMP_FOR_KIND_DISTRIBUTE: 3587 nthreads = builtin_decl_explicit 
(BUILT_IN_OMP_GET_NUM_TEAMS); 3588 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3589 break; 3590 default: 3591 gcc_unreachable (); 3592 } 3593 nthreads = build_call_expr (nthreads, 0); 3594 nthreads = fold_convert (itype, nthreads); 3595 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3596 true, GSI_SAME_STMT); 3597 threadid = build_call_expr (threadid, 0); 3598 threadid = fold_convert (itype, threadid); 3599 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3600 true, GSI_SAME_STMT); 3601 3602 n1 = fd->loop.n1; 3603 n2 = fd->loop.n2; 3604 step = fd->loop.step; 3605 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3606 { 3607 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3608 OMP_CLAUSE__LOOPTEMP_); 3609 gcc_assert (innerc); 3610 n1 = OMP_CLAUSE_DECL (innerc); 3611 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3612 OMP_CLAUSE__LOOPTEMP_); 3613 gcc_assert (innerc); 3614 n2 = OMP_CLAUSE_DECL (innerc); 3615 } 3616 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3617 true, NULL_TREE, true, GSI_SAME_STMT); 3618 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3619 true, NULL_TREE, true, GSI_SAME_STMT); 3620 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3621 true, NULL_TREE, true, GSI_SAME_STMT); 3622 3623 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); 3624 t = fold_build2 (PLUS_EXPR, itype, step, t); 3625 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3626 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3627 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3628 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3629 fold_build1 (NEGATE_EXPR, itype, t), 3630 fold_build1 (NEGATE_EXPR, itype, step)); 3631 else 3632 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3633 t = fold_convert (itype, t); 3634 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3635 3636 q = create_tmp_reg (itype, "q"); 3637 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); 3638 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3639 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); 3640 3641 tt = create_tmp_reg (itype, "tt"); 3642 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); 3643 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3644 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); 3645 3646 t = build2 (LT_EXPR, boolean_type_node, threadid, tt); 3647 gcond *cond_stmt = gimple_build_cond_empty (t); 3648 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3649 3650 second_bb = split_block (entry_bb, cond_stmt)->dest; 3651 gsi = gsi_last_nondebug_bb (second_bb); 3652 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3653 3654 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), 3655 GSI_SAME_STMT); 3656 gassign *assign_stmt 3657 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); 3658 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3659 3660 third_bb = split_block (second_bb, assign_stmt)->dest; 3661 gsi = gsi_last_nondebug_bb (third_bb); 3662 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3663 3664 t = build2 (MULT_EXPR, itype, q, threadid); 3665 t = build2 (PLUS_EXPR, itype, t, tt); 3666 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3667 3668 t = fold_build2 (PLUS_EXPR, itype, s0, q); 3669 e0 = 
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3670 3671 t = build2 (GE_EXPR, boolean_type_node, s0, e0); 3672 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3673 3674 /* Remove the GIMPLE_OMP_FOR statement. */ 3675 gsi_remove (&gsi, true); 3676 3677 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 3678 gsi = gsi_start_bb (seq_start_bb); 3679 3680 tree startvar = fd->loop.v; 3681 tree endvar = NULL_TREE; 3682 3683 if (gimple_omp_for_combined_p (fd->for_stmt)) 3684 { 3685 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 3686 ? gimple_omp_parallel_clauses (inner_stmt) 3687 : gimple_omp_for_clauses (inner_stmt); 3688 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 3689 gcc_assert (innerc); 3690 startvar = OMP_CLAUSE_DECL (innerc); 3691 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3692 OMP_CLAUSE__LOOPTEMP_); 3693 gcc_assert (innerc); 3694 endvar = OMP_CLAUSE_DECL (innerc); 3695 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 3696 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 3697 { 3698 int i; 3699 for (i = 1; i < fd->collapse; i++) 3700 { 3701 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3702 OMP_CLAUSE__LOOPTEMP_); 3703 gcc_assert (innerc); 3704 } 3705 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3706 OMP_CLAUSE__LOOPTEMP_); 3707 if (innerc) 3708 { 3709 /* If needed (distribute parallel for with lastprivate), 3710 propagate down the total number of iterations. */ 3711 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 3712 fd->loop.n2); 3713 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 3714 GSI_CONTINUE_LINKING); 3715 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 3716 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3717 } 3718 } 3719 } 3720 t = fold_convert (itype, s0); 3721 t = fold_build2 (MULT_EXPR, itype, t, step); 3722 if (POINTER_TYPE_P (type)) 3723 { 3724 t = fold_build_pointer_plus (n1, t); 3725 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 3726 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 3727 t = fold_convert (signed_type_for (type), t); 3728 } 3729 else 3730 t = fold_build2 (PLUS_EXPR, type, t, n1); 3731 t = fold_convert (TREE_TYPE (startvar), t); 3732 t = force_gimple_operand_gsi (&gsi, t, 3733 DECL_P (startvar) 3734 && TREE_ADDRESSABLE (startvar), 3735 NULL_TREE, false, GSI_CONTINUE_LINKING); 3736 assign_stmt = gimple_build_assign (startvar, t); 3737 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3738 3739 t = fold_convert (itype, e0); 3740 t = fold_build2 (MULT_EXPR, itype, t, step); 3741 if (POINTER_TYPE_P (type)) 3742 { 3743 t = fold_build_pointer_plus (n1, t); 3744 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 3745 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 3746 t = fold_convert (signed_type_for (type), t); 3747 } 3748 else 3749 t = fold_build2 (PLUS_EXPR, type, t, n1); 3750 t = fold_convert (TREE_TYPE (startvar), t); 3751 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3752 false, GSI_CONTINUE_LINKING); 3753 if (endvar) 3754 { 3755 assign_stmt = gimple_build_assign (endvar, e); 3756 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3757 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 3758 assign_stmt = gimple_build_assign (fd->loop.v, e); 3759 else 3760 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 3761 gsi_insert_after (&gsi, assign_stmt, 
GSI_CONTINUE_LINKING); 3762 } 3763 /* Handle linear clause adjustments. */ 3764 tree itercnt = NULL_TREE; 3765 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 3766 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 3767 c; c = OMP_CLAUSE_CHAIN (c)) 3768 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 3769 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 3770 { 3771 tree d = OMP_CLAUSE_DECL (c); 3772 bool is_ref = omp_is_reference (d); 3773 tree t = d, a, dest; 3774 if (is_ref) 3775 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 3776 if (itercnt == NULL_TREE) 3777 { 3778 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3779 { 3780 itercnt = fold_build2 (MINUS_EXPR, itype, 3781 fold_convert (itype, n1), 3782 fold_convert (itype, fd->loop.n1)); 3783 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); 3784 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); 3785 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 3786 NULL_TREE, false, 3787 GSI_CONTINUE_LINKING); 3788 } 3789 else 3790 itercnt = s0; 3791 } 3792 tree type = TREE_TYPE (t); 3793 if (POINTER_TYPE_P (type)) 3794 type = sizetype; 3795 a = fold_build2 (MULT_EXPR, type, 3796 fold_convert (type, itercnt), 3797 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 3798 dest = unshare_expr (t); 3799 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 3800 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); 3801 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3802 false, GSI_CONTINUE_LINKING); 3803 assign_stmt = gimple_build_assign (dest, t); 3804 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3805 } 3806 if (fd->collapse > 1) 3807 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 3808 3809 if (!broken_loop) 3810 { 3811 /* The code controlling the sequential loop replaces the 3812 GIMPLE_OMP_CONTINUE. */ 3813 gsi = gsi_last_nondebug_bb (cont_bb); 3814 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3815 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3816 vmain = gimple_omp_continue_control_use (cont_stmt); 3817 vback = gimple_omp_continue_control_def (cont_stmt); 3818 3819 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3820 { 3821 if (POINTER_TYPE_P (type)) 3822 t = fold_build_pointer_plus (vmain, step); 3823 else 3824 t = fold_build2 (PLUS_EXPR, type, vmain, step); 3825 t = force_gimple_operand_gsi (&gsi, t, 3826 DECL_P (vback) 3827 && TREE_ADDRESSABLE (vback), 3828 NULL_TREE, true, GSI_SAME_STMT); 3829 assign_stmt = gimple_build_assign (vback, t); 3830 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3831 3832 t = build2 (fd->loop.cond_code, boolean_type_node, 3833 DECL_P (vback) && TREE_ADDRESSABLE (vback) 3834 ? t : vback, e); 3835 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3836 } 3837 3838 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 3839 gsi_remove (&gsi, true); 3840 3841 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3842 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 3843 } 3844 3845 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. 
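Nothing is emitted when the directive had a nowait clause.  If fd->have_reductemp, the barrier is replaced by a call to GOMP_loop_end (GOMP_loop_end_cancel when the GIMPLE_OMP_RETURN has a lhs, in which case the call result is also copied back into the reduction temporary).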
*/ 3846 gsi = gsi_last_nondebug_bb (exit_bb); 3847 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3848 { 3849 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 3850 if (fd->have_reductemp) 3851 { 3852 tree fn; 3853 if (t) 3854 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3855 else 3856 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3857 gcall *g = gimple_build_call (fn, 0); 3858 if (t) 3859 { 3860 gimple_call_set_lhs (g, t); 3861 gsi_insert_after (&gsi, gimple_build_assign (reductions, 3862 NOP_EXPR, t), 3863 GSI_SAME_STMT); 3864 } 3865 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 3866 } 3867 else 3868 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 3869 } 3870 gsi_remove (&gsi, true); 3871 3872 /* Connect all the blocks. */ 3873 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); 3874 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4); 3875 ep = find_edge (entry_bb, second_bb); 3876 ep->flags = EDGE_TRUE_VALUE; 3877 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); 3878 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; 3879 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; 3880 3881 if (!broken_loop) 3882 { 3883 ep = find_edge (cont_bb, body_bb); 3884 if (ep == NULL) 3885 { 3886 ep = BRANCH_EDGE (cont_bb); 3887 gcc_assert (single_succ (ep->dest) == body_bb); 3888 } 3889 if (gimple_omp_for_combined_p (fd->for_stmt)) 3890 { 3891 remove_edge (ep); 3892 ep = NULL; 3893 } 3894 else if (fd->collapse > 1) 3895 { 3896 remove_edge (ep); 3897 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3898 } 3899 else 3900 ep->flags = EDGE_TRUE_VALUE; 3901 find_edge (cont_bb, fin_bb)->flags 3902 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 3903 } 3904 3905 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); 3906 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); 3907 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); 3908 3909 set_immediate_dominator (CDI_DOMINATORS, body_bb, 3910 recompute_dominator (CDI_DOMINATORS, body_bb)); 3911 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 3912 recompute_dominator (CDI_DOMINATORS, fin_bb)); 3913 3914 struct loop *loop = body_bb->loop_father; 3915 if (loop != entry_bb->loop_father) 3916 { 3917 gcc_assert (broken_loop || loop->header == body_bb); 3918 gcc_assert (broken_loop 3919 || loop->latch == region->cont 3920 || single_pred (loop->latch) == region->cont); 3921 return; 3922 } 3923 3924 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 3925 { 3926 loop = alloc_loop (); 3927 loop->header = body_bb; 3928 if (collapse_bb == NULL) 3929 loop->latch = cont_bb; 3930 add_loop (loop, body_bb->loop_father); 3931 } 3932 } 3933 3934 /* Return phi in E->DEST with ARG on edge E. */ 3935 3936 static gphi * 3937 find_phi_with_arg_on_edge (tree arg, edge e) 3938 { 3939 basic_block bb = e->dest; 3940 3941 for (gphi_iterator gpi = gsi_start_phis (bb); 3942 !gsi_end_p (gpi); 3943 gsi_next (&gpi)) 3944 { 3945 gphi *phi = gpi.phi (); 3946 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) 3947 return phi; 3948 } 3949 3950 return NULL; 3951 } 3952 3953 /* A subroutine of expand_omp_for. Generate code for a parallel 3954 loop with static schedule and a specified chunk size. 
Given 3955 parameters: 3956 3957 for (V = N1; V cond N2; V += STEP) BODY; 3958 3959 where COND is "<" or ">", we generate pseudocode 3960 3961 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3962 if (cond is <) 3963 adj = STEP - 1; 3964 else 3965 adj = STEP + 1; 3966 if ((__typeof (V)) -1 > 0 && cond is >) 3967 n = -(adj + N2 - N1) / -STEP; 3968 else 3969 n = (adj + N2 - N1) / STEP; 3970 trip = 0; 3971 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is 3972 here so that V is defined 3973 if the loop is not entered 3974 L0: 3975 s0 = (trip * nthreads + threadid) * CHUNK; 3976 e0 = min (s0 + CHUNK, n); 3977 if (s0 < n) goto L1; else goto L4; 3978 L1: 3979 V = s0 * STEP + N1; 3980 e = e0 * STEP + N1; 3981 L2: 3982 BODY; 3983 V += STEP; 3984 if (V cond e) goto L2; else goto L3; 3985 L3: 3986 trip += 1; 3987 goto L0; 3988 L4: 3989 */ 3990 3991 static void 3992 expand_omp_for_static_chunk (struct omp_region *region, 3993 struct omp_for_data *fd, gimple *inner_stmt) 3994 { 3995 tree n, s0, e0, e, t; 3996 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; 3997 tree type, itype, vmain, vback, vextra; 3998 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; 3999 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; 4000 gimple_stmt_iterator gsi; 4001 edge se; 4002 bool broken_loop = region->cont == NULL; 4003 tree *counts = NULL; 4004 tree n1, n2, step; 4005 tree reductions = NULL_TREE; 4006 4007 itype = type = TREE_TYPE (fd->loop.v); 4008 if (POINTER_TYPE_P (type)) 4009 itype = signed_type_for (type); 4010 4011 entry_bb = region->entry; 4012 se = split_block (entry_bb, last_stmt (entry_bb)); 4013 entry_bb = se->src; 4014 iter_part_bb = se->dest; 4015 cont_bb = region->cont; 4016 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 4017 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 4018 gcc_assert (broken_loop 4019 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 4020 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 4021 body_bb = single_succ (seq_start_bb); 4022 if (!broken_loop) 4023 { 4024 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 4025 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 4026 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4027 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 4028 } 4029 exit_bb = region->exit; 4030 4031 /* Trip and adjustment setup goes in ENTRY_BB. 
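The trip counter starts at zero and counts how many rounds of chunks have been handed out so far; VEXTRA precomputes threadid * CHUNK * STEP + N1 so that V has a well defined value even if this thread never enters the sequential loop.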
*/ 4032 gsi = gsi_last_nondebug_bb (entry_bb); 4033 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4034 4035 if (fd->collapse > 1) 4036 { 4037 int first_zero_iter = -1, dummy = -1; 4038 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 4039 4040 counts = XALLOCAVEC (tree, fd->collapse); 4041 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4042 fin_bb, first_zero_iter, 4043 dummy_bb, dummy, l2_dom_bb); 4044 t = NULL_TREE; 4045 } 4046 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4047 t = integer_one_node; 4048 else 4049 t = fold_binary (fd->loop.cond_code, boolean_type_node, 4050 fold_convert (type, fd->loop.n1), 4051 fold_convert (type, fd->loop.n2)); 4052 if (fd->collapse == 1 4053 && TYPE_UNSIGNED (type) 4054 && (t == NULL_TREE || !integer_onep (t))) 4055 { 4056 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 4057 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 4058 true, GSI_SAME_STMT); 4059 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 4060 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 4061 true, GSI_SAME_STMT); 4062 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 4063 NULL_TREE, NULL_TREE); 4064 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 4065 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 4066 expand_omp_regimplify_p, NULL, NULL) 4067 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 4068 expand_omp_regimplify_p, NULL, NULL)) 4069 { 4070 gsi = gsi_for_stmt (cond_stmt); 4071 gimple_regimplify_operands (cond_stmt, &gsi); 4072 } 4073 se = split_block (entry_bb, cond_stmt); 4074 se->flags = EDGE_TRUE_VALUE; 4075 entry_bb = se->dest; 4076 se->probability = profile_probability::very_likely (); 4077 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 4078 se->probability = profile_probability::very_unlikely (); 4079 if (gimple_in_ssa_p (cfun)) 4080 { 4081 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 4082 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 4083 !gsi_end_p (gpi); gsi_next (&gpi)) 4084 { 4085 gphi *phi = gpi.phi (); 4086 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 4087 se, UNKNOWN_LOCATION); 4088 } 4089 } 4090 gsi = gsi_last_bb (entry_bb); 4091 } 4092 4093 if (fd->have_reductemp) 4094 { 4095 tree t1 = build_int_cst (long_integer_type_node, 0); 4096 tree t2 = build_int_cst (long_integer_type_node, 1); 4097 tree t3 = build_int_cstu (long_integer_type_node, 4098 (HOST_WIDE_INT_1U << 31) + 1); 4099 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4100 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 4101 reductions = OMP_CLAUSE_DECL (clauses); 4102 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 4103 gimple *g = SSA_NAME_DEF_STMT (reductions); 4104 reductions = gimple_assign_rhs1 (g); 4105 OMP_CLAUSE_DECL (clauses) = reductions; 4106 gimple_stmt_iterator gsi2 = gsi_for_stmt (g); 4107 tree t 4108 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 4109 9, t1, t2, t2, t3, t1, null_pointer_node, 4110 null_pointer_node, reductions, null_pointer_node); 4111 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 4112 true, GSI_SAME_STMT); 4113 gsi_remove (&gsi2, true); 4114 release_ssa_name (gimple_assign_lhs (g)); 4115 } 4116 switch (gimple_omp_for_kind (fd->for_stmt)) 4117 { 4118 case GF_OMP_FOR_KIND_FOR: 4119 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 4120 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 4121 break; 4122 case GF_OMP_FOR_KIND_DISTRIBUTE: 4123 nthreads = builtin_decl_explicit 
(BUILT_IN_OMP_GET_NUM_TEAMS); 4124 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 4125 break; 4126 default: 4127 gcc_unreachable (); 4128 } 4129 nthreads = build_call_expr (nthreads, 0); 4130 nthreads = fold_convert (itype, nthreads); 4131 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 4132 true, GSI_SAME_STMT); 4133 threadid = build_call_expr (threadid, 0); 4134 threadid = fold_convert (itype, threadid); 4135 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 4136 true, GSI_SAME_STMT); 4137 4138 n1 = fd->loop.n1; 4139 n2 = fd->loop.n2; 4140 step = fd->loop.step; 4141 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4142 { 4143 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4144 OMP_CLAUSE__LOOPTEMP_); 4145 gcc_assert (innerc); 4146 n1 = OMP_CLAUSE_DECL (innerc); 4147 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4148 OMP_CLAUSE__LOOPTEMP_); 4149 gcc_assert (innerc); 4150 n2 = OMP_CLAUSE_DECL (innerc); 4151 } 4152 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 4153 true, NULL_TREE, true, GSI_SAME_STMT); 4154 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 4155 true, NULL_TREE, true, GSI_SAME_STMT); 4156 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 4157 true, NULL_TREE, true, GSI_SAME_STMT); 4158 tree chunk_size = fold_convert (itype, fd->chunk_size); 4159 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 4160 chunk_size 4161 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, 4162 GSI_SAME_STMT); 4163 4164 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); 4165 t = fold_build2 (PLUS_EXPR, itype, step, t); 4166 t = fold_build2 (PLUS_EXPR, itype, t, n2); 4167 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 4168 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 4169 t = fold_build2 (TRUNC_DIV_EXPR, itype, 4170 fold_build1 (NEGATE_EXPR, itype, t), 4171 fold_build1 (NEGATE_EXPR, itype, step)); 4172 else 4173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 4174 t = fold_convert (itype, t); 4175 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4176 true, GSI_SAME_STMT); 4177 4178 trip_var = create_tmp_reg (itype, ".trip"); 4179 if (gimple_in_ssa_p (cfun)) 4180 { 4181 trip_init = make_ssa_name (trip_var); 4182 trip_main = make_ssa_name (trip_var); 4183 trip_back = make_ssa_name (trip_var); 4184 } 4185 else 4186 { 4187 trip_init = trip_var; 4188 trip_main = trip_var; 4189 trip_back = trip_var; 4190 } 4191 4192 gassign *assign_stmt 4193 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 4194 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4195 4196 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 4197 t = fold_build2 (MULT_EXPR, itype, t, step); 4198 if (POINTER_TYPE_P (type)) 4199 t = fold_build_pointer_plus (n1, t); 4200 else 4201 t = fold_build2 (PLUS_EXPR, type, t, n1); 4202 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4203 true, GSI_SAME_STMT); 4204 4205 /* Remove the GIMPLE_OMP_FOR. */ 4206 gsi_remove (&gsi, true); 4207 4208 gimple_stmt_iterator gsif = gsi; 4209 4210 /* Iteration space partitioning goes in ITER_PART_BB. 
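On trip T, thread THREADID works on the iterations [s0, e0) where s0 = (T * nthreads + THREADID) * CHUNK and e0 = MIN (s0 + CHUNK, n).  For example, with nthreads = 2, CHUNK = 3 and n = 10, thread 0 gets [0, 3) and then [6, 9) while thread 1 gets [3, 6) and then [9, 10).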
*/ 4211 gsi = gsi_last_bb (iter_part_bb); 4212 4213 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 4214 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 4215 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 4216 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4217 false, GSI_CONTINUE_LINKING); 4218 4219 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 4220 t = fold_build2 (MIN_EXPR, itype, t, n); 4221 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4222 false, GSI_CONTINUE_LINKING); 4223 4224 t = build2 (LT_EXPR, boolean_type_node, s0, n); 4225 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 4226 4227 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 4228 gsi = gsi_start_bb (seq_start_bb); 4229 4230 tree startvar = fd->loop.v; 4231 tree endvar = NULL_TREE; 4232 4233 if (gimple_omp_for_combined_p (fd->for_stmt)) 4234 { 4235 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 4236 ? gimple_omp_parallel_clauses (inner_stmt) 4237 : gimple_omp_for_clauses (inner_stmt); 4238 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 4239 gcc_assert (innerc); 4240 startvar = OMP_CLAUSE_DECL (innerc); 4241 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4242 OMP_CLAUSE__LOOPTEMP_); 4243 gcc_assert (innerc); 4244 endvar = OMP_CLAUSE_DECL (innerc); 4245 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 4246 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 4247 { 4248 int i; 4249 for (i = 1; i < fd->collapse; i++) 4250 { 4251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4252 OMP_CLAUSE__LOOPTEMP_); 4253 gcc_assert (innerc); 4254 } 4255 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4256 OMP_CLAUSE__LOOPTEMP_); 4257 if (innerc) 4258 { 4259 /* If needed (distribute parallel for with lastprivate), 4260 propagate down the total number of iterations. 
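The innermost construct needs the overall count so it can tell whether it executed the sequentially last iteration and therefore owns the lastprivate copy-out.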
*/ 4261 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4262 fd->loop.n2); 4263 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4264 GSI_CONTINUE_LINKING); 4265 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4266 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4267 } 4268 } 4269 } 4270 4271 t = fold_convert (itype, s0); 4272 t = fold_build2 (MULT_EXPR, itype, t, step); 4273 if (POINTER_TYPE_P (type)) 4274 { 4275 t = fold_build_pointer_plus (n1, t); 4276 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4277 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4278 t = fold_convert (signed_type_for (type), t); 4279 } 4280 else 4281 t = fold_build2 (PLUS_EXPR, type, t, n1); 4282 t = fold_convert (TREE_TYPE (startvar), t); 4283 t = force_gimple_operand_gsi (&gsi, t, 4284 DECL_P (startvar) 4285 && TREE_ADDRESSABLE (startvar), 4286 NULL_TREE, false, GSI_CONTINUE_LINKING); 4287 assign_stmt = gimple_build_assign (startvar, t); 4288 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4289 4290 t = fold_convert (itype, e0); 4291 t = fold_build2 (MULT_EXPR, itype, t, step); 4292 if (POINTER_TYPE_P (type)) 4293 { 4294 t = fold_build_pointer_plus (n1, t); 4295 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4296 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4297 t = fold_convert (signed_type_for (type), t); 4298 } 4299 else 4300 t = fold_build2 (PLUS_EXPR, type, t, n1); 4301 t = fold_convert (TREE_TYPE (startvar), t); 4302 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4303 false, GSI_CONTINUE_LINKING); 4304 if (endvar) 4305 { 4306 assign_stmt = gimple_build_assign (endvar, e); 4307 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4308 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4309 assign_stmt = gimple_build_assign (fd->loop.v, e); 4310 else 4311 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4312 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4313 } 4314 /* Handle linear clause adjustments. 
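Each linear variable is advanced from the value it had on entry by OMP_CLAUSE_LINEAR_STEP times the number of logical iterations preceding this thread's chunk; for a combined construct that count is s0 plus the bias (N1 - fd->loop.n1) / STEP, otherwise it is simply s0.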
*/ 4315 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 4316 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4317 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4318 c; c = OMP_CLAUSE_CHAIN (c)) 4319 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4320 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4321 { 4322 tree d = OMP_CLAUSE_DECL (c); 4323 bool is_ref = omp_is_reference (d); 4324 tree t = d, a, dest; 4325 if (is_ref) 4326 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4327 tree type = TREE_TYPE (t); 4328 if (POINTER_TYPE_P (type)) 4329 type = sizetype; 4330 dest = unshare_expr (t); 4331 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4332 expand_omp_build_assign (&gsif, v, t); 4333 if (itercnt == NULL_TREE) 4334 { 4335 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4336 { 4337 itercntbias 4338 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 4339 fold_convert (itype, fd->loop.n1)); 4340 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 4341 itercntbias, step); 4342 itercntbias 4343 = force_gimple_operand_gsi (&gsif, itercntbias, true, 4344 NULL_TREE, true, 4345 GSI_SAME_STMT); 4346 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 4347 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4348 NULL_TREE, false, 4349 GSI_CONTINUE_LINKING); 4350 } 4351 else 4352 itercnt = s0; 4353 } 4354 a = fold_build2 (MULT_EXPR, type, 4355 fold_convert (type, itercnt), 4356 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4357 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 4358 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4359 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4360 false, GSI_CONTINUE_LINKING); 4361 assign_stmt = gimple_build_assign (dest, t); 4362 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4363 } 4364 if (fd->collapse > 1) 4365 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4366 4367 if (!broken_loop) 4368 { 4369 /* The code controlling the sequential loop goes in CONT_BB, 4370 replacing the GIMPLE_OMP_CONTINUE. */ 4371 gsi = gsi_last_nondebug_bb (cont_bb); 4372 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4373 vmain = gimple_omp_continue_control_use (cont_stmt); 4374 vback = gimple_omp_continue_control_def (cont_stmt); 4375 4376 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4377 { 4378 if (POINTER_TYPE_P (type)) 4379 t = fold_build_pointer_plus (vmain, step); 4380 else 4381 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4382 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 4383 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4384 true, GSI_SAME_STMT); 4385 assign_stmt = gimple_build_assign (vback, t); 4386 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4387 4388 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 4389 t = build2 (EQ_EXPR, boolean_type_node, 4390 build_int_cst (itype, 0), 4391 build_int_cst (itype, 1)); 4392 else 4393 t = build2 (fd->loop.cond_code, boolean_type_node, 4394 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4395 ? t : vback, e); 4396 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4397 } 4398 4399 /* Remove GIMPLE_OMP_CONTINUE. */ 4400 gsi_remove (&gsi, true); 4401 4402 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4403 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4404 4405 /* Trip update code goes into TRIP_UPDATE_BB. 
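It merely increments the trip counter and branches back to ITER_PART_BB, where the next chunk for this thread is computed.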
*/ 4406 gsi = gsi_start_bb (trip_update_bb); 4407 4408 t = build_int_cst (itype, 1); 4409 t = build2 (PLUS_EXPR, itype, trip_main, t); 4410 assign_stmt = gimple_build_assign (trip_back, t); 4411 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4412 } 4413 4414 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4415 gsi = gsi_last_nondebug_bb (exit_bb); 4416 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4417 { 4418 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4419 if (fd->have_reductemp) 4420 { 4421 tree fn; 4422 if (t) 4423 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 4424 else 4425 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 4426 gcall *g = gimple_build_call (fn, 0); 4427 if (t) 4428 { 4429 gimple_call_set_lhs (g, t); 4430 gsi_insert_after (&gsi, gimple_build_assign (reductions, 4431 NOP_EXPR, t), 4432 GSI_SAME_STMT); 4433 } 4434 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4435 } 4436 else 4437 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4438 } 4439 gsi_remove (&gsi, true); 4440 4441 /* Connect the new blocks. */ 4442 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 4443 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 4444 4445 if (!broken_loop) 4446 { 4447 se = find_edge (cont_bb, body_bb); 4448 if (se == NULL) 4449 { 4450 se = BRANCH_EDGE (cont_bb); 4451 gcc_assert (single_succ (se->dest) == body_bb); 4452 } 4453 if (gimple_omp_for_combined_p (fd->for_stmt)) 4454 { 4455 remove_edge (se); 4456 se = NULL; 4457 } 4458 else if (fd->collapse > 1) 4459 { 4460 remove_edge (se); 4461 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4462 } 4463 else 4464 se->flags = EDGE_TRUE_VALUE; 4465 find_edge (cont_bb, trip_update_bb)->flags 4466 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 4467 4468 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 4469 iter_part_bb); 4470 } 4471 4472 if (gimple_in_ssa_p (cfun)) 4473 { 4474 gphi_iterator psi; 4475 gphi *phi; 4476 edge re, ene; 4477 edge_var_map *vm; 4478 size_t i; 4479 4480 gcc_assert (fd->collapse == 1 && !broken_loop); 4481 4482 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 4483 remove arguments of the phi nodes in fin_bb. We need to create 4484 appropriate phi nodes in iter_part_bb instead. */ 4485 se = find_edge (iter_part_bb, fin_bb); 4486 re = single_succ_edge (trip_update_bb); 4487 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 4488 ene = single_succ_edge (entry_bb); 4489 4490 psi = gsi_start_phis (fin_bb); 4491 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 4492 gsi_next (&psi), ++i) 4493 { 4494 gphi *nphi; 4495 location_t locus; 4496 4497 phi = psi.phi (); 4498 if (operand_equal_p (gimple_phi_arg_def (phi, 0), 4499 redirect_edge_var_map_def (vm), 0)) 4500 continue; 4501 4502 t = gimple_phi_result (phi); 4503 gcc_assert (t == redirect_edge_var_map_result (vm)); 4504 4505 if (!single_pred_p (fin_bb)) 4506 t = copy_ssa_name (t, phi); 4507 4508 nphi = create_phi_node (t, iter_part_bb); 4509 4510 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 4511 locus = gimple_phi_arg_location_from_edge (phi, se); 4512 4513 /* A special case -- fd->loop.v is not yet computed in 4514 iter_part_bb, we need to use vextra instead. 
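VEXTRA was computed back in ENTRY_BB as threadid * CHUNK * STEP + N1, i.e. the value V will take on this thread's first chunk.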
*/ 4515 if (t == fd->loop.v) 4516 t = vextra; 4517 add_phi_arg (nphi, t, ene, locus); 4518 locus = redirect_edge_var_map_location (vm); 4519 tree back_arg = redirect_edge_var_map_def (vm); 4520 add_phi_arg (nphi, back_arg, re, locus); 4521 edge ce = find_edge (cont_bb, body_bb); 4522 if (ce == NULL) 4523 { 4524 ce = BRANCH_EDGE (cont_bb); 4525 gcc_assert (single_succ (ce->dest) == body_bb); 4526 ce = single_succ_edge (ce->dest); 4527 } 4528 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 4529 gcc_assert (inner_loop_phi != NULL); 4530 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 4531 find_edge (seq_start_bb, body_bb), locus); 4532 4533 if (!single_pred_p (fin_bb)) 4534 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 4535 } 4536 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 4537 redirect_edge_var_map_clear (re); 4538 if (single_pred_p (fin_bb)) 4539 while (1) 4540 { 4541 psi = gsi_start_phis (fin_bb); 4542 if (gsi_end_p (psi)) 4543 break; 4544 remove_phi_node (&psi, false); 4545 } 4546 4547 /* Make phi node for trip. */ 4548 phi = create_phi_node (trip_main, iter_part_bb); 4549 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 4550 UNKNOWN_LOCATION); 4551 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 4552 UNKNOWN_LOCATION); 4553 } 4554 4555 if (!broken_loop) 4556 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 4557 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 4558 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 4559 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 4560 recompute_dominator (CDI_DOMINATORS, fin_bb)); 4561 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 4562 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 4563 set_immediate_dominator (CDI_DOMINATORS, body_bb, 4564 recompute_dominator (CDI_DOMINATORS, body_bb)); 4565 4566 if (!broken_loop) 4567 { 4568 struct loop *loop = body_bb->loop_father; 4569 struct loop *trip_loop = alloc_loop (); 4570 trip_loop->header = iter_part_bb; 4571 trip_loop->latch = trip_update_bb; 4572 add_loop (trip_loop, iter_part_bb->loop_father); 4573 4574 if (loop != entry_bb->loop_father) 4575 { 4576 gcc_assert (loop->header == body_bb); 4577 gcc_assert (loop->latch == region->cont 4578 || single_pred (loop->latch) == region->cont); 4579 trip_loop->inner = loop; 4580 return; 4581 } 4582 4583 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4584 { 4585 loop = alloc_loop (); 4586 loop->header = body_bb; 4587 if (collapse_bb == NULL) 4588 loop->latch = cont_bb; 4589 add_loop (loop, trip_loop); 4590 } 4591 } 4592 } 4593 4594 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 4595 loop. 
Given parameters: 4596 4597 for (V = N1; V cond N2; V += STEP) BODY; 4598 4599 where COND is "<" or ">", we generate pseudocode 4600 4601 V = N1; 4602 goto L1; 4603 L0: 4604 BODY; 4605 V += STEP; 4606 L1: 4607 if (V cond N2) goto L0; else goto L2; 4608 L2: 4609 4610 For collapsed loops, given parameters: 4611 collapse(3) 4612 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 4613 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 4614 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 4615 BODY; 4616 4617 we generate pseudocode 4618 4619 if (cond3 is <) 4620 adj = STEP3 - 1; 4621 else 4622 adj = STEP3 + 1; 4623 count3 = (adj + N32 - N31) / STEP3; 4624 if (cond2 is <) 4625 adj = STEP2 - 1; 4626 else 4627 adj = STEP2 + 1; 4628 count2 = (adj + N22 - N21) / STEP2; 4629 if (cond1 is <) 4630 adj = STEP1 - 1; 4631 else 4632 adj = STEP1 + 1; 4633 count1 = (adj + N12 - N11) / STEP1; 4634 count = count1 * count2 * count3; 4635 V = 0; 4636 V1 = N11; 4637 V2 = N21; 4638 V3 = N31; 4639 goto L1; 4640 L0: 4641 BODY; 4642 V += 1; 4643 V3 += STEP3; 4644 V2 += (V3 cond3 N32) ? 0 : STEP2; 4645 V3 = (V3 cond3 N32) ? V3 : N31; 4646 V1 += (V2 cond2 N22) ? 0 : STEP1; 4647 V2 = (V2 cond2 N22) ? V2 : N21; 4648 L1: 4649 if (V < count) goto L0; else goto L2; 4650 L2: 4651 4652 */ 4653 4654 static void 4655 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) 4656 { 4657 tree type, t; 4658 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; 4659 gimple_stmt_iterator gsi; 4660 gimple *stmt; 4661 gcond *cond_stmt; 4662 bool broken_loop = region->cont == NULL; 4663 edge e, ne; 4664 tree *counts = NULL; 4665 int i; 4666 int safelen_int = INT_MAX; 4667 bool dont_vectorize = false; 4668 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4669 OMP_CLAUSE_SAFELEN); 4670 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4671 OMP_CLAUSE__SIMDUID_); 4672 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4673 OMP_CLAUSE_IF); 4674 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4675 OMP_CLAUSE_SIMDLEN); 4676 tree n1, n2; 4677 4678 if (safelen) 4679 { 4680 poly_uint64 val; 4681 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); 4682 if (!poly_int_tree_p (safelen, &val)) 4683 safelen_int = 0; 4684 else 4685 safelen_int = MIN (constant_lower_bound (val), INT_MAX); 4686 if (safelen_int == 1) 4687 safelen_int = 0; 4688 } 4689 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc))) 4690 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))) 4691 { 4692 safelen_int = 0; 4693 dont_vectorize = true; 4694 } 4695 type = TREE_TYPE (fd->loop.v); 4696 entry_bb = region->entry; 4697 cont_bb = region->cont; 4698 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4699 gcc_assert (broken_loop 4700 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4701 l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 4702 if (!broken_loop) 4703 { 4704 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 4705 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4706 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 4707 l2_bb = BRANCH_EDGE (entry_bb)->dest; 4708 } 4709 else 4710 { 4711 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 4712 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 4713 l2_bb = single_succ (l1_bb); 4714 } 4715 exit_bb = region->exit; 4716 l2_dom_bb = NULL; 4717 4718 gsi = gsi_last_nondebug_bb (entry_bb); 4719 4720 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4721 /* Not needed in SSA form right now. 
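Simd regions are expanded before the function is rewritten into SSA form, so the loop and iteration variables can simply be assigned to directly.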
*/ 4722 gcc_assert (!gimple_in_ssa_p (cfun)); 4723 if (fd->collapse > 1) 4724 { 4725 int first_zero_iter = -1, dummy = -1; 4726 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; 4727 4728 counts = XALLOCAVEC (tree, fd->collapse); 4729 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4730 zero_iter_bb, first_zero_iter, 4731 dummy_bb, dummy, l2_dom_bb); 4732 } 4733 if (l2_dom_bb == NULL) 4734 l2_dom_bb = l1_bb; 4735 4736 n1 = fd->loop.n1; 4737 n2 = fd->loop.n2; 4738 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4739 { 4740 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4741 OMP_CLAUSE__LOOPTEMP_); 4742 gcc_assert (innerc); 4743 n1 = OMP_CLAUSE_DECL (innerc); 4744 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4745 OMP_CLAUSE__LOOPTEMP_); 4746 gcc_assert (innerc); 4747 n2 = OMP_CLAUSE_DECL (innerc); 4748 } 4749 tree step = fd->loop.step; 4750 4751 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4752 OMP_CLAUSE__SIMT_); 4753 if (is_simt) 4754 { 4755 cfun->curr_properties &= ~PROP_gimple_lomp_dev; 4756 is_simt = safelen_int > 1; 4757 } 4758 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; 4759 if (is_simt) 4760 { 4761 simt_lane = create_tmp_var (unsigned_type_node); 4762 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); 4763 gimple_call_set_lhs (g, simt_lane); 4764 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4765 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, 4766 fold_convert (TREE_TYPE (step), simt_lane)); 4767 n1 = fold_convert (type, n1); 4768 if (POINTER_TYPE_P (type)) 4769 n1 = fold_build_pointer_plus (n1, offset); 4770 else 4771 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); 4772 4773 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */ 4774 if (fd->collapse > 1) 4775 simt_maxlane = build_one_cst (unsigned_type_node); 4776 else if (safelen_int < omp_max_simt_vf ()) 4777 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); 4778 tree vf 4779 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, 4780 unsigned_type_node, 0); 4781 if (simt_maxlane) 4782 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); 4783 vf = fold_convert (TREE_TYPE (step), vf); 4784 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); 4785 } 4786 4787 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 4788 if (fd->collapse > 1) 4789 { 4790 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4791 { 4792 gsi_prev (&gsi); 4793 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); 4794 gsi_next (&gsi); 4795 } 4796 else 4797 for (i = 0; i < fd->collapse; i++) 4798 { 4799 tree itype = TREE_TYPE (fd->loops[i].v); 4800 if (POINTER_TYPE_P (itype)) 4801 itype = signed_type_for (itype); 4802 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); 4803 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4804 } 4805 } 4806 4807 /* Remove the GIMPLE_OMP_FOR statement. */ 4808 gsi_remove (&gsi, true); 4809 4810 if (!broken_loop) 4811 { 4812 /* Code to control the increment goes in the CONT_BB. 
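For a collapsed nest only the innermost iterator is stepped unconditionally; each outer iterator advances by its own step only when the next inner one has run past its bound, and that inner iterator is reset to its N1 at the same time, matching the pseudocode in the function comment above.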
*/ 4813 gsi = gsi_last_nondebug_bb (cont_bb); 4814 stmt = gsi_stmt (gsi); 4815 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 4816 4817 if (POINTER_TYPE_P (type)) 4818 t = fold_build_pointer_plus (fd->loop.v, step); 4819 else 4820 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4821 expand_omp_build_assign (&gsi, fd->loop.v, t); 4822 4823 if (fd->collapse > 1) 4824 { 4825 i = fd->collapse - 1; 4826 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 4827 { 4828 t = fold_convert (sizetype, fd->loops[i].step); 4829 t = fold_build_pointer_plus (fd->loops[i].v, t); 4830 } 4831 else 4832 { 4833 t = fold_convert (TREE_TYPE (fd->loops[i].v), 4834 fd->loops[i].step); 4835 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 4836 fd->loops[i].v, t); 4837 } 4838 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4839 4840 for (i = fd->collapse - 1; i > 0; i--) 4841 { 4842 tree itype = TREE_TYPE (fd->loops[i].v); 4843 tree itype2 = TREE_TYPE (fd->loops[i - 1].v); 4844 if (POINTER_TYPE_P (itype2)) 4845 itype2 = signed_type_for (itype2); 4846 t = fold_convert (itype2, fd->loops[i - 1].step); 4847 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4848 GSI_SAME_STMT); 4849 t = build3 (COND_EXPR, itype2, 4850 build2 (fd->loops[i].cond_code, boolean_type_node, 4851 fd->loops[i].v, 4852 fold_convert (itype, fd->loops[i].n2)), 4853 build_int_cst (itype2, 0), t); 4854 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) 4855 t = fold_build_pointer_plus (fd->loops[i - 1].v, t); 4856 else 4857 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); 4858 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); 4859 4860 t = fold_convert (itype, fd->loops[i].n1); 4861 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4862 GSI_SAME_STMT); 4863 t = build3 (COND_EXPR, itype, 4864 build2 (fd->loops[i].cond_code, boolean_type_node, 4865 fd->loops[i].v, 4866 fold_convert (itype, fd->loops[i].n2)), 4867 fd->loops[i].v, t); 4868 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4869 } 4870 } 4871 4872 /* Remove GIMPLE_OMP_CONTINUE. */ 4873 gsi_remove (&gsi, true); 4874 } 4875 4876 /* Emit the condition in L1_BB. */ 4877 gsi = gsi_start_bb (l1_bb); 4878 4879 t = fold_convert (type, n2); 4880 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4881 false, GSI_CONTINUE_LINKING); 4882 tree v = fd->loop.v; 4883 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 4884 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 4885 false, GSI_CONTINUE_LINKING); 4886 t = build2 (fd->loop.cond_code, boolean_type_node, v, t); 4887 cond_stmt = gimple_build_cond_empty (t); 4888 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 4889 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 4890 NULL, NULL) 4891 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 4892 NULL, NULL)) 4893 { 4894 gsi = gsi_for_stmt (cond_stmt); 4895 gimple_regimplify_operands (cond_stmt, &gsi); 4896 } 4897 4898 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ 4899 if (is_simt) 4900 { 4901 gsi = gsi_start_bb (l2_bb); 4902 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); 4903 if (POINTER_TYPE_P (type)) 4904 t = fold_build_pointer_plus (fd->loop.v, step); 4905 else 4906 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4907 expand_omp_build_assign (&gsi, fd->loop.v, t); 4908 } 4909 4910 /* Remove GIMPLE_OMP_RETURN. */ 4911 gsi = gsi_last_nondebug_bb (exit_bb); 4912 gsi_remove (&gsi, true); 4913 4914 /* Connect the new blocks. 
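The edge that continues the simd loop is given a guessed probability of 7/8, and the loop exit edge gets the complement.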
*/ 4915 remove_edge (FALLTHRU_EDGE (entry_bb)); 4916 4917 if (!broken_loop) 4918 { 4919 remove_edge (BRANCH_EDGE (entry_bb)); 4920 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 4921 4922 e = BRANCH_EDGE (l1_bb); 4923 ne = FALLTHRU_EDGE (l1_bb); 4924 e->flags = EDGE_TRUE_VALUE; 4925 } 4926 else 4927 { 4928 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 4929 4930 ne = single_succ_edge (l1_bb); 4931 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 4932 4933 } 4934 ne->flags = EDGE_FALSE_VALUE; 4935 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 4936 ne->probability = e->probability.invert (); 4937 4938 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 4939 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 4940 4941 if (simt_maxlane) 4942 { 4943 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, 4944 NULL_TREE, NULL_TREE); 4945 gsi = gsi_last_bb (entry_bb); 4946 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 4947 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); 4948 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; 4949 FALLTHRU_EDGE (entry_bb)->probability 4950 = profile_probability::guessed_always ().apply_scale (7, 8); 4951 BRANCH_EDGE (entry_bb)->probability 4952 = FALLTHRU_EDGE (entry_bb)->probability.invert (); 4953 l2_dom_bb = entry_bb; 4954 } 4955 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 4956 4957 if (!broken_loop) 4958 { 4959 struct loop *loop = alloc_loop (); 4960 loop->header = l1_bb; 4961 loop->latch = cont_bb; 4962 add_loop (loop, l1_bb->loop_father); 4963 loop->safelen = safelen_int; 4964 if (simduid) 4965 { 4966 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); 4967 cfun->has_simduid_loops = true; 4968 } 4969 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize 4970 the loop. */ 4971 if ((flag_tree_loop_vectorize 4972 || !global_options_set.x_flag_tree_loop_vectorize) 4973 && flag_tree_loop_optimize 4974 && loop->safelen > 1) 4975 { 4976 loop->force_vectorize = true; 4977 cfun->has_force_vectorize_loops = true; 4978 } 4979 else if (dont_vectorize) 4980 loop->dont_vectorize = true; 4981 } 4982 else if (simduid) 4983 cfun->has_simduid_loops = true; 4984 } 4985 4986 /* Taskloop construct is represented after gimplification with 4987 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 4988 in between them. This routine expands the outer GIMPLE_OMP_FOR, 4989 which should just compute all the needed loop temporaries 4990 for GIMPLE_OMP_TASK. */ 4991 4992 static void 4993 expand_omp_taskloop_for_outer (struct omp_region *region, 4994 struct omp_for_data *fd, 4995 gimple *inner_stmt) 4996 { 4997 tree type, bias = NULL_TREE; 4998 basic_block entry_bb, cont_bb, exit_bb; 4999 gimple_stmt_iterator gsi; 5000 gassign *assign_stmt; 5001 tree *counts = NULL; 5002 int i; 5003 5004 gcc_assert (inner_stmt); 5005 gcc_assert (region->cont); 5006 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK 5007 && gimple_omp_task_taskloop_p (inner_stmt)); 5008 type = TREE_TYPE (fd->loop.v); 5009 5010 /* See if we need to bias by LLONG_MIN. 
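GOMP_taskloop_ull works on unsigned long long bounds; when the original iteration variable is signed, adding TYPE_MIN_VALUE of its type (2^63 when viewed as unsigned) shifts possibly negative bounds into the unsigned range while preserving their order.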
*/ 5011 if (fd->iter_type == long_long_unsigned_type_node 5012 && TREE_CODE (type) == INTEGER_TYPE 5013 && !TYPE_UNSIGNED (type)) 5014 { 5015 tree n1, n2; 5016 5017 if (fd->loop.cond_code == LT_EXPR) 5018 { 5019 n1 = fd->loop.n1; 5020 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 5021 } 5022 else 5023 { 5024 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 5025 n2 = fd->loop.n1; 5026 } 5027 if (TREE_CODE (n1) != INTEGER_CST 5028 || TREE_CODE (n2) != INTEGER_CST 5029 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 5030 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 5031 } 5032 5033 entry_bb = region->entry; 5034 cont_bb = region->cont; 5035 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5036 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 5037 exit_bb = region->exit; 5038 5039 gsi = gsi_last_nondebug_bb (entry_bb); 5040 gimple *for_stmt = gsi_stmt (gsi); 5041 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); 5042 if (fd->collapse > 1) 5043 { 5044 int first_zero_iter = -1, dummy = -1; 5045 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; 5046 5047 counts = XALLOCAVEC (tree, fd->collapse); 5048 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5049 zero_iter_bb, first_zero_iter, 5050 dummy_bb, dummy, l2_dom_bb); 5051 5052 if (zero_iter_bb) 5053 { 5054 /* Some counts[i] vars might be uninitialized if 5055 some loop has zero iterations. But the body shouldn't 5056 be executed in that case, so just avoid uninit warnings. */ 5057 for (i = first_zero_iter; i < fd->collapse; i++) 5058 if (SSA_VAR_P (counts[i])) 5059 TREE_NO_WARNING (counts[i]) = 1; 5060 gsi_prev (&gsi); 5061 edge e = split_block (entry_bb, gsi_stmt (gsi)); 5062 entry_bb = e->dest; 5063 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); 5064 gsi = gsi_last_bb (entry_bb); 5065 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 5066 get_immediate_dominator (CDI_DOMINATORS, 5067 zero_iter_bb)); 5068 } 5069 } 5070 5071 tree t0, t1; 5072 t1 = fd->loop.n2; 5073 t0 = fd->loop.n1; 5074 if (POINTER_TYPE_P (TREE_TYPE (t0)) 5075 && TYPE_PRECISION (TREE_TYPE (t0)) 5076 != TYPE_PRECISION (fd->iter_type)) 5077 { 5078 /* Avoid casting pointers to integer of a different size. */ 5079 tree itype = signed_type_for (type); 5080 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 5081 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 5082 } 5083 else 5084 { 5085 t1 = fold_convert (fd->iter_type, t1); 5086 t0 = fold_convert (fd->iter_type, t0); 5087 } 5088 if (bias) 5089 { 5090 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 5091 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 5092 } 5093 5094 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt), 5095 OMP_CLAUSE__LOOPTEMP_); 5096 gcc_assert (innerc); 5097 tree startvar = OMP_CLAUSE_DECL (innerc); 5098 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 5099 gcc_assert (innerc); 5100 tree endvar = OMP_CLAUSE_DECL (innerc); 5101 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 5102 { 5103 gcc_assert (innerc); 5104 for (i = 1; i < fd->collapse; i++) 5105 { 5106 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5107 OMP_CLAUSE__LOOPTEMP_); 5108 gcc_assert (innerc); 5109 } 5110 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5111 OMP_CLAUSE__LOOPTEMP_); 5112 if (innerc) 5113 { 5114 /* If needed (inner taskloop has lastprivate clause), propagate 5115 down the total number of iterations. 
*/ 5116 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, 5117 NULL_TREE, false, 5118 GSI_CONTINUE_LINKING); 5119 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 5120 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5121 } 5122 } 5123 5124 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, 5125 GSI_CONTINUE_LINKING); 5126 assign_stmt = gimple_build_assign (startvar, t0); 5127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5128 5129 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, 5130 GSI_CONTINUE_LINKING); 5131 assign_stmt = gimple_build_assign (endvar, t1); 5132 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5133 if (fd->collapse > 1) 5134 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5135 5136 /* Remove the GIMPLE_OMP_FOR statement. */ 5137 gsi = gsi_for_stmt (for_stmt); 5138 gsi_remove (&gsi, true); 5139 5140 gsi = gsi_last_nondebug_bb (cont_bb); 5141 gsi_remove (&gsi, true); 5142 5143 gsi = gsi_last_nondebug_bb (exit_bb); 5144 gsi_remove (&gsi, true); 5145 5146 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always (); 5147 remove_edge (BRANCH_EDGE (entry_bb)); 5148 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always (); 5149 remove_edge (BRANCH_EDGE (cont_bb)); 5150 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); 5151 set_immediate_dominator (CDI_DOMINATORS, region->entry, 5152 recompute_dominator (CDI_DOMINATORS, region->entry)); 5153 } 5154 5155 /* Taskloop construct is represented after gimplification with 5156 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 5157 in between them. This routine expands the inner GIMPLE_OMP_FOR. 5158 GOMP_taskloop{,_ull} function arranges for each task to be given just 5159 a single range of iterations. */ 5160 5161 static void 5162 expand_omp_taskloop_for_inner (struct omp_region *region, 5163 struct omp_for_data *fd, 5164 gimple *inner_stmt) 5165 { 5166 tree e, t, type, itype, vmain, vback, bias = NULL_TREE; 5167 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; 5168 basic_block fin_bb; 5169 gimple_stmt_iterator gsi; 5170 edge ep; 5171 bool broken_loop = region->cont == NULL; 5172 tree *counts = NULL; 5173 tree n1, n2, step; 5174 5175 itype = type = TREE_TYPE (fd->loop.v); 5176 if (POINTER_TYPE_P (type)) 5177 itype = signed_type_for (type); 5178 5179 /* See if we need to bias by LLONG_MIN. 
*/ 5180 if (fd->iter_type == long_long_unsigned_type_node 5181 && TREE_CODE (type) == INTEGER_TYPE 5182 && !TYPE_UNSIGNED (type)) 5183 { 5184 tree n1, n2; 5185 5186 if (fd->loop.cond_code == LT_EXPR) 5187 { 5188 n1 = fd->loop.n1; 5189 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 5190 } 5191 else 5192 { 5193 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 5194 n2 = fd->loop.n1; 5195 } 5196 if (TREE_CODE (n1) != INTEGER_CST 5197 || TREE_CODE (n2) != INTEGER_CST 5198 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 5199 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 5200 } 5201 5202 entry_bb = region->entry; 5203 cont_bb = region->cont; 5204 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5205 fin_bb = BRANCH_EDGE (entry_bb)->dest; 5206 gcc_assert (broken_loop 5207 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 5208 body_bb = FALLTHRU_EDGE (entry_bb)->dest; 5209 if (!broken_loop) 5210 { 5211 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); 5212 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 5213 } 5214 exit_bb = region->exit; 5215 5216 /* Iteration space partitioning goes in ENTRY_BB. */ 5217 gsi = gsi_last_nondebug_bb (entry_bb); 5218 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 5219 5220 if (fd->collapse > 1) 5221 { 5222 int first_zero_iter = -1, dummy = -1; 5223 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 5224 5225 counts = XALLOCAVEC (tree, fd->collapse); 5226 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5227 fin_bb, first_zero_iter, 5228 dummy_bb, dummy, l2_dom_bb); 5229 t = NULL_TREE; 5230 } 5231 else 5232 t = integer_one_node; 5233 5234 step = fd->loop.step; 5235 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5236 OMP_CLAUSE__LOOPTEMP_); 5237 gcc_assert (innerc); 5238 n1 = OMP_CLAUSE_DECL (innerc); 5239 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 5240 gcc_assert (innerc); 5241 n2 = OMP_CLAUSE_DECL (innerc); 5242 if (bias) 5243 { 5244 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); 5245 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); 5246 } 5247 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 5248 true, NULL_TREE, true, GSI_SAME_STMT); 5249 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 5250 true, NULL_TREE, true, GSI_SAME_STMT); 5251 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 5252 true, NULL_TREE, true, GSI_SAME_STMT); 5253 5254 tree startvar = fd->loop.v; 5255 tree endvar = NULL_TREE; 5256 5257 if (gimple_omp_for_combined_p (fd->for_stmt)) 5258 { 5259 tree clauses = gimple_omp_for_clauses (inner_stmt); 5260 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 5261 gcc_assert (innerc); 5262 startvar = OMP_CLAUSE_DECL (innerc); 5263 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5264 OMP_CLAUSE__LOOPTEMP_); 5265 gcc_assert (innerc); 5266 endvar = OMP_CLAUSE_DECL (innerc); 5267 } 5268 t = fold_convert (TREE_TYPE (startvar), n1); 5269 t = force_gimple_operand_gsi (&gsi, t, 5270 DECL_P (startvar) 5271 && TREE_ADDRESSABLE (startvar), 5272 NULL_TREE, false, GSI_CONTINUE_LINKING); 5273 gimple *assign_stmt = gimple_build_assign (startvar, t); 5274 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5275 5276 t = fold_convert (TREE_TYPE (startvar), n2); 5277 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5278 false, GSI_CONTINUE_LINKING); 5279 if (endvar) 5280 { 5281 assign_stmt = gimple_build_assign (endvar, e); 5282 gsi_insert_after (&gsi, 
assign_stmt, GSI_CONTINUE_LINKING); 5283 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 5284 assign_stmt = gimple_build_assign (fd->loop.v, e); 5285 else 5286 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 5287 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5288 } 5289 if (fd->collapse > 1) 5290 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5291 5292 if (!broken_loop) 5293 { 5294 /* The code controlling the sequential loop replaces the 5295 GIMPLE_OMP_CONTINUE. */ 5296 gsi = gsi_last_nondebug_bb (cont_bb); 5297 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5298 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 5299 vmain = gimple_omp_continue_control_use (cont_stmt); 5300 vback = gimple_omp_continue_control_def (cont_stmt); 5301 5302 if (!gimple_omp_for_combined_p (fd->for_stmt)) 5303 { 5304 if (POINTER_TYPE_P (type)) 5305 t = fold_build_pointer_plus (vmain, step); 5306 else 5307 t = fold_build2 (PLUS_EXPR, type, vmain, step); 5308 t = force_gimple_operand_gsi (&gsi, t, 5309 DECL_P (vback) 5310 && TREE_ADDRESSABLE (vback), 5311 NULL_TREE, true, GSI_SAME_STMT); 5312 assign_stmt = gimple_build_assign (vback, t); 5313 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 5314 5315 t = build2 (fd->loop.cond_code, boolean_type_node, 5316 DECL_P (vback) && TREE_ADDRESSABLE (vback) 5317 ? t : vback, e); 5318 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 5319 } 5320 5321 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 5322 gsi_remove (&gsi, true); 5323 5324 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 5325 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 5326 } 5327 5328 /* Remove the GIMPLE_OMP_FOR statement. */ 5329 gsi = gsi_for_stmt (fd->for_stmt); 5330 gsi_remove (&gsi, true); 5331 5332 /* Remove the GIMPLE_OMP_RETURN statement. */ 5333 gsi = gsi_last_nondebug_bb (exit_bb); 5334 gsi_remove (&gsi, true); 5335 5336 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always (); 5337 if (!broken_loop) 5338 remove_edge (BRANCH_EDGE (entry_bb)); 5339 else 5340 { 5341 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); 5342 region->outer->cont = NULL; 5343 } 5344 5345 /* Connect all the blocks. */ 5346 if (!broken_loop) 5347 { 5348 ep = find_edge (cont_bb, body_bb); 5349 if (gimple_omp_for_combined_p (fd->for_stmt)) 5350 { 5351 remove_edge (ep); 5352 ep = NULL; 5353 } 5354 else if (fd->collapse > 1) 5355 { 5356 remove_edge (ep); 5357 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 5358 } 5359 else 5360 ep->flags = EDGE_TRUE_VALUE; 5361 find_edge (cont_bb, fin_bb)->flags 5362 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 5363 } 5364 5365 set_immediate_dominator (CDI_DOMINATORS, body_bb, 5366 recompute_dominator (CDI_DOMINATORS, body_bb)); 5367 if (!broken_loop) 5368 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 5369 recompute_dominator (CDI_DOMINATORS, fin_bb)); 5370 5371 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 5372 { 5373 struct loop *loop = alloc_loop (); 5374 loop->header = body_bb; 5375 if (collapse_bb == NULL) 5376 loop->latch = cont_bb; 5377 add_loop (loop, body_bb->loop_father); 5378 } 5379 } 5380 5381 /* A subroutine of expand_omp_for. Generate code for an OpenACC 5382 partitioned loop. The lowering here is abstracted, in that the 5383 loop parameters are passed through internal functions, which are 5384 further lowered by oacc_device_lower, once we get to the target 5385 compiler. 
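The IFN_GOACC_LOOP internal calls emitted below (the CHUNKS, STEP, OFFSET and BOUND variants) carry the chunking, step and bound computations symbolically until oacc_device_lower resolves them for the device actually targeted.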
The loop is of the form: 5386 5387 for (V = B; V LTGT E; V += S) {BODY} 5388 5389 where LTGT is < or >. We may have a specified chunking size, CHUNKING 5390 (constant 0 for no chunking) and we will have a GWV partitioning 5391 mask, specifying dimensions over which the loop is to be 5392 partitioned (see note below). We generate code that looks like 5393 (this ignores tiling): 5394 5395 <entry_bb> [incoming FALL->body, BRANCH->exit] 5396 typedef signedintify (typeof (V)) T; // underlying signed integral type 5397 T range = E - B; 5398 T chunk_no = 0; 5399 T DIR = LTGT == '<' ? +1 : -1; 5400 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); 5401 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); 5402 5403 <head_bb> [created by splitting end of entry_bb] 5404 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); 5405 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); 5406 if (!(offset LTGT bound)) goto bottom_bb; 5407 5408 <body_bb> [incoming] 5409 V = B + offset; 5410 {BODY} 5411 5412 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] 5413 offset += step; 5414 if (offset LTGT bound) goto body_bb; [*] 5415 5416 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb 5417 chunk_no++; 5418 if (chunk_no < chunk_max) goto head_bb; 5419 5420 <exit_bb> [incoming] 5421 V = B + ((range -/+ 1) / S +/- 1) * S [*] 5422 5423 [*] Needed if V live at end of loop. */ 5424 5425 static void 5426 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) 5427 { 5428 tree v = fd->loop.v; 5429 enum tree_code cond_code = fd->loop.cond_code; 5430 enum tree_code plus_code = PLUS_EXPR; 5431 5432 tree chunk_size = integer_minus_one_node; 5433 tree gwv = integer_zero_node; 5434 tree iter_type = TREE_TYPE (v); 5435 tree diff_type = iter_type; 5436 tree plus_type = iter_type; 5437 struct oacc_collapse *counts = NULL; 5438 5439 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) 5440 == GF_OMP_FOR_KIND_OACC_LOOP); 5441 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); 5442 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); 5443 5444 if (POINTER_TYPE_P (iter_type)) 5445 { 5446 plus_code = POINTER_PLUS_EXPR; 5447 plus_type = sizetype; 5448 } 5449 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) 5450 diff_type = signed_type_for (diff_type); 5451 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node)) 5452 diff_type = integer_type_node; 5453 5454 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ 5455 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ 5456 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ 5457 basic_block bottom_bb = NULL; 5458 5459 /* entry_bb has two successors; the branch edge is to the exit 5460 block, fallthrough edge to body. */ 5461 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 5462 && BRANCH_EDGE (entry_bb)->dest == exit_bb); 5463 5464 /* If cont_bb non-NULL, it has 2 successors. The branch successor is 5465 body_bb, or a block whose only successor is the body_bb. Its 5466 fallthrough successor is the final block (same as the branch 5467 successor of the entry_bb).
*/ 5468 if (cont_bb) 5469 { 5470 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; 5471 basic_block bed = BRANCH_EDGE (cont_bb)->dest; 5472 5473 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); 5474 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); 5475 } 5476 else 5477 gcc_assert (!gimple_in_ssa_p (cfun)); 5478 5479 /* The exit block only has entry_bb and cont_bb as predecessors. */ 5480 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); 5481 5482 tree chunk_no; 5483 tree chunk_max = NULL_TREE; 5484 tree bound, offset; 5485 tree step = create_tmp_var (diff_type, ".step"); 5486 bool up = cond_code == LT_EXPR; 5487 tree dir = build_int_cst (diff_type, up ? +1 : -1); 5488 bool chunking = !gimple_in_ssa_p (cfun); 5489 bool negating; 5490 5491 /* Tiling vars. */ 5492 tree tile_size = NULL_TREE; 5493 tree element_s = NULL_TREE; 5494 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE; 5495 basic_block elem_body_bb = NULL; 5496 basic_block elem_cont_bb = NULL; 5497 5498 /* SSA instances. */ 5499 tree offset_incr = NULL_TREE; 5500 tree offset_init = NULL_TREE; 5501 5502 gimple_stmt_iterator gsi; 5503 gassign *ass; 5504 gcall *call; 5505 gimple *stmt; 5506 tree expr; 5507 location_t loc; 5508 edge split, be, fte; 5509 5510 /* Split the end of entry_bb to create head_bb. */ 5511 split = split_block (entry_bb, last_stmt (entry_bb)); 5512 basic_block head_bb = split->dest; 5513 entry_bb = split->src; 5514 5515 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ 5516 gsi = gsi_last_nondebug_bb (entry_bb); 5517 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); 5518 loc = gimple_location (for_stmt); 5519 5520 if (gimple_in_ssa_p (cfun)) 5521 { 5522 offset_init = gimple_omp_for_index (for_stmt, 0); 5523 gcc_assert (integer_zerop (fd->loop.n1)); 5524 /* The SSA parallelizer does gang parallelism. */ 5525 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); 5526 } 5527 5528 if (fd->collapse > 1 || fd->tiling) 5529 { 5530 gcc_assert (!gimple_in_ssa_p (cfun) && up); 5531 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); 5532 tree total = expand_oacc_collapse_init (fd, &gsi, counts, 5533 TREE_TYPE (fd->loop.n2), loc); 5534 5535 if (SSA_VAR_P (fd->loop.n2)) 5536 { 5537 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, 5538 true, GSI_SAME_STMT); 5539 ass = gimple_build_assign (fd->loop.n2, total); 5540 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5541 } 5542 } 5543 5544 tree b = fd->loop.n1; 5545 tree e = fd->loop.n2; 5546 tree s = fd->loop.step; 5547 5548 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); 5549 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); 5550 5551 /* Convert the step, avoiding possible unsigned->signed overflow. */ 5552 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 5553 if (negating) 5554 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 5555 s = fold_convert (diff_type, s); 5556 if (negating) 5557 s = fold_build1 (NEGATE_EXPR, diff_type, s); 5558 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); 5559 5560 if (!chunking) 5561 chunk_size = integer_zero_node; 5562 expr = fold_convert (diff_type, chunk_size); 5563 chunk_size = force_gimple_operand_gsi (&gsi, expr, true, 5564 NULL_TREE, true, GSI_SAME_STMT); 5565 5566 if (fd->tiling) 5567 { 5568 /* Determine the tile size and element step, 5569 modify the outer loop step size. 
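For instance, with a 'tile (8, 8)' clause over two collapsed dimensions, .tile_size becomes 64, .element_s keeps the original per-iteration step, and the outer step is scaled by the tile size so each outer iteration advances one whole tile.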
*/ 5570 tile_size = create_tmp_var (diff_type, ".tile_size"); 5571 expr = build_int_cst (diff_type, 1); 5572 for (int ix = 0; ix < fd->collapse; ix++) 5573 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr); 5574 expr = force_gimple_operand_gsi (&gsi, expr, true, 5575 NULL_TREE, true, GSI_SAME_STMT); 5576 ass = gimple_build_assign (tile_size, expr); 5577 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5578 5579 element_s = create_tmp_var (diff_type, ".element_s"); 5580 ass = gimple_build_assign (element_s, s); 5581 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5582 5583 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size); 5584 s = force_gimple_operand_gsi (&gsi, expr, true, 5585 NULL_TREE, true, GSI_SAME_STMT); 5586 } 5587 5588 /* Determine the range, avoiding possible unsigned->signed overflow. */ 5589 negating = !up && TYPE_UNSIGNED (iter_type); 5590 expr = fold_build2 (MINUS_EXPR, plus_type, 5591 fold_convert (plus_type, negating ? b : e), 5592 fold_convert (plus_type, negating ? e : b)); 5593 expr = fold_convert (diff_type, expr); 5594 if (negating) 5595 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 5596 tree range = force_gimple_operand_gsi (&gsi, expr, true, 5597 NULL_TREE, true, GSI_SAME_STMT); 5598 5599 chunk_no = build_int_cst (diff_type, 0); 5600 if (chunking) 5601 { 5602 gcc_assert (!gimple_in_ssa_p (cfun)); 5603 5604 expr = chunk_no; 5605 chunk_max = create_tmp_var (diff_type, ".chunk_max"); 5606 chunk_no = create_tmp_var (diff_type, ".chunk_no"); 5607 5608 ass = gimple_build_assign (chunk_no, expr); 5609 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5610 5611 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 5612 build_int_cst (integer_type_node, 5613 IFN_GOACC_LOOP_CHUNKS), 5614 dir, range, s, chunk_size, gwv); 5615 gimple_call_set_lhs (call, chunk_max); 5616 gimple_set_location (call, loc); 5617 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5618 } 5619 else 5620 chunk_size = chunk_no; 5621 5622 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 5623 build_int_cst (integer_type_node, 5624 IFN_GOACC_LOOP_STEP), 5625 dir, range, s, chunk_size, gwv); 5626 gimple_call_set_lhs (call, step); 5627 gimple_set_location (call, loc); 5628 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5629 5630 /* Remove the GIMPLE_OMP_FOR. */ 5631 gsi_remove (&gsi, true); 5632 5633 /* Fixup edges from head_bb. */ 5634 be = BRANCH_EDGE (head_bb); 5635 fte = FALLTHRU_EDGE (head_bb); 5636 be->flags |= EDGE_FALSE_VALUE; 5637 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 5638 5639 basic_block body_bb = fte->dest; 5640 5641 if (gimple_in_ssa_p (cfun)) 5642 { 5643 gsi = gsi_last_nondebug_bb (cont_bb); 5644 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5645 5646 offset = gimple_omp_continue_control_use (cont_stmt); 5647 offset_incr = gimple_omp_continue_control_def (cont_stmt); 5648 } 5649 else 5650 { 5651 offset = create_tmp_var (diff_type, ".offset"); 5652 offset_init = offset_incr = offset; 5653 } 5654 bound = create_tmp_var (TREE_TYPE (offset), ".bound"); 5655 5656 /* Loop offset & bound go into head_bb. 
*/ 5657 gsi = gsi_start_bb (head_bb); 5658 5659 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 5660 build_int_cst (integer_type_node, 5661 IFN_GOACC_LOOP_OFFSET), 5662 dir, range, s, 5663 chunk_size, gwv, chunk_no); 5664 gimple_call_set_lhs (call, offset_init); 5665 gimple_set_location (call, loc); 5666 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 5667 5668 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 5669 build_int_cst (integer_type_node, 5670 IFN_GOACC_LOOP_BOUND), 5671 dir, range, s, 5672 chunk_size, gwv, offset_init); 5673 gimple_call_set_lhs (call, bound); 5674 gimple_set_location (call, loc); 5675 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 5676 5677 expr = build2 (cond_code, boolean_type_node, offset_init, bound); 5678 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 5679 GSI_CONTINUE_LINKING); 5680 5681 /* V assignment goes into body_bb. */ 5682 if (!gimple_in_ssa_p (cfun)) 5683 { 5684 gsi = gsi_start_bb (body_bb); 5685 5686 expr = build2 (plus_code, iter_type, b, 5687 fold_convert (plus_type, offset)); 5688 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5689 true, GSI_SAME_STMT); 5690 ass = gimple_build_assign (v, expr); 5691 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5692 5693 if (fd->collapse > 1 || fd->tiling) 5694 expand_oacc_collapse_vars (fd, false, &gsi, counts, v); 5695 5696 if (fd->tiling) 5697 { 5698 /* Determine the range of the element loop -- usually simply 5699 the tile_size, but could be smaller if the final 5700 iteration of the outer loop is a partial tile. */ 5701 tree e_range = create_tmp_var (diff_type, ".e_range"); 5702 5703 expr = build2 (MIN_EXPR, diff_type, 5704 build2 (MINUS_EXPR, diff_type, bound, offset), 5705 build2 (MULT_EXPR, diff_type, tile_size, 5706 element_s)); 5707 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5708 true, GSI_SAME_STMT); 5709 ass = gimple_build_assign (e_range, expr); 5710 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5711 5712 /* Determine bound, offset & step of inner loop. */ 5713 e_bound = create_tmp_var (diff_type, ".e_bound"); 5714 e_offset = create_tmp_var (diff_type, ".e_offset"); 5715 e_step = create_tmp_var (diff_type, ".e_step"); 5716 5717 /* Mark these as element loops. */ 5718 tree t, e_gwv = integer_minus_one_node; 5719 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */ 5720 5721 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); 5722 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 5723 element_s, chunk, e_gwv, chunk); 5724 gimple_call_set_lhs (call, e_offset); 5725 gimple_set_location (call, loc); 5726 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5727 5728 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); 5729 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 5730 element_s, chunk, e_gwv, e_offset); 5731 gimple_call_set_lhs (call, e_bound); 5732 gimple_set_location (call, loc); 5733 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5734 5735 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP); 5736 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range, 5737 element_s, chunk, e_gwv); 5738 gimple_call_set_lhs (call, e_step); 5739 gimple_set_location (call, loc); 5740 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5741 5742 /* Add test and split block. 
*/ 5743 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 5744 stmt = gimple_build_cond_empty (expr); 5745 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5746 split = split_block (body_bb, stmt); 5747 elem_body_bb = split->dest; 5748 if (cont_bb == body_bb) 5749 cont_bb = elem_body_bb; 5750 body_bb = split->src; 5751 5752 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 5753 5754 /* Add a dummy exit for the tiled block when cont_bb is missing. */ 5755 if (cont_bb == NULL) 5756 { 5757 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE); 5758 e->probability = profile_probability::even (); 5759 split->probability = profile_probability::even (); 5760 } 5761 5762 /* Initialize the user's loop vars. */ 5763 gsi = gsi_start_bb (elem_body_bb); 5764 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset); 5765 } 5766 } 5767 5768 /* Loop increment goes into cont_bb. If this is not a loop, we 5769 will have spawned threads as if it was, and each one will 5770 execute one iteration. The specification is not explicit about 5771 whether such constructs are ill-formed or not, and they can 5772 occur, especially when noreturn routines are involved. */ 5773 if (cont_bb) 5774 { 5775 gsi = gsi_last_nondebug_bb (cont_bb); 5776 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5777 loc = gimple_location (cont_stmt); 5778 5779 if (fd->tiling) 5780 { 5781 /* Insert element loop increment and test. */ 5782 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step); 5783 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5784 true, GSI_SAME_STMT); 5785 ass = gimple_build_assign (e_offset, expr); 5786 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5787 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 5788 5789 stmt = gimple_build_cond_empty (expr); 5790 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5791 split = split_block (cont_bb, stmt); 5792 elem_cont_bb = split->src; 5793 cont_bb = split->dest; 5794 5795 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 5796 split->probability = profile_probability::unlikely ().guessed (); 5797 edge latch_edge 5798 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE); 5799 latch_edge->probability = profile_probability::likely ().guessed (); 5800 5801 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE); 5802 skip_edge->probability = profile_probability::unlikely ().guessed (); 5803 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx); 5804 loop_entry_edge->probability 5805 = profile_probability::likely ().guessed (); 5806 5807 gsi = gsi_for_stmt (cont_stmt); 5808 } 5809 5810 /* Increment offset. */ 5811 if (gimple_in_ssa_p (cfun)) 5812 expr = build2 (plus_code, iter_type, offset, 5813 fold_convert (plus_type, step)); 5814 else 5815 expr = build2 (PLUS_EXPR, diff_type, offset, step); 5816 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5817 true, GSI_SAME_STMT); 5818 ass = gimple_build_assign (offset_incr, expr); 5819 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5820 expr = build2 (cond_code, boolean_type_node, offset_incr, bound); 5821 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); 5822 5823 /* Remove the GIMPLE_OMP_CONTINUE. */ 5824 gsi_remove (&gsi, true); 5825 5826 /* Fixup edges from cont_bb. */ 5827 be = BRANCH_EDGE (cont_bb); 5828 fte = FALLTHRU_EDGE (cont_bb); 5829 be->flags |= EDGE_TRUE_VALUE; 5830 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 5831 5832 if (chunking) 5833 { 5834 /* Split the beginning of exit_bb to make bottom_bb. 
We 5835 need to insert a nop at the start, because splitting is 5836 after a stmt, not before. */ 5837 gsi = gsi_start_bb (exit_bb); 5838 stmt = gimple_build_nop (); 5839 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5840 split = split_block (exit_bb, stmt); 5841 bottom_bb = split->src; 5842 exit_bb = split->dest; 5843 gsi = gsi_last_bb (bottom_bb); 5844 5845 /* Chunk increment and test goes into bottom_bb. */ 5846 expr = build2 (PLUS_EXPR, diff_type, chunk_no, 5847 build_int_cst (diff_type, 1)); 5848 ass = gimple_build_assign (chunk_no, expr); 5849 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); 5850 5851 /* Chunk test at end of bottom_bb. */ 5852 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); 5853 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 5854 GSI_CONTINUE_LINKING); 5855 5856 /* Fixup edges from bottom_bb. */ 5857 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 5858 split->probability = profile_probability::unlikely ().guessed (); 5859 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); 5860 latch_edge->probability = profile_probability::likely ().guessed (); 5861 } 5862 } 5863 5864 gsi = gsi_last_nondebug_bb (exit_bb); 5865 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 5866 loc = gimple_location (gsi_stmt (gsi)); 5867 5868 if (!gimple_in_ssa_p (cfun)) 5869 { 5870 /* Insert the final value of V, in case it is live. This is the 5871 value for the only thread that survives past the join. */ 5872 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 5873 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 5874 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 5875 expr = fold_build2 (MULT_EXPR, diff_type, expr, s); 5876 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); 5877 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5878 true, GSI_SAME_STMT); 5879 ass = gimple_build_assign (v, expr); 5880 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5881 } 5882 5883 /* Remove the OMP_RETURN. */ 5884 gsi_remove (&gsi, true); 5885 5886 if (cont_bb) 5887 { 5888 /* We now have one, two or three nested loops. Update the loop 5889 structures. */ 5890 struct loop *parent = entry_bb->loop_father; 5891 struct loop *body = body_bb->loop_father; 5892 5893 if (chunking) 5894 { 5895 struct loop *chunk_loop = alloc_loop (); 5896 chunk_loop->header = head_bb; 5897 chunk_loop->latch = bottom_bb; 5898 add_loop (chunk_loop, parent); 5899 parent = chunk_loop; 5900 } 5901 else if (parent != body) 5902 { 5903 gcc_assert (body->header == body_bb); 5904 gcc_assert (body->latch == cont_bb 5905 || single_pred (body->latch) == cont_bb); 5906 parent = NULL; 5907 } 5908 5909 if (parent) 5910 { 5911 struct loop *body_loop = alloc_loop (); 5912 body_loop->header = body_bb; 5913 body_loop->latch = cont_bb; 5914 add_loop (body_loop, parent); 5915 5916 if (fd->tiling) 5917 { 5918 /* Insert tiling's element loop. */ 5919 struct loop *inner_loop = alloc_loop (); 5920 inner_loop->header = elem_body_bb; 5921 inner_loop->latch = elem_cont_bb; 5922 add_loop (inner_loop, body_loop); 5923 } 5924 } 5925 } 5926 } 5927 5928 /* Expand the OMP loop defined by REGION. 
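SIMD loops, OpenACC loops and taskloops are handed to their dedicated expanders; static schedules without an ordered clause are laid out inline; everything else falls back to the generic GOMP_loop_*_start / GOMP_loop_*_next (or the *_ull_* variants) libgomp interface selected below.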
*/ 5929 5930 static void 5931 expand_omp_for (struct omp_region *region, gimple *inner_stmt) 5932 { 5933 struct omp_for_data fd; 5934 struct omp_for_data_loop *loops; 5935 5936 loops 5937 = (struct omp_for_data_loop *) 5938 alloca (gimple_omp_for_collapse (last_stmt (region->entry)) 5939 * sizeof (struct omp_for_data_loop)); 5940 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), 5941 &fd, loops); 5942 region->sched_kind = fd.sched_kind; 5943 region->sched_modifiers = fd.sched_modifiers; 5944 5945 gcc_assert (EDGE_COUNT (region->entry->succs) == 2); 5946 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 5947 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 5948 if (region->cont) 5949 { 5950 gcc_assert (EDGE_COUNT (region->cont->succs) == 2); 5951 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 5952 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 5953 } 5954 else 5955 /* If there isn't a continue then this is a degenerate case where 5956 the introduction of abnormal edges during lowering will prevent 5957 original loops from being detected. Fix that up. */ 5958 loops_state_set (LOOPS_NEED_FIXUP); 5959 5960 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) 5961 expand_omp_simd (region, &fd); 5962 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) 5963 { 5964 gcc_assert (!inner_stmt); 5965 expand_oacc_for (region, &fd); 5966 } 5967 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP) 5968 { 5969 if (gimple_omp_for_combined_into_p (fd.for_stmt)) 5970 expand_omp_taskloop_for_inner (region, &fd, inner_stmt); 5971 else 5972 expand_omp_taskloop_for_outer (region, &fd, inner_stmt); 5973 } 5974 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC 5975 && !fd.have_ordered) 5976 { 5977 if (fd.chunk_size == NULL) 5978 expand_omp_for_static_nochunk (region, &fd, inner_stmt); 5979 else 5980 expand_omp_for_static_chunk (region, &fd, inner_stmt); 5981 } 5982 else 5983 { 5984 int fn_index, start_ix, next_ix; 5985 unsigned HOST_WIDE_INT sched = 0; 5986 tree sched_arg = NULL_TREE; 5987 5988 gcc_assert (gimple_omp_for_kind (fd.for_stmt) 5989 == GF_OMP_FOR_KIND_FOR); 5990 if (fd.chunk_size == NULL 5991 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) 5992 fd.chunk_size = integer_zero_node; 5993 switch (fd.sched_kind) 5994 { 5995 case OMP_CLAUSE_SCHEDULE_RUNTIME: 5996 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) 5997 { 5998 gcc_assert (!fd.have_ordered); 5999 fn_index = 6; 6000 sched = 4; 6001 } 6002 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 6003 && !fd.have_ordered) 6004 fn_index = 7; 6005 else 6006 { 6007 fn_index = 3; 6008 sched = (HOST_WIDE_INT_1U << 31); 6009 } 6010 break; 6011 case OMP_CLAUSE_SCHEDULE_DYNAMIC: 6012 case OMP_CLAUSE_SCHEDULE_GUIDED: 6013 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 6014 && !fd.have_ordered) 6015 { 6016 fn_index = 3 + fd.sched_kind; 6017 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; 6018 break; 6019 } 6020 fn_index = fd.sched_kind; 6021 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; 6022 sched += (HOST_WIDE_INT_1U << 31); 6023 break; 6024 case OMP_CLAUSE_SCHEDULE_STATIC: 6025 gcc_assert (fd.have_ordered); 6026 fn_index = 0; 6027 sched = (HOST_WIDE_INT_1U << 31) + 1; 6028 break; 6029 default: 6030 gcc_unreachable (); 6031 } 6032 if (!fd.ordered) 6033 fn_index += fd.have_ordered * 8; 6034 if (fd.ordered) 6035 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; 6036 else 6037 start_ix =
((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; 6038 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; 6039 if (fd.have_reductemp) 6040 { 6041 if (fd.ordered) 6042 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START; 6043 else if (fd.have_ordered) 6044 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START; 6045 else 6046 start_ix = (int)BUILT_IN_GOMP_LOOP_START; 6047 sched_arg = build_int_cstu (long_integer_type_node, sched); 6048 if (!fd.chunk_size) 6049 fd.chunk_size = integer_zero_node; 6050 } 6051 if (fd.iter_type == long_long_unsigned_type_node) 6052 { 6053 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START 6054 - (int)BUILT_IN_GOMP_LOOP_STATIC_START); 6055 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT 6056 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); 6057 } 6058 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, 6059 (enum built_in_function) next_ix, sched_arg, 6060 inner_stmt); 6061 } 6062 6063 if (gimple_in_ssa_p (cfun)) 6064 update_ssa (TODO_update_ssa_only_virtuals); 6065 } 6066 6067 /* Expand code for an OpenMP sections directive. In pseudo code, we generate 6068 6069 v = GOMP_sections_start (n); 6070 L0: 6071 switch (v) 6072 { 6073 case 0: 6074 goto L2; 6075 case 1: 6076 section 1; 6077 goto L1; 6078 case 2: 6079 ... 6080 case n: 6081 ... 6082 default: 6083 abort (); 6084 } 6085 L1: 6086 v = GOMP_sections_next (); 6087 goto L0; 6088 L2: 6089 reduction; 6090 6091 If this is a combined parallel sections, replace the call to 6092 GOMP_sections_start with call to GOMP_sections_next. */ 6093 6094 static void 6095 expand_omp_sections (struct omp_region *region) 6096 { 6097 tree t, u, vin = NULL, vmain, vnext, l2; 6098 unsigned len; 6099 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; 6100 gimple_stmt_iterator si, switch_si; 6101 gomp_sections *sections_stmt; 6102 gimple *stmt; 6103 gomp_continue *cont; 6104 edge_iterator ei; 6105 edge e; 6106 struct omp_region *inner; 6107 unsigned i, casei; 6108 bool exit_reachable = region->cont != NULL; 6109 6110 gcc_assert (region->exit != NULL); 6111 entry_bb = region->entry; 6112 l0_bb = single_succ (entry_bb); 6113 l1_bb = region->cont; 6114 l2_bb = region->exit; 6115 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb) 6116 l2 = gimple_block_label (l2_bb); 6117 else 6118 { 6119 /* This can happen if there are reductions. */ 6120 len = EDGE_COUNT (l0_bb->succs); 6121 gcc_assert (len > 0); 6122 e = EDGE_SUCC (l0_bb, len - 1); 6123 si = gsi_last_nondebug_bb (e->dest); 6124 l2 = NULL_TREE; 6125 if (gsi_end_p (si) 6126 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 6127 l2 = gimple_block_label (e->dest); 6128 else 6129 FOR_EACH_EDGE (e, ei, l0_bb->succs) 6130 { 6131 si = gsi_last_nondebug_bb (e->dest); 6132 if (gsi_end_p (si) 6133 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 6134 { 6135 l2 = gimple_block_label (e->dest); 6136 break; 6137 } 6138 } 6139 } 6140 if (exit_reachable) 6141 default_bb = create_empty_bb (l1_bb->prev_bb); 6142 else 6143 default_bb = create_empty_bb (l0_bb); 6144 6145 /* We will build a switch() with enough cases for all the 6146 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work 6147 and a default case to abort if something goes wrong. */ 6148 len = EDGE_COUNT (l0_bb->succs); 6149 6150 /* Use vec::quick_push on label_vec throughout, since we know the size 6151 in advance. */ 6152 auto_vec<tree> label_vec (len); 6153 6154 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the 6155 GIMPLE_OMP_SECTIONS statement. 
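Roughly, for a construct with three sections this becomes 'v = GOMP_sections_start (3);', inside a combined parallel sections it is 'v = GOMP_sections_next ();', and a _reductemp_ clause switches it to GOMP_sections2_start with the reduction descriptor as an extra argument.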
*/ 6156 si = gsi_last_nondebug_bb (entry_bb); 6157 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); 6158 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); 6159 vin = gimple_omp_sections_control (sections_stmt); 6160 tree clauses = gimple_omp_sections_clauses (sections_stmt); 6161 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 6162 if (reductmp) 6163 { 6164 tree reductions = OMP_CLAUSE_DECL (reductmp); 6165 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 6166 gimple *g = SSA_NAME_DEF_STMT (reductions); 6167 reductions = gimple_assign_rhs1 (g); 6168 OMP_CLAUSE_DECL (reductmp) = reductions; 6169 gimple_stmt_iterator gsi = gsi_for_stmt (g); 6170 t = build_int_cst (unsigned_type_node, len - 1); 6171 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START); 6172 stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node); 6173 gimple_call_set_lhs (stmt, vin); 6174 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6175 gsi_remove (&gsi, true); 6176 release_ssa_name (gimple_assign_lhs (g)); 6177 } 6178 else if (!is_combined_parallel (region)) 6179 { 6180 /* If we are not inside a combined parallel+sections region, 6181 call GOMP_sections_start. */ 6182 t = build_int_cst (unsigned_type_node, len - 1); 6183 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); 6184 stmt = gimple_build_call (u, 1, t); 6185 } 6186 else 6187 { 6188 /* Otherwise, call GOMP_sections_next. */ 6189 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 6190 stmt = gimple_build_call (u, 0); 6191 } 6192 if (!reductmp) 6193 { 6194 gimple_call_set_lhs (stmt, vin); 6195 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6196 } 6197 gsi_remove (&si, true); 6198 6199 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in 6200 L0_BB. */ 6201 switch_si = gsi_last_nondebug_bb (l0_bb); 6202 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH); 6203 if (exit_reachable) 6204 { 6205 cont = as_a <gomp_continue *> (last_stmt (l1_bb)); 6206 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE); 6207 vmain = gimple_omp_continue_control_use (cont); 6208 vnext = gimple_omp_continue_control_def (cont); 6209 } 6210 else 6211 { 6212 vmain = vin; 6213 vnext = NULL_TREE; 6214 } 6215 6216 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2); 6217 label_vec.quick_push (t); 6218 i = 1; 6219 6220 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */ 6221 for (inner = region->inner, casei = 1; 6222 inner; 6223 inner = inner->next, i++, casei++) 6224 { 6225 basic_block s_entry_bb, s_exit_bb; 6226 6227 /* Skip optional reduction region. 
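Such a region shows up as a nested GIMPLE_OMP_ATOMIC_LOAD region created for a reduction; it must not receive a case label of its own, hence the adjustment of i and casei below.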
*/ 6228 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD) 6229 { 6230 --i; 6231 --casei; 6232 continue; 6233 } 6234 6235 s_entry_bb = inner->entry; 6236 s_exit_bb = inner->exit; 6237 6238 t = gimple_block_label (s_entry_bb); 6239 u = build_int_cst (unsigned_type_node, casei); 6240 u = build_case_label (u, NULL, t); 6241 label_vec.quick_push (u); 6242 6243 si = gsi_last_nondebug_bb (s_entry_bb); 6244 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION); 6245 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si))); 6246 gsi_remove (&si, true); 6247 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; 6248 6249 if (s_exit_bb == NULL) 6250 continue; 6251 6252 si = gsi_last_nondebug_bb (s_exit_bb); 6253 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 6254 gsi_remove (&si, true); 6255 6256 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; 6257 } 6258 6259 /* Error handling code goes in DEFAULT_BB. */ 6260 t = gimple_block_label (default_bb); 6261 u = build_case_label (NULL, NULL, t); 6262 make_edge (l0_bb, default_bb, 0); 6263 add_bb_to_loop (default_bb, current_loops->tree_root); 6264 6265 stmt = gimple_build_switch (vmain, u, label_vec); 6266 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT); 6267 gsi_remove (&switch_si, true); 6268 6269 si = gsi_start_bb (default_bb); 6270 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); 6271 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING); 6272 6273 if (exit_reachable) 6274 { 6275 tree bfn_decl; 6276 6277 /* Code to get the next section goes in L1_BB. */ 6278 si = gsi_last_nondebug_bb (l1_bb); 6279 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); 6280 6281 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 6282 stmt = gimple_build_call (bfn_decl, 0); 6283 gimple_call_set_lhs (stmt, vnext); 6284 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6285 gsi_remove (&si, true); 6286 6287 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; 6288 } 6289 6290 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */ 6291 si = gsi_last_nondebug_bb (l2_bb); 6292 if (gimple_omp_return_nowait_p (gsi_stmt (si))) 6293 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT); 6294 else if (gimple_omp_return_lhs (gsi_stmt (si))) 6295 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL); 6296 else 6297 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END); 6298 stmt = gimple_build_call (t, 0); 6299 if (gimple_omp_return_lhs (gsi_stmt (si))) 6300 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si))); 6301 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6302 gsi_remove (&si, true); 6303 6304 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); 6305 } 6306 6307 /* Expand code for an OpenMP single directive. We've already expanded 6308 much of the code, here we simply place the GOMP_barrier call. 
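In effect, a single without a nowait clause ends in the barrier call built by omp_build_barrier; with nowait both markers are simply deleted and the blocks fall through.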
*/ 6309 6310 static void 6311 expand_omp_single (struct omp_region *region) 6312 { 6313 basic_block entry_bb, exit_bb; 6314 gimple_stmt_iterator si; 6315 6316 entry_bb = region->entry; 6317 exit_bb = region->exit; 6318 6319 si = gsi_last_nondebug_bb (entry_bb); 6320 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE); 6321 gsi_remove (&si, true); 6322 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 6323 6324 si = gsi_last_nondebug_bb (exit_bb); 6325 if (!gimple_omp_return_nowait_p (gsi_stmt (si))) 6326 { 6327 tree t = gimple_omp_return_lhs (gsi_stmt (si)); 6328 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT); 6329 } 6330 gsi_remove (&si, true); 6331 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; 6332 } 6333 6334 /* Generic expansion for OpenMP synchronization directives: master, 6335 ordered and critical. All we need to do here is remove the entry 6336 and exit markers for REGION. */ 6337 6338 static void 6339 expand_omp_synch (struct omp_region *region) 6340 { 6341 basic_block entry_bb, exit_bb; 6342 gimple_stmt_iterator si; 6343 6344 entry_bb = region->entry; 6345 exit_bb = region->exit; 6346 6347 si = gsi_last_nondebug_bb (entry_bb); 6348 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE 6349 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER 6350 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP 6351 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED 6352 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL 6353 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); 6354 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS 6355 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si)))) 6356 { 6357 expand_omp_taskreg (region); 6358 return; 6359 } 6360 gsi_remove (&si, true); 6361 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 6362 6363 if (exit_bb) 6364 { 6365 si = gsi_last_nondebug_bb (exit_bb); 6366 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 6367 gsi_remove (&si, true); 6368 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; 6369 } 6370 } 6371 6372 /* Translate enum omp_memory_order to enum memmodel. The two enums 6373 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED 6374 is 0. */ 6375 6376 static enum memmodel 6377 omp_memory_order_to_memmodel (enum omp_memory_order mo) 6378 { 6379 switch (mo) 6380 { 6381 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED; 6382 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE; 6383 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE; 6384 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL; 6385 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST; 6386 default: gcc_unreachable (); 6387 } 6388 } 6389 6390 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 6391 operation as a normal volatile load. */ 6392 6393 static bool 6394 expand_omp_atomic_load (basic_block load_bb, tree addr, 6395 tree loaded_val, int index) 6396 { 6397 enum built_in_function tmpbase; 6398 gimple_stmt_iterator gsi; 6399 basic_block store_bb; 6400 location_t loc; 6401 gimple *stmt; 6402 tree decl, call, type, itype; 6403 6404 gsi = gsi_last_nondebug_bb (load_bb); 6405 stmt = gsi_stmt (gsi); 6406 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); 6407 loc = gimple_location (stmt); 6408 6409 /* ??? If the target does not implement atomic_load_optab[mode], and mode 6410 is smaller than word size, then expand_atomic_load assumes that the load 6411 is atomic. We could avoid the builtin entirely in this case. 
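For example, with INDEX == 2 (a 4-byte operand) the generated code is roughly 'loaded_val = __atomic_load_4 (addr, mo);', where mo encodes the directive's memory order, and a VIEW_CONVERT_EXPR is wrapped around the call if the operand is not an integer of that size.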
*/ 6412 6413 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); 6414 decl = builtin_decl_explicit (tmpbase); 6415 if (decl == NULL_TREE) 6416 return false; 6417 6418 type = TREE_TYPE (loaded_val); 6419 itype = TREE_TYPE (TREE_TYPE (decl)); 6420 6421 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt); 6422 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo)); 6423 call = build_call_expr_loc (loc, decl, 2, addr, mo); 6424 if (!useless_type_conversion_p (type, itype)) 6425 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); 6426 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); 6427 6428 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 6429 gsi_remove (&gsi, true); 6430 6431 store_bb = single_succ (load_bb); 6432 gsi = gsi_last_nondebug_bb (store_bb); 6433 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); 6434 gsi_remove (&gsi, true); 6435 6436 if (gimple_in_ssa_p (cfun)) 6437 update_ssa (TODO_update_ssa_no_phi); 6438 6439 return true; 6440 } 6441 6442 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 6443 operation as a normal volatile store. */ 6444 6445 static bool 6446 expand_omp_atomic_store (basic_block load_bb, tree addr, 6447 tree loaded_val, tree stored_val, int index) 6448 { 6449 enum built_in_function tmpbase; 6450 gimple_stmt_iterator gsi; 6451 basic_block store_bb = single_succ (load_bb); 6452 location_t loc; 6453 gimple *stmt; 6454 tree decl, call, type, itype; 6455 machine_mode imode; 6456 bool exchange; 6457 6458 gsi = gsi_last_nondebug_bb (load_bb); 6459 stmt = gsi_stmt (gsi); 6460 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); 6461 6462 /* If the load value is needed, then this isn't a store but an exchange. */ 6463 exchange = gimple_omp_atomic_need_value_p (stmt); 6464 6465 gsi = gsi_last_nondebug_bb (store_bb); 6466 stmt = gsi_stmt (gsi); 6467 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE); 6468 loc = gimple_location (stmt); 6469 6470 /* ??? If the target does not implement atomic_store_optab[mode], and mode 6471 is smaller than word size, then expand_atomic_store assumes that the store 6472 is atomic. We could avoid the builtin entirely in this case. */ 6473 6474 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N); 6475 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1); 6476 decl = builtin_decl_explicit (tmpbase); 6477 if (decl == NULL_TREE) 6478 return false; 6479 6480 type = TREE_TYPE (stored_val); 6481 6482 /* Dig out the type of the function's second argument. 
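That argument is the value operand of __atomic_store_N / __atomic_exchange_N; its machine mode is also what decides below whether an atomic exchange is available at all.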
*/ 6483 itype = TREE_TYPE (decl); 6484 itype = TYPE_ARG_TYPES (itype); 6485 itype = TREE_CHAIN (itype); 6486 itype = TREE_VALUE (itype); 6487 imode = TYPE_MODE (itype); 6488 6489 if (exchange && !can_atomic_exchange_p (imode, true)) 6490 return false; 6491 6492 if (!useless_type_conversion_p (itype, type)) 6493 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); 6494 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt); 6495 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo)); 6496 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo); 6497 if (exchange) 6498 { 6499 if (!useless_type_conversion_p (type, itype)) 6500 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); 6501 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); 6502 } 6503 6504 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 6505 gsi_remove (&gsi, true); 6506 6507 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */ 6508 gsi = gsi_last_nondebug_bb (load_bb); 6509 gsi_remove (&gsi, true); 6510 6511 if (gimple_in_ssa_p (cfun)) 6512 update_ssa (TODO_update_ssa_no_phi); 6513 6514 return true; 6515 } 6516 6517 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 6518 operation as a __atomic_fetch_op builtin. INDEX is log2 of the 6519 size of the data type, and thus usable to find the index of the builtin 6520 decl. Returns false if the expression is not of the proper form. */ 6521 6522 static bool 6523 expand_omp_atomic_fetch_op (basic_block load_bb, 6524 tree addr, tree loaded_val, 6525 tree stored_val, int index) 6526 { 6527 enum built_in_function oldbase, newbase, tmpbase; 6528 tree decl, itype, call; 6529 tree lhs, rhs; 6530 basic_block store_bb = single_succ (load_bb); 6531 gimple_stmt_iterator gsi; 6532 gimple *stmt; 6533 location_t loc; 6534 enum tree_code code; 6535 bool need_old, need_new; 6536 machine_mode imode; 6537 6538 /* We expect to find the following sequences: 6539 6540 load_bb: 6541 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) 6542 6543 store_bb: 6544 val = tmp OP something; (or: something OP tmp) 6545 GIMPLE_OMP_STORE (val) 6546 6547 ???FIXME: Allow a more flexible sequence. 6548 Perhaps use data flow to pick the statements. 6549 6550 */ 6551 6552 gsi = gsi_after_labels (store_bb); 6553 stmt = gsi_stmt (gsi); 6554 if (is_gimple_debug (stmt)) 6555 { 6556 gsi_next_nondebug (&gsi); 6557 if (gsi_end_p (gsi)) 6558 return false; 6559 stmt = gsi_stmt (gsi); 6560 } 6561 loc = gimple_location (stmt); 6562 if (!is_gimple_assign (stmt)) 6563 return false; 6564 gsi_next_nondebug (&gsi); 6565 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE) 6566 return false; 6567 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi)); 6568 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb)); 6569 enum omp_memory_order omo 6570 = gimple_omp_atomic_memory_order (last_stmt (load_bb)); 6571 enum memmodel mo = omp_memory_order_to_memmodel (omo); 6572 gcc_checking_assert (!need_old || !need_new); 6573 6574 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0)) 6575 return false; 6576 6577 /* Check for one of the supported fetch-op operations. 
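For example, '#pragma omp atomic' adding EXPR to a 4-byte int becomes roughly '__atomic_fetch_add_4 (addr, EXPR, mo)', or the corresponding __atomic_add_fetch_4 form when the captured new value is needed.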
*/ 6578 code = gimple_assign_rhs_code (stmt); 6579 switch (code) 6580 { 6581 case PLUS_EXPR: 6582 case POINTER_PLUS_EXPR: 6583 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N; 6584 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N; 6585 break; 6586 case MINUS_EXPR: 6587 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N; 6588 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N; 6589 break; 6590 case BIT_AND_EXPR: 6591 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N; 6592 newbase = BUILT_IN_ATOMIC_AND_FETCH_N; 6593 break; 6594 case BIT_IOR_EXPR: 6595 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N; 6596 newbase = BUILT_IN_ATOMIC_OR_FETCH_N; 6597 break; 6598 case BIT_XOR_EXPR: 6599 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N; 6600 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N; 6601 break; 6602 default: 6603 return false; 6604 } 6605 6606 /* Make sure the expression is of the proper form. */ 6607 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0)) 6608 rhs = gimple_assign_rhs2 (stmt); 6609 else if (commutative_tree_code (gimple_assign_rhs_code (stmt)) 6610 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0)) 6611 rhs = gimple_assign_rhs1 (stmt); 6612 else 6613 return false; 6614 6615 tmpbase = ((enum built_in_function) 6616 ((need_new ? newbase : oldbase) + index + 1)); 6617 decl = builtin_decl_explicit (tmpbase); 6618 if (decl == NULL_TREE) 6619 return false; 6620 itype = TREE_TYPE (TREE_TYPE (decl)); 6621 imode = TYPE_MODE (itype); 6622 6623 /* We could test all of the various optabs involved, but the fact of the 6624 matter is that (with the exception of i486 vs i586 and xadd) all targets 6625 that support any atomic operation optab also implement compare-and-swap. 6626 Let optabs.c take care of expanding any compare-and-swap loop. */ 6627 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode)) 6628 return false; 6629 6630 gsi = gsi_last_nondebug_bb (load_bb); 6631 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD); 6632 6633 /* OpenMP does not imply any barrier-like semantics on its atomic ops. 6634 It only requires that the operation happen atomically. Thus we can 6635 use the RELAXED memory model. */ 6636 call = build_call_expr_loc (loc, decl, 3, addr, 6637 fold_convert_loc (loc, itype, rhs), 6638 build_int_cst (NULL, mo)); 6639 6640 if (need_old || need_new) 6641 { 6642 lhs = need_old ? loaded_val : stored_val; 6643 call = fold_convert_loc (loc, TREE_TYPE (lhs), call); 6644 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call); 6645 } 6646 else 6647 call = fold_convert_loc (loc, void_type_node, call); 6648 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 6649 gsi_remove (&gsi, true); 6650 6651 gsi = gsi_last_nondebug_bb (store_bb); 6652 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); 6653 gsi_remove (&gsi, true); 6654 gsi = gsi_last_nondebug_bb (store_bb); 6655 stmt = gsi_stmt (gsi); 6656 gsi_remove (&gsi, true); 6657 6658 if (gimple_in_ssa_p (cfun)) 6659 { 6660 release_defs (stmt); 6661 update_ssa (TODO_update_ssa_no_phi); 6662 } 6663 6664 return true; 6665 } 6666 6667 /* A subroutine of expand_omp_atomic. Implement the atomic operation as: 6668 6669 oldval = *addr; 6670 repeat: 6671 newval = rhs; // with oldval replacing *addr in rhs 6672 oldval = __sync_val_compare_and_swap (addr, oldval, newval); 6673 if (oldval != newval) 6674 goto repeat; 6675 6676 INDEX is log2 of the size of the data type, and thus usable to find the 6677 index of the builtin decl.
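E.g. INDEX == 2 (4-byte operands) selects __sync_val_compare_and_swap_4; floating-point operands are first VIEW_CONVERT_EXPRed to a same-sized integral type so the compare-and-swap works on the raw bits.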
*/ 6678 6679 static bool 6680 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, 6681 tree addr, tree loaded_val, tree stored_val, 6682 int index) 6683 { 6684 tree loadedi, storedi, initial, new_storedi, old_vali; 6685 tree type, itype, cmpxchg, iaddr, atype; 6686 gimple_stmt_iterator si; 6687 basic_block loop_header = single_succ (load_bb); 6688 gimple *phi, *stmt; 6689 edge e; 6690 enum built_in_function fncode; 6691 6692 /* ??? We need a non-pointer interface to __atomic_compare_exchange in 6693 order to use the RELAXED memory model effectively. */ 6694 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N 6695 + index + 1); 6696 cmpxchg = builtin_decl_explicit (fncode); 6697 if (cmpxchg == NULL_TREE) 6698 return false; 6699 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val)); 6700 atype = type; 6701 itype = TREE_TYPE (TREE_TYPE (cmpxchg)); 6702 6703 if (!can_compare_and_swap_p (TYPE_MODE (itype), true) 6704 || !can_atomic_load_p (TYPE_MODE (itype))) 6705 return false; 6706 6707 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */ 6708 si = gsi_last_nondebug_bb (load_bb); 6709 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); 6710 6711 /* For floating-point values, we'll need to view-convert them to integers 6712 so that we can perform the atomic compare and swap. Simplify the 6713 following code by always setting up the "i"ntegral variables. */ 6714 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) 6715 { 6716 tree iaddr_val; 6717 6718 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode, 6719 true)); 6720 atype = itype; 6721 iaddr_val 6722 = force_gimple_operand_gsi (&si, 6723 fold_convert (TREE_TYPE (iaddr), addr), 6724 false, NULL_TREE, true, GSI_SAME_STMT); 6725 stmt = gimple_build_assign (iaddr, iaddr_val); 6726 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6727 loadedi = create_tmp_var (itype); 6728 if (gimple_in_ssa_p (cfun)) 6729 loadedi = make_ssa_name (loadedi); 6730 } 6731 else 6732 { 6733 iaddr = addr; 6734 loadedi = loaded_val; 6735 } 6736 6737 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); 6738 tree loaddecl = builtin_decl_explicit (fncode); 6739 if (loaddecl) 6740 initial 6741 = fold_convert (atype, 6742 build_call_expr (loaddecl, 2, iaddr, 6743 build_int_cst (NULL_TREE, 6744 MEMMODEL_RELAXED))); 6745 else 6746 { 6747 tree off 6748 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode, 6749 true), 0); 6750 initial = build2 (MEM_REF, atype, iaddr, off); 6751 } 6752 6753 initial 6754 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true, 6755 GSI_SAME_STMT); 6756 6757 /* Move the value to the LOADEDI temporary. 
*/ 6758 if (gimple_in_ssa_p (cfun)) 6759 { 6760 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header))); 6761 phi = create_phi_node (loadedi, loop_header); 6762 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), 6763 initial); 6764 } 6765 else 6766 gsi_insert_before (&si, 6767 gimple_build_assign (loadedi, initial), 6768 GSI_SAME_STMT); 6769 if (loadedi != loaded_val) 6770 { 6771 gimple_stmt_iterator gsi2; 6772 tree x; 6773 6774 x = build1 (VIEW_CONVERT_EXPR, type, loadedi); 6775 gsi2 = gsi_start_bb (loop_header); 6776 if (gimple_in_ssa_p (cfun)) 6777 { 6778 gassign *stmt; 6779 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, 6780 true, GSI_SAME_STMT); 6781 stmt = gimple_build_assign (loaded_val, x); 6782 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT); 6783 } 6784 else 6785 { 6786 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x); 6787 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, 6788 true, GSI_SAME_STMT); 6789 } 6790 } 6791 gsi_remove (&si, true); 6792 6793 si = gsi_last_nondebug_bb (store_bb); 6794 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); 6795 6796 if (iaddr == addr) 6797 storedi = stored_val; 6798 else 6799 storedi 6800 = force_gimple_operand_gsi (&si, 6801 build1 (VIEW_CONVERT_EXPR, itype, 6802 stored_val), true, NULL_TREE, true, 6803 GSI_SAME_STMT); 6804 6805 /* Build the compare&swap statement. */ 6806 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); 6807 new_storedi = force_gimple_operand_gsi (&si, 6808 fold_convert (TREE_TYPE (loadedi), 6809 new_storedi), 6810 true, NULL_TREE, 6811 true, GSI_SAME_STMT); 6812 6813 if (gimple_in_ssa_p (cfun)) 6814 old_vali = loadedi; 6815 else 6816 { 6817 old_vali = create_tmp_var (TREE_TYPE (loadedi)); 6818 stmt = gimple_build_assign (old_vali, loadedi); 6819 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6820 6821 stmt = gimple_build_assign (loadedi, new_storedi); 6822 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6823 } 6824 6825 /* Note that we always perform the comparison as an integer, even for 6826 floating point. This allows the atomic operation to properly 6827 succeed even with NaNs and -0.0. */ 6828 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali); 6829 stmt = gimple_build_cond_empty (ne); 6830 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6831 6832 /* Update cfg. */ 6833 e = single_succ_edge (store_bb); 6834 e->flags &= ~EDGE_FALLTHRU; 6835 e->flags |= EDGE_FALSE_VALUE; 6836 /* Expect no looping. */ 6837 e->probability = profile_probability::guessed_always (); 6838 6839 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); 6840 e->probability = profile_probability::guessed_never (); 6841 6842 /* Copy the new value to loadedi (we already did that before the condition 6843 if we are not in SSA). */ 6844 if (gimple_in_ssa_p (cfun)) 6845 { 6846 phi = gimple_seq_first_stmt (phi_nodes (loop_header)); 6847 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); 6848 } 6849 6850 /* Remove GIMPLE_OMP_ATOMIC_STORE. */ 6851 gsi_remove (&si, true); 6852 6853 struct loop *loop = alloc_loop (); 6854 loop->header = loop_header; 6855 loop->latch = store_bb; 6856 add_loop (loop, loop_header->loop_father); 6857 6858 if (gimple_in_ssa_p (cfun)) 6859 update_ssa (TODO_update_ssa_no_phi); 6860 6861 return true; 6862 } 6863 6864 /* A subroutine of expand_omp_atomic. 
Implement the atomic operation as:
6865
6866 GOMP_atomic_start ();
6867 *addr = rhs;
6868 GOMP_atomic_end ();
6869
6870 The result is not globally atomic, but works so long as all parallel
6871 references are within #pragma omp atomic directives. According to
6872 responses received from omp@openmp.org, this appears to be within spec.
6873 That makes sense, since it is how several other compilers handle
6874 this situation as well.
6875 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6876 expanding. STORED_VAL is the operand of the matching
6877 GIMPLE_OMP_ATOMIC_STORE.
6878
6879 We replace
6880 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6881 loaded_val = *addr;
6882
6883 and replace
6884 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6885 *addr = stored_val;
6886 */
6887
6888 static bool
6889 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6890 tree addr, tree loaded_val, tree stored_val)
6891 {
6892 gimple_stmt_iterator si;
6893 gassign *stmt;
6894 tree t;
6895
6896 si = gsi_last_nondebug_bb (load_bb);
6897 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6898
6899 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6900 t = build_call_expr (t, 0);
6901 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6902
6903 tree mem = build_simple_mem_ref (addr);
6904 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6905 TREE_OPERAND (mem, 1)
6906 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6907 true),
6908 TREE_OPERAND (mem, 1));
6909 stmt = gimple_build_assign (loaded_val, mem);
6910 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6911 gsi_remove (&si, true);
6912
6913 si = gsi_last_nondebug_bb (store_bb);
6914 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6915
6916 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6917 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6918
6919 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6920 t = build_call_expr (t, 0);
6921 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6922 gsi_remove (&si, true);
6923
6924 if (gimple_in_ssa_p (cfun))
6925 update_ssa (TODO_update_ssa_no_phi);
6926 return true;
6927 }
6928
6929 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
6930 using expand_omp_atomic_fetch_op. If that fails, we try to
6931 call expand_omp_atomic_pipeline, and if that fails too, the
6932 ultimate fallback is wrapping the operation in a mutex
6933 (expand_omp_atomic_mutex). REGION is the atomic region built
6934 by build_omp_regions_1(). */
6935
6936 static void
6937 expand_omp_atomic (struct omp_region *region)
6938 {
6939 basic_block load_bb = region->entry, store_bb = region->exit;
6940 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6941 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6942 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6943 tree addr = gimple_omp_atomic_load_rhs (load);
6944 tree stored_val = gimple_omp_atomic_store_val (store);
6945 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6946 HOST_WIDE_INT index;
6947
6948 /* Make sure the type is one of the supported sizes. */
6949 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6950 index = exact_log2 (index);
6951 if (index >= 0 && index <= 4)
6952 {
6953 unsigned int align = TYPE_ALIGN_UNIT (type);
6954
6955 /* __sync builtins require strict data alignment. */
6956 if (exact_log2 (align) >= index)
6957 {
6958 /* Atomic load.
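This is the case where LOADED_VAL and STORED_VAL are the same temporary,
i.e. the region only reads the location (as produced for
'#pragma omp atomic read').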
*/
6959 scalar_mode smode;
6960 if (loaded_val == stored_val
6961 && (is_int_mode (TYPE_MODE (type), &smode)
6962 || is_float_mode (TYPE_MODE (type), &smode))
6963 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6964 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6965 return;
6966
6967 /* Atomic store. */
6968 if ((is_int_mode (TYPE_MODE (type), &smode)
6969 || is_float_mode (TYPE_MODE (type), &smode))
6970 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6971 && store_bb == single_succ (load_bb)
6972 && first_stmt (store_bb) == store
6973 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6974 stored_val, index))
6975 return;
6976
6977 /* When possible, use specialized atomic update functions. */
6978 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6979 && store_bb == single_succ (load_bb)
6980 && expand_omp_atomic_fetch_op (load_bb, addr,
6981 loaded_val, stored_val, index))
6982 return;
6983
6984 /* If we don't have specialized __sync builtins, try to implement it
6985 as a compare and swap loop. */
6986 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6987 loaded_val, stored_val, index))
6988 return;
6989 }
6990 }
6991
6992 /* The ultimate fallback is wrapping the operation in a mutex. */
6993 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6994 }
6995
6996 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6997 at REGION_EXIT. */
6998
6999 static void
7000 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7001 basic_block region_exit)
7002 {
7003 struct loop *outer = region_entry->loop_father;
7004 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7005
7006 /* Don't parallelize the kernels region if it contains more than one outer
7007 loop. */
7008 unsigned int nr_outer_loops = 0;
7009 struct loop *single_outer = NULL;
7010 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7011 {
7012 gcc_assert (loop_outer (loop) == outer);
7013
7014 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7015 continue;
7016
7017 if (region_exit != NULL
7018 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7019 continue;
7020
7021 nr_outer_loops++;
7022 single_outer = loop;
7023 }
7024 if (nr_outer_loops != 1)
7025 return;
7026
7027 for (struct loop *loop = single_outer->inner;
7028 loop != NULL;
7029 loop = loop->inner)
7030 if (loop->next)
7031 return;
7032
7033 /* Mark the loops in the region. */
7034 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7035 loop->in_oacc_kernels_region = true;
7036 }
7037
7038 /* Types used to pass grid and workgroup sizes to kernel invocation. */
7039
7040 struct GTY(()) grid_launch_attributes_trees
7041 {
7042 tree kernel_dim_array_type;
7043 tree kernel_lattrs_dimnum_decl;
7044 tree kernel_lattrs_grid_decl;
7045 tree kernel_lattrs_group_decl;
7046 tree kernel_launch_attributes_type;
7047 };
7048
7049 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7050
7051 /* Create types used to pass kernel launch attributes to target.
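The record built here corresponds roughly to the following C declaration
(shown only for illustration):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };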
*/ 7052 7053 static void 7054 grid_create_kernel_launch_attr_types (void) 7055 { 7056 if (grid_attr_trees) 7057 return; 7058 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); 7059 7060 tree dim_arr_index_type 7061 = build_index_type (build_int_cst (integer_type_node, 2)); 7062 grid_attr_trees->kernel_dim_array_type 7063 = build_array_type (uint32_type_node, dim_arr_index_type); 7064 7065 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); 7066 grid_attr_trees->kernel_lattrs_dimnum_decl 7067 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), 7068 uint32_type_node); 7069 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; 7070 7071 grid_attr_trees->kernel_lattrs_grid_decl 7072 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), 7073 grid_attr_trees->kernel_dim_array_type); 7074 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) 7075 = grid_attr_trees->kernel_lattrs_dimnum_decl; 7076 grid_attr_trees->kernel_lattrs_group_decl 7077 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), 7078 grid_attr_trees->kernel_dim_array_type); 7079 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) 7080 = grid_attr_trees->kernel_lattrs_grid_decl; 7081 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, 7082 "__gomp_kernel_launch_attributes", 7083 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); 7084 } 7085 7086 /* Insert before the current statement in GSI a store of VALUE to INDEX of 7087 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be 7088 of type uint32_type_node. */ 7089 7090 static void 7091 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, 7092 tree fld_decl, int index, tree value) 7093 { 7094 tree ref = build4 (ARRAY_REF, uint32_type_node, 7095 build3 (COMPONENT_REF, 7096 grid_attr_trees->kernel_dim_array_type, 7097 range_var, fld_decl, NULL_TREE), 7098 build_int_cst (integer_type_node, index), 7099 NULL_TREE, NULL_TREE); 7100 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); 7101 } 7102 7103 /* Return a tree representation of a pointer to a structure with grid and 7104 work-group size information. Statements filling that information will be 7105 inserted before GSI, TGT_STMT is the target statement which has the 7106 necessary information in it. 
*/ 7107 7108 static tree 7109 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, 7110 gomp_target *tgt_stmt) 7111 { 7112 grid_create_kernel_launch_attr_types (); 7113 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, 7114 "__kernel_launch_attrs"); 7115 7116 unsigned max_dim = 0; 7117 for (tree clause = gimple_omp_target_clauses (tgt_stmt); 7118 clause; 7119 clause = OMP_CLAUSE_CHAIN (clause)) 7120 { 7121 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) 7122 continue; 7123 7124 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); 7125 max_dim = MAX (dim, max_dim); 7126 7127 grid_insert_store_range_dim (gsi, lattrs, 7128 grid_attr_trees->kernel_lattrs_grid_decl, 7129 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); 7130 grid_insert_store_range_dim (gsi, lattrs, 7131 grid_attr_trees->kernel_lattrs_group_decl, 7132 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); 7133 } 7134 7135 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, 7136 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); 7137 gcc_checking_assert (max_dim <= 2); 7138 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); 7139 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), 7140 GSI_SAME_STMT); 7141 TREE_ADDRESSABLE (lattrs) = 1; 7142 return build_fold_addr_expr (lattrs); 7143 } 7144 7145 /* Build target argument identifier from the DEVICE identifier, value 7146 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ 7147 7148 static tree 7149 get_target_argument_identifier_1 (int device, bool subseqent_param, int id) 7150 { 7151 tree t = build_int_cst (integer_type_node, device); 7152 if (subseqent_param) 7153 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 7154 build_int_cst (integer_type_node, 7155 GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); 7156 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 7157 build_int_cst (integer_type_node, id)); 7158 return t; 7159 } 7160 7161 /* Like above but return it in type that can be directly stored as an element 7162 of the argument array. */ 7163 7164 static tree 7165 get_target_argument_identifier (int device, bool subseqent_param, int id) 7166 { 7167 tree t = get_target_argument_identifier_1 (device, subseqent_param, id); 7168 return fold_convert (ptr_type_node, t); 7169 } 7170 7171 /* Return a target argument consisting of DEVICE identifier, value identifier 7172 ID, and the actual VALUE. */ 7173 7174 static tree 7175 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, 7176 tree value) 7177 { 7178 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, 7179 fold_convert (integer_type_node, value), 7180 build_int_cst (unsigned_type_node, 7181 GOMP_TARGET_ARG_VALUE_SHIFT)); 7182 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 7183 get_target_argument_identifier_1 (device, false, id)); 7184 t = fold_convert (ptr_type_node, t); 7185 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); 7186 } 7187 7188 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, 7189 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, 7190 otherwise push an identifier (with DEVICE and ID) and the VALUE in two 7191 arguments. 
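For the embedded form the element pushed is, in effect,
     (void *) ((VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID)
while the two-element form pushes an identifier that additionally has
GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by VALUE itself cast to a
pointer.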
*/ 7192 7193 static void 7194 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, 7195 int id, tree value, vec <tree> *args) 7196 { 7197 if (tree_fits_shwi_p (value) 7198 && tree_to_shwi (value) > -(1 << 15) 7199 && tree_to_shwi (value) < (1 << 15)) 7200 args->quick_push (get_target_argument_value (gsi, device, id, value)); 7201 else 7202 { 7203 args->quick_push (get_target_argument_identifier (device, true, id)); 7204 value = fold_convert (ptr_type_node, value); 7205 value = force_gimple_operand_gsi (gsi, value, true, NULL, true, 7206 GSI_SAME_STMT); 7207 args->quick_push (value); 7208 } 7209 } 7210 7211 /* Create an array of arguments that is then passed to GOMP_target. */ 7212 7213 static tree 7214 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) 7215 { 7216 auto_vec <tree, 6> args; 7217 tree clauses = gimple_omp_target_clauses (tgt_stmt); 7218 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); 7219 if (c) 7220 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); 7221 else 7222 t = integer_minus_one_node; 7223 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 7224 GOMP_TARGET_ARG_NUM_TEAMS, t, &args); 7225 7226 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); 7227 if (c) 7228 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); 7229 else 7230 t = integer_minus_one_node; 7231 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 7232 GOMP_TARGET_ARG_THREAD_LIMIT, t, 7233 &args); 7234 7235 /* Add HSA-specific grid sizes, if available. */ 7236 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 7237 OMP_CLAUSE__GRIDDIM_)) 7238 { 7239 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES; 7240 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id); 7241 args.quick_push (t); 7242 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); 7243 } 7244 7245 /* Produce more, perhaps device specific, arguments here. */ 7246 7247 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, 7248 args.length () + 1), 7249 ".omp_target_args"); 7250 for (unsigned i = 0; i < args.length (); i++) 7251 { 7252 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 7253 build_int_cst (integer_type_node, i), 7254 NULL_TREE, NULL_TREE); 7255 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), 7256 GSI_SAME_STMT); 7257 } 7258 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 7259 build_int_cst (integer_type_node, args.length ()), 7260 NULL_TREE, NULL_TREE); 7261 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), 7262 GSI_SAME_STMT); 7263 TREE_ADDRESSABLE (argarray) = 1; 7264 return build_fold_addr_expr (argarray); 7265 } 7266 7267 /* Expand the GIMPLE_OMP_TARGET starting at REGION. 
*/
7268
7269 static void
7270 expand_omp_target (struct omp_region *region)
7271 {
7272 basic_block entry_bb, exit_bb, new_bb;
7273 struct function *child_cfun;
7274 tree child_fn, block, t;
7275 gimple_stmt_iterator gsi;
7276 gomp_target *entry_stmt;
7277 gimple *stmt;
7278 edge e;
7279 bool offloaded, data_region;
7280
7281 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7282 new_bb = region->entry;
7283
7284 offloaded = is_gimple_omp_offloaded (entry_stmt);
7285 switch (gimple_omp_target_kind (entry_stmt))
7286 {
7287 case GF_OMP_TARGET_KIND_REGION:
7288 case GF_OMP_TARGET_KIND_UPDATE:
7289 case GF_OMP_TARGET_KIND_ENTER_DATA:
7290 case GF_OMP_TARGET_KIND_EXIT_DATA:
7291 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7292 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7293 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7294 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7295 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7296 data_region = false;
7297 break;
7298 case GF_OMP_TARGET_KIND_DATA:
7299 case GF_OMP_TARGET_KIND_OACC_DATA:
7300 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7301 data_region = true;
7302 break;
7303 default:
7304 gcc_unreachable ();
7305 }
7306
7307 child_fn = NULL_TREE;
7308 child_cfun = NULL;
7309 if (offloaded)
7310 {
7311 child_fn = gimple_omp_target_child_fn (entry_stmt);
7312 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7313 }
7314
7315 /* Supported by expand_omp_taskreg, but not here. */
7316 if (child_cfun != NULL)
7317 gcc_checking_assert (!child_cfun->cfg);
7318 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7319
7320 entry_bb = region->entry;
7321 exit_bb = region->exit;
7322
7323 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7324 {
7325 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7326
7327 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7328 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7329 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7330 DECL_ATTRIBUTES (child_fn)
7331 = tree_cons (get_identifier ("oacc kernels"),
7332 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7333 }
7334
7335 if (offloaded)
7336 {
7337 unsigned srcidx, dstidx, num;
7338
7339 /* If the offloading region needs data sent from the parent
7340 function, then the very first statement (except possible
7341 tree profile counter updates) of the offloading body
7342 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7343 &.OMP_DATA_O is passed as an argument to the child function,
7344 we need to replace it with the argument as seen by the child
7345 function.
7346
7347 In most cases, this will end up being the identity assignment
7348 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7349 a function call that has been inlined, the original PARM_DECL
7350 .OMP_DATA_I may have been converted into a different local
7351 variable, in which case we need to keep the assignment.
*/ 7352 tree data_arg = gimple_omp_target_data_arg (entry_stmt); 7353 if (data_arg) 7354 { 7355 basic_block entry_succ_bb = single_succ (entry_bb); 7356 gimple_stmt_iterator gsi; 7357 tree arg; 7358 gimple *tgtcopy_stmt = NULL; 7359 tree sender = TREE_VEC_ELT (data_arg, 0); 7360 7361 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) 7362 { 7363 gcc_assert (!gsi_end_p (gsi)); 7364 stmt = gsi_stmt (gsi); 7365 if (gimple_code (stmt) != GIMPLE_ASSIGN) 7366 continue; 7367 7368 if (gimple_num_ops (stmt) == 2) 7369 { 7370 tree arg = gimple_assign_rhs1 (stmt); 7371 7372 /* We're ignoring the subcode because we're 7373 effectively doing a STRIP_NOPS. */ 7374 7375 if (TREE_CODE (arg) == ADDR_EXPR 7376 && TREE_OPERAND (arg, 0) == sender) 7377 { 7378 tgtcopy_stmt = stmt; 7379 break; 7380 } 7381 } 7382 } 7383 7384 gcc_assert (tgtcopy_stmt != NULL); 7385 arg = DECL_ARGUMENTS (child_fn); 7386 7387 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg); 7388 gsi_remove (&gsi, true); 7389 } 7390 7391 /* Declare local variables needed in CHILD_CFUN. */ 7392 block = DECL_INITIAL (child_fn); 7393 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); 7394 /* The gimplifier could record temporaries in the offloading block 7395 rather than in containing function's local_decls chain, 7396 which would mean cgraph missed finalizing them. Do it now. */ 7397 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) 7398 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) 7399 varpool_node::finalize_decl (t); 7400 DECL_SAVED_TREE (child_fn) = NULL; 7401 /* We'll create a CFG for child_fn, so no gimple body is needed. */ 7402 gimple_set_body (child_fn, NULL); 7403 TREE_USED (block) = 1; 7404 7405 /* Reset DECL_CONTEXT on function arguments. */ 7406 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) 7407 DECL_CONTEXT (t) = child_fn; 7408 7409 /* Split ENTRY_BB at GIMPLE_*, 7410 so that it can be moved to the child function. */ 7411 gsi = gsi_last_nondebug_bb (entry_bb); 7412 stmt = gsi_stmt (gsi); 7413 gcc_assert (stmt 7414 && gimple_code (stmt) == gimple_code (entry_stmt)); 7415 e = split_block (entry_bb, stmt); 7416 gsi_remove (&gsi, true); 7417 entry_bb = e->dest; 7418 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 7419 7420 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */ 7421 if (exit_bb) 7422 { 7423 gsi = gsi_last_nondebug_bb (exit_bb); 7424 gcc_assert (!gsi_end_p (gsi) 7425 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 7426 stmt = gimple_build_return (NULL); 7427 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); 7428 gsi_remove (&gsi, true); 7429 } 7430 7431 /* Move the offloading region into CHILD_CFUN. */ 7432 7433 block = gimple_block (entry_stmt); 7434 7435 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); 7436 if (exit_bb) 7437 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; 7438 /* When the OMP expansion process cannot guarantee an up-to-date 7439 loop tree arrange for the child function to fixup loops. */ 7440 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 7441 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; 7442 7443 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. 
*/ 7444 num = vec_safe_length (child_cfun->local_decls); 7445 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) 7446 { 7447 t = (*child_cfun->local_decls)[srcidx]; 7448 if (DECL_CONTEXT (t) == cfun->decl) 7449 continue; 7450 if (srcidx != dstidx) 7451 (*child_cfun->local_decls)[dstidx] = t; 7452 dstidx++; 7453 } 7454 if (dstidx != num) 7455 vec_safe_truncate (child_cfun->local_decls, dstidx); 7456 7457 /* Inform the callgraph about the new function. */ 7458 child_cfun->curr_properties = cfun->curr_properties; 7459 child_cfun->has_simduid_loops |= cfun->has_simduid_loops; 7460 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; 7461 cgraph_node *node = cgraph_node::get_create (child_fn); 7462 node->parallelized_function = 1; 7463 cgraph_node::add_new_function (child_fn, true); 7464 7465 /* Add the new function to the offload table. */ 7466 if (ENABLE_OFFLOADING) 7467 { 7468 if (in_lto_p) 7469 DECL_PRESERVE_P (child_fn) = 1; 7470 vec_safe_push (offload_funcs, child_fn); 7471 } 7472 7473 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) 7474 && !DECL_ASSEMBLER_NAME_SET_P (child_fn); 7475 7476 /* Fix the callgraph edges for child_cfun. Those for cfun will be 7477 fixed in a following pass. */ 7478 push_cfun (child_cfun); 7479 if (need_asm) 7480 assign_assembler_name_if_needed (child_fn); 7481 cgraph_edge::rebuild_edges (); 7482 7483 /* Some EH regions might become dead, see PR34608. If 7484 pass_cleanup_cfg isn't the first pass to happen with the 7485 new child, these dead EH edges might cause problems. 7486 Clean them up now. */ 7487 if (flag_exceptions) 7488 { 7489 basic_block bb; 7490 bool changed = false; 7491 7492 FOR_EACH_BB_FN (bb, cfun) 7493 changed |= gimple_purge_dead_eh_edges (bb); 7494 if (changed) 7495 cleanup_tree_cfg (); 7496 } 7497 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 7498 verify_loop_structure (); 7499 pop_cfun (); 7500 7501 if (dump_file && !gimple_in_ssa_p (cfun)) 7502 { 7503 omp_any_child_fn_dumped = true; 7504 dump_function_header (dump_file, child_fn, dump_flags); 7505 dump_function_to_file (child_fn, dump_file, dump_flags); 7506 } 7507 7508 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn); 7509 } 7510 7511 /* Emit a library call to launch the offloading region, or do data 7512 transfers. 
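For example, for a plain '#pragma omp target' region the call built below
is, roughly (argument names invented for illustration),

     GOMP_target_ext (device, child_fn, map_num, hostaddrs, sizes, kinds,
                      flags, depend, args);

where GOMP_target_ext is the libgomp function behind BUILT_IN_GOMP_TARGET.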
*/ 7513 tree t1, t2, t3, t4, depend, c, clauses; 7514 enum built_in_function start_ix; 7515 unsigned int flags_i = 0; 7516 7517 switch (gimple_omp_target_kind (entry_stmt)) 7518 { 7519 case GF_OMP_TARGET_KIND_REGION: 7520 start_ix = BUILT_IN_GOMP_TARGET; 7521 break; 7522 case GF_OMP_TARGET_KIND_DATA: 7523 start_ix = BUILT_IN_GOMP_TARGET_DATA; 7524 break; 7525 case GF_OMP_TARGET_KIND_UPDATE: 7526 start_ix = BUILT_IN_GOMP_TARGET_UPDATE; 7527 break; 7528 case GF_OMP_TARGET_KIND_ENTER_DATA: 7529 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; 7530 break; 7531 case GF_OMP_TARGET_KIND_EXIT_DATA: 7532 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; 7533 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; 7534 break; 7535 case GF_OMP_TARGET_KIND_OACC_KERNELS: 7536 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 7537 start_ix = BUILT_IN_GOACC_PARALLEL; 7538 break; 7539 case GF_OMP_TARGET_KIND_OACC_DATA: 7540 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 7541 start_ix = BUILT_IN_GOACC_DATA_START; 7542 break; 7543 case GF_OMP_TARGET_KIND_OACC_UPDATE: 7544 start_ix = BUILT_IN_GOACC_UPDATE; 7545 break; 7546 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 7547 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; 7548 break; 7549 case GF_OMP_TARGET_KIND_OACC_DECLARE: 7550 start_ix = BUILT_IN_GOACC_DECLARE; 7551 break; 7552 default: 7553 gcc_unreachable (); 7554 } 7555 7556 clauses = gimple_omp_target_clauses (entry_stmt); 7557 7558 tree device = NULL_TREE; 7559 location_t device_loc = UNKNOWN_LOCATION; 7560 tree goacc_flags = NULL_TREE; 7561 if (is_gimple_omp_oacc (entry_stmt)) 7562 { 7563 /* By default, no GOACC_FLAGs are set. */ 7564 goacc_flags = integer_zero_node; 7565 } 7566 else 7567 { 7568 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE); 7569 if (c) 7570 { 7571 device = OMP_CLAUSE_DEVICE_ID (c); 7572 device_loc = OMP_CLAUSE_LOCATION (c); 7573 } 7574 else 7575 { 7576 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime 7577 library choose). */ 7578 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); 7579 device_loc = gimple_location (entry_stmt); 7580 } 7581 7582 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT); 7583 if (c) 7584 flags_i |= GOMP_TARGET_FLAG_NOWAIT; 7585 } 7586 7587 /* By default, there is no conditional. */ 7588 tree cond = NULL_TREE; 7589 c = omp_find_clause (clauses, OMP_CLAUSE_IF); 7590 if (c) 7591 cond = OMP_CLAUSE_IF_EXPR (c); 7592 /* If we found the clause 'if (cond)', build: 7593 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK) 7594 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */ 7595 if (cond) 7596 { 7597 tree *tp; 7598 if (is_gimple_omp_oacc (entry_stmt)) 7599 tp = &goacc_flags; 7600 else 7601 { 7602 /* Ensure 'device' is of the correct type. 
*/ 7603 device = fold_convert_loc (device_loc, integer_type_node, device); 7604 7605 tp = &device; 7606 } 7607 7608 cond = gimple_boolify (cond); 7609 7610 basic_block cond_bb, then_bb, else_bb; 7611 edge e; 7612 tree tmp_var; 7613 7614 tmp_var = create_tmp_var (TREE_TYPE (*tp)); 7615 if (offloaded) 7616 e = split_block_after_labels (new_bb); 7617 else 7618 { 7619 gsi = gsi_last_nondebug_bb (new_bb); 7620 gsi_prev (&gsi); 7621 e = split_block (new_bb, gsi_stmt (gsi)); 7622 } 7623 cond_bb = e->src; 7624 new_bb = e->dest; 7625 remove_edge (e); 7626 7627 then_bb = create_empty_bb (cond_bb); 7628 else_bb = create_empty_bb (then_bb); 7629 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); 7630 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); 7631 7632 stmt = gimple_build_cond_empty (cond); 7633 gsi = gsi_last_bb (cond_bb); 7634 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 7635 7636 gsi = gsi_start_bb (then_bb); 7637 stmt = gimple_build_assign (tmp_var, *tp); 7638 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 7639 7640 gsi = gsi_start_bb (else_bb); 7641 if (is_gimple_omp_oacc (entry_stmt)) 7642 stmt = gimple_build_assign (tmp_var, 7643 BIT_IOR_EXPR, 7644 *tp, 7645 build_int_cst (integer_type_node, 7646 GOACC_FLAG_HOST_FALLBACK)); 7647 else 7648 stmt = gimple_build_assign (tmp_var, 7649 build_int_cst (integer_type_node, 7650 GOMP_DEVICE_HOST_FALLBACK)); 7651 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 7652 7653 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); 7654 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); 7655 add_bb_to_loop (then_bb, cond_bb->loop_father); 7656 add_bb_to_loop (else_bb, cond_bb->loop_father); 7657 make_edge (then_bb, new_bb, EDGE_FALLTHRU); 7658 make_edge (else_bb, new_bb, EDGE_FALLTHRU); 7659 7660 *tp = tmp_var; 7661 7662 gsi = gsi_last_nondebug_bb (new_bb); 7663 } 7664 else 7665 { 7666 gsi = gsi_last_nondebug_bb (new_bb); 7667 7668 if (device != NULL_TREE) 7669 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, 7670 true, GSI_SAME_STMT); 7671 } 7672 7673 t = gimple_omp_target_data_arg (entry_stmt); 7674 if (t == NULL) 7675 { 7676 t1 = size_zero_node; 7677 t2 = build_zero_cst (ptr_type_node); 7678 t3 = t2; 7679 t4 = t2; 7680 } 7681 else 7682 { 7683 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1)))); 7684 t1 = size_binop (PLUS_EXPR, t1, size_int (1)); 7685 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0)); 7686 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1)); 7687 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2)); 7688 } 7689 7690 gimple *g; 7691 bool tagging = false; 7692 /* The maximum number used by any start_ix, without varargs. 
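That is, the maximum number of fixed arguments any of the launch builtins
above takes; it determines the reservation of the argument vector declared
just below.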
*/ 7693 auto_vec<tree, 11> args; 7694 if (is_gimple_omp_oacc (entry_stmt)) 7695 { 7696 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP, 7697 TREE_TYPE (goacc_flags), goacc_flags); 7698 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true, 7699 NULL_TREE, true, 7700 GSI_SAME_STMT); 7701 args.quick_push (goacc_flags_m); 7702 } 7703 else 7704 args.quick_push (device); 7705 if (offloaded) 7706 args.quick_push (build_fold_addr_expr (child_fn)); 7707 args.quick_push (t1); 7708 args.quick_push (t2); 7709 args.quick_push (t3); 7710 args.quick_push (t4); 7711 switch (start_ix) 7712 { 7713 case BUILT_IN_GOACC_DATA_START: 7714 case BUILT_IN_GOACC_DECLARE: 7715 case BUILT_IN_GOMP_TARGET_DATA: 7716 break; 7717 case BUILT_IN_GOMP_TARGET: 7718 case BUILT_IN_GOMP_TARGET_UPDATE: 7719 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA: 7720 args.quick_push (build_int_cst (unsigned_type_node, flags_i)); 7721 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); 7722 if (c) 7723 depend = OMP_CLAUSE_DECL (c); 7724 else 7725 depend = build_int_cst (ptr_type_node, 0); 7726 args.quick_push (depend); 7727 if (start_ix == BUILT_IN_GOMP_TARGET) 7728 args.quick_push (get_target_arguments (&gsi, entry_stmt)); 7729 break; 7730 case BUILT_IN_GOACC_PARALLEL: 7731 oacc_set_fn_attrib (child_fn, clauses, &args); 7732 tagging = true; 7733 /* FALLTHRU */ 7734 case BUILT_IN_GOACC_ENTER_EXIT_DATA: 7735 case BUILT_IN_GOACC_UPDATE: 7736 { 7737 tree t_async = NULL_TREE; 7738 7739 /* If present, use the value specified by the respective 7740 clause, making sure that is of the correct type. */ 7741 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC); 7742 if (c) 7743 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), 7744 integer_type_node, 7745 OMP_CLAUSE_ASYNC_EXPR (c)); 7746 else if (!tagging) 7747 /* Default values for t_async. */ 7748 t_async = fold_convert_loc (gimple_location (entry_stmt), 7749 integer_type_node, 7750 build_int_cst (integer_type_node, 7751 GOMP_ASYNC_SYNC)); 7752 if (tagging && t_async) 7753 { 7754 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX; 7755 7756 if (TREE_CODE (t_async) == INTEGER_CST) 7757 { 7758 /* See if we can pack the async arg in to the tag's 7759 operand. */ 7760 i_async = TREE_INT_CST_LOW (t_async); 7761 if (i_async < GOMP_LAUNCH_OP_MAX) 7762 t_async = NULL_TREE; 7763 else 7764 i_async = GOMP_LAUNCH_OP_MAX; 7765 } 7766 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, 7767 i_async)); 7768 } 7769 if (t_async) 7770 args.safe_push (t_async); 7771 7772 /* Save the argument index, and ... */ 7773 unsigned t_wait_idx = args.length (); 7774 unsigned num_waits = 0; 7775 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT); 7776 if (!tagging || c) 7777 /* ... push a placeholder. */ 7778 args.safe_push (integer_zero_node); 7779 7780 for (; c; c = OMP_CLAUSE_CHAIN (c)) 7781 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) 7782 { 7783 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), 7784 integer_type_node, 7785 OMP_CLAUSE_WAIT_EXPR (c))); 7786 num_waits++; 7787 } 7788 7789 if (!tagging || num_waits) 7790 { 7791 tree len; 7792 7793 /* Now that we know the number, update the placeholder. 
*/
7794 if (tagging)
7795 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7796 else
7797 len = build_int_cst (integer_type_node, num_waits);
7798 len = fold_convert_loc (gimple_location (entry_stmt),
7799 unsigned_type_node, len);
7800 args[t_wait_idx] = len;
7801 }
7802 }
7803 break;
7804 default:
7805 gcc_unreachable ();
7806 }
7807 if (tagging)
7808 /* Push terminal marker - zero. */
7809 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7810
7811 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7812 gimple_set_location (g, gimple_location (entry_stmt));
7813 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7814 if (!offloaded)
7815 {
7816 g = gsi_stmt (gsi);
7817 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7818 gsi_remove (&gsi, true);
7819 }
7820 if (data_region && region->exit)
7821 {
7822 gsi = gsi_last_nondebug_bb (region->exit);
7823 g = gsi_stmt (gsi);
7824 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7825 gsi_remove (&gsi, true);
7826 }
7827 }
7828
7829 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
7830 the iteration variable derived from the thread number. INTRA_GROUP means this
7831 is an expansion of a loop iterating over work-items within a separate
7832 iteration over groups. */
7833
7834 static void
7835 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7836 {
7837 gimple_stmt_iterator gsi;
7838 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7839 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7840 == GF_OMP_FOR_KIND_GRID_LOOP);
7841 size_t collapse = gimple_omp_for_collapse (for_stmt);
7842 struct omp_for_data_loop *loops
7843 = XALLOCAVEC (struct omp_for_data_loop,
7844 gimple_omp_for_collapse (for_stmt));
7845 struct omp_for_data fd;
7846
7847 remove_edge (BRANCH_EDGE (kfor->entry));
7848 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7849
7850 gcc_assert (kfor->cont);
7851 omp_extract_for_data (for_stmt, &fd, loops);
7852
7853 gsi = gsi_start_bb (body_bb);
7854
7855 for (size_t dim = 0; dim < collapse; dim++)
7856 {
7857 tree type, itype;
7858 itype = type = TREE_TYPE (fd.loops[dim].v);
7859 if (POINTER_TYPE_P (type))
7860 itype = signed_type_for (type);
7861
7862 tree n1 = fd.loops[dim].n1;
7863 tree step = fd.loops[dim].step;
7864 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7865 true, NULL_TREE, true, GSI_SAME_STMT);
7866 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7867 true, NULL_TREE, true, GSI_SAME_STMT);
7868 tree threadid;
7869 if (gimple_omp_for_grid_group_iter (for_stmt))
7870 {
7871 gcc_checking_assert (!intra_group);
7872 threadid = build_call_expr (builtin_decl_explicit
7873 (BUILT_IN_HSA_WORKGROUPID), 1,
7874 build_int_cstu (unsigned_type_node, dim));
7875 }
7876 else if (intra_group)
7877 threadid = build_call_expr (builtin_decl_explicit
7878 (BUILT_IN_HSA_WORKITEMID), 1,
7879 build_int_cstu (unsigned_type_node, dim));
7880 else
7881 threadid = build_call_expr (builtin_decl_explicit
7882 (BUILT_IN_HSA_WORKITEMABSID), 1,
7883 build_int_cstu (unsigned_type_node, dim));
7884 threadid = fold_convert (itype, threadid);
7885 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7886 true, GSI_SAME_STMT);
7887
7888 tree startvar = fd.loops[dim].v;
7889 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7890 if (POINTER_TYPE_P (type))
7891 t = fold_build_pointer_plus (n1, t);
7892 else
7893 t = fold_build2 (PLUS_EXPR, type, t, n1);
7894 t = fold_convert (type, t);
7895 t =
force_gimple_operand_gsi (&gsi, t, 7896 DECL_P (startvar) 7897 && TREE_ADDRESSABLE (startvar), 7898 NULL_TREE, true, GSI_SAME_STMT); 7899 gassign *assign_stmt = gimple_build_assign (startvar, t); 7900 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 7901 } 7902 /* Remove the omp for statement. */ 7903 gsi = gsi_last_nondebug_bb (kfor->entry); 7904 gsi_remove (&gsi, true); 7905 7906 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 7907 gsi = gsi_last_nondebug_bb (kfor->cont); 7908 gcc_assert (!gsi_end_p (gsi) 7909 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); 7910 gsi_remove (&gsi, true); 7911 7912 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ 7913 gsi = gsi_last_nondebug_bb (kfor->exit); 7914 gcc_assert (!gsi_end_p (gsi) 7915 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 7916 if (intra_group) 7917 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT); 7918 gsi_remove (&gsi, true); 7919 7920 /* Fixup the much simpler CFG. */ 7921 remove_edge (find_edge (kfor->cont, body_bb)); 7922 7923 if (kfor->cont != body_bb) 7924 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); 7925 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); 7926 } 7927 7928 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap 7929 argument_decls. */ 7930 7931 struct grid_arg_decl_map 7932 { 7933 tree old_arg; 7934 tree new_arg; 7935 }; 7936 7937 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones 7938 pertaining to kernel function. */ 7939 7940 static tree 7941 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) 7942 { 7943 struct walk_stmt_info *wi = (struct walk_stmt_info *) data; 7944 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; 7945 tree t = *tp; 7946 7947 if (t == adm->old_arg) 7948 *tp = adm->new_arg; 7949 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 7950 return NULL_TREE; 7951 } 7952 7953 /* If TARGET region contains a kernel body for loop, remove its region from the 7954 TARGET and expand it in HSA gridified kernel fashion. */ 7955 7956 static void 7957 grid_expand_target_grid_body (struct omp_region *target) 7958 { 7959 if (!hsa_gen_requested_p ()) 7960 return; 7961 7962 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); 7963 struct omp_region **pp; 7964 7965 for (pp = &target->inner; *pp; pp = &(*pp)->next) 7966 if ((*pp)->type == GIMPLE_OMP_GRID_BODY) 7967 break; 7968 7969 struct omp_region *gpukernel = *pp; 7970 7971 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); 7972 if (!gpukernel) 7973 { 7974 /* HSA cannot handle OACC stuff. 
*/ 7975 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) 7976 return; 7977 gcc_checking_assert (orig_child_fndecl); 7978 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 7979 OMP_CLAUSE__GRIDDIM_)); 7980 cgraph_node *n = cgraph_node::get (orig_child_fndecl); 7981 7982 hsa_register_kernel (n); 7983 return; 7984 } 7985 7986 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 7987 OMP_CLAUSE__GRIDDIM_)); 7988 tree inside_block 7989 = gimple_block (first_stmt (single_succ (gpukernel->entry))); 7990 *pp = gpukernel->next; 7991 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) 7992 if ((*pp)->type == GIMPLE_OMP_FOR) 7993 break; 7994 7995 struct omp_region *kfor = *pp; 7996 gcc_assert (kfor); 7997 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); 7998 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); 7999 *pp = kfor->next; 8000 if (kfor->inner) 8001 { 8002 if (gimple_omp_for_grid_group_iter (for_stmt)) 8003 { 8004 struct omp_region **next_pp; 8005 for (pp = &kfor->inner; *pp; pp = next_pp) 8006 { 8007 next_pp = &(*pp)->next; 8008 if ((*pp)->type != GIMPLE_OMP_FOR) 8009 continue; 8010 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); 8011 gcc_assert (gimple_omp_for_kind (inner) 8012 == GF_OMP_FOR_KIND_GRID_LOOP); 8013 grid_expand_omp_for_loop (*pp, true); 8014 *pp = (*pp)->next; 8015 next_pp = pp; 8016 } 8017 } 8018 expand_omp (kfor->inner); 8019 } 8020 if (gpukernel->inner) 8021 expand_omp (gpukernel->inner); 8022 8023 tree kern_fndecl = copy_node (orig_child_fndecl); 8024 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl, 8025 "kernel"); 8026 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); 8027 tree tgtblock = gimple_block (tgt_stmt); 8028 tree fniniblock = make_node (BLOCK); 8029 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock); 8030 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); 8031 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); 8032 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; 8033 DECL_INITIAL (kern_fndecl) = fniniblock; 8034 push_struct_function (kern_fndecl); 8035 cfun->function_end_locus = gimple_location (tgt_stmt); 8036 init_tree_ssa (cfun); 8037 pop_cfun (); 8038 8039 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); 8040 gcc_assert (!DECL_CHAIN (old_parm_decl)); 8041 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); 8042 DECL_CONTEXT (new_parm_decl) = kern_fndecl; 8043 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; 8044 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); 8045 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); 8046 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; 8047 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); 8048 kern_cfun->curr_properties = cfun->curr_properties; 8049 8050 grid_expand_omp_for_loop (kfor, false); 8051 8052 /* Remove the omp for statement. */ 8053 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry); 8054 gsi_remove (&gsi, true); 8055 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real 8056 return. 
*/ 8057 gsi = gsi_last_nondebug_bb (gpukernel->exit); 8058 gcc_assert (!gsi_end_p (gsi) 8059 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 8060 gimple *ret_stmt = gimple_build_return (NULL); 8061 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); 8062 gsi_remove (&gsi, true); 8063 8064 /* Statements in the first BB in the target construct have been produced by 8065 target lowering and must be copied inside the GPUKERNEL, with the two 8066 exceptions of the first OMP statement and the OMP_DATA assignment 8067 statement. */ 8068 gsi = gsi_start_bb (single_succ (gpukernel->entry)); 8069 tree data_arg = gimple_omp_target_data_arg (tgt_stmt); 8070 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL; 8071 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); 8072 !gsi_end_p (tsi); gsi_next (&tsi)) 8073 { 8074 gimple *stmt = gsi_stmt (tsi); 8075 if (is_gimple_omp (stmt)) 8076 break; 8077 if (sender 8078 && is_gimple_assign (stmt) 8079 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR 8080 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) 8081 continue; 8082 gimple *copy = gimple_copy (stmt); 8083 gsi_insert_before (&gsi, copy, GSI_SAME_STMT); 8084 gimple_set_block (copy, fniniblock); 8085 } 8086 8087 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), 8088 gpukernel->exit, inside_block); 8089 8090 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); 8091 kcn->mark_force_output (); 8092 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); 8093 8094 hsa_register_kernel (kcn, orig_child); 8095 8096 cgraph_node::add_new_function (kern_fndecl, true); 8097 push_cfun (kern_cfun); 8098 cgraph_edge::rebuild_edges (); 8099 8100 /* Re-map any mention of the PARM_DECL of the original function to the 8101 PARM_DECL of the new one. 8102 8103 TODO: It would be great if lowering produced references into the GPU 8104 kernel decl straight away and we did not have to do this. */ 8105 struct grid_arg_decl_map adm; 8106 adm.old_arg = old_parm_decl; 8107 adm.new_arg = new_parm_decl; 8108 basic_block bb; 8109 FOR_EACH_BB_FN (bb, kern_cfun) 8110 { 8111 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 8112 { 8113 gimple *stmt = gsi_stmt (gsi); 8114 struct walk_stmt_info wi; 8115 memset (&wi, 0, sizeof (wi)); 8116 wi.info = &adm; 8117 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); 8118 } 8119 } 8120 pop_cfun (); 8121 8122 return; 8123 } 8124 8125 /* Expand the parallel region tree rooted at REGION. Expansion 8126 proceeds in depth-first order. Innermost regions are expanded 8127 first. This way, parallel regions that require a new function to 8128 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any 8129 internal dependencies in their body. */ 8130 8131 static void 8132 expand_omp (struct omp_region *region) 8133 { 8134 omp_any_child_fn_dumped = false; 8135 while (region) 8136 { 8137 location_t saved_location; 8138 gimple *inner_stmt = NULL; 8139 8140 /* First, determine whether this is a combined parallel+workshare 8141 region. 
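(E.g. '#pragma omp parallel for', whose workshare can be folded into the
parallel and launched with a single combined library call.)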
*/ 8142 if (region->type == GIMPLE_OMP_PARALLEL) 8143 determine_parallel_type (region); 8144 else if (region->type == GIMPLE_OMP_TARGET) 8145 grid_expand_target_grid_body (region); 8146 8147 if (region->type == GIMPLE_OMP_FOR 8148 && gimple_omp_for_combined_p (last_stmt (region->entry))) 8149 inner_stmt = last_stmt (region->inner->entry); 8150 8151 if (region->inner) 8152 expand_omp (region->inner); 8153 8154 saved_location = input_location; 8155 if (gimple_has_location (last_stmt (region->entry))) 8156 input_location = gimple_location (last_stmt (region->entry)); 8157 8158 switch (region->type) 8159 { 8160 case GIMPLE_OMP_PARALLEL: 8161 case GIMPLE_OMP_TASK: 8162 expand_omp_taskreg (region); 8163 break; 8164 8165 case GIMPLE_OMP_FOR: 8166 expand_omp_for (region, inner_stmt); 8167 break; 8168 8169 case GIMPLE_OMP_SECTIONS: 8170 expand_omp_sections (region); 8171 break; 8172 8173 case GIMPLE_OMP_SECTION: 8174 /* Individual omp sections are handled together with their 8175 parent GIMPLE_OMP_SECTIONS region. */ 8176 break; 8177 8178 case GIMPLE_OMP_SINGLE: 8179 expand_omp_single (region); 8180 break; 8181 8182 case GIMPLE_OMP_ORDERED: 8183 { 8184 gomp_ordered *ord_stmt 8185 = as_a <gomp_ordered *> (last_stmt (region->entry)); 8186 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), 8187 OMP_CLAUSE_DEPEND)) 8188 { 8189 /* We'll expand these when expanding corresponding 8190 worksharing region with ordered(n) clause. */ 8191 gcc_assert (region->outer 8192 && region->outer->type == GIMPLE_OMP_FOR); 8193 region->ord_stmt = ord_stmt; 8194 break; 8195 } 8196 } 8197 /* FALLTHRU */ 8198 case GIMPLE_OMP_MASTER: 8199 case GIMPLE_OMP_TASKGROUP: 8200 case GIMPLE_OMP_CRITICAL: 8201 case GIMPLE_OMP_TEAMS: 8202 expand_omp_synch (region); 8203 break; 8204 8205 case GIMPLE_OMP_ATOMIC_LOAD: 8206 expand_omp_atomic (region); 8207 break; 8208 8209 case GIMPLE_OMP_TARGET: 8210 expand_omp_target (region); 8211 break; 8212 8213 default: 8214 gcc_unreachable (); 8215 } 8216 8217 input_location = saved_location; 8218 region = region->next; 8219 } 8220 if (omp_any_child_fn_dumped) 8221 { 8222 if (dump_file) 8223 dump_function_header (dump_file, current_function_decl, dump_flags); 8224 omp_any_child_fn_dumped = false; 8225 } 8226 } 8227 8228 /* Helper for build_omp_regions. Scan the dominator tree starting at 8229 block BB. PARENT is the region that contains BB. If SINGLE_TREE is 8230 true, the function ends once a single tree is built (otherwise, whole 8231 forest of OMP constructs may be built). */ 8232 8233 static void 8234 build_omp_regions_1 (basic_block bb, struct omp_region *parent, 8235 bool single_tree) 8236 { 8237 gimple_stmt_iterator gsi; 8238 gimple *stmt; 8239 basic_block son; 8240 8241 gsi = gsi_last_nondebug_bb (bb); 8242 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi))) 8243 { 8244 struct omp_region *region; 8245 enum gimple_code code; 8246 8247 stmt = gsi_stmt (gsi); 8248 code = gimple_code (stmt); 8249 if (code == GIMPLE_OMP_RETURN) 8250 { 8251 /* STMT is the return point out of region PARENT. Mark it 8252 as the exit point and make PARENT the immediately 8253 enclosing region. */ 8254 gcc_assert (parent); 8255 region = parent; 8256 region->exit = bb; 8257 parent = parent->outer; 8258 } 8259 else if (code == GIMPLE_OMP_ATOMIC_STORE) 8260 { 8261 /* GIMPLE_OMP_ATOMIC_STORE is analogous to 8262 GIMPLE_OMP_RETURN, but matches with 8263 GIMPLE_OMP_ATOMIC_LOAD. 
*/ 8264 gcc_assert (parent); 8265 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD); 8266 region = parent; 8267 region->exit = bb; 8268 parent = parent->outer; 8269 } 8270 else if (code == GIMPLE_OMP_CONTINUE) 8271 { 8272 gcc_assert (parent); 8273 parent->cont = bb; 8274 } 8275 else if (code == GIMPLE_OMP_SECTIONS_SWITCH) 8276 { 8277 /* GIMPLE_OMP_SECTIONS_SWITCH is part of 8278 GIMPLE_OMP_SECTIONS, and we do nothing for it. */ 8279 } 8280 else 8281 { 8282 region = new_omp_region (bb, code, parent); 8283 /* Otherwise... */ 8284 if (code == GIMPLE_OMP_TARGET) 8285 { 8286 switch (gimple_omp_target_kind (stmt)) 8287 { 8288 case GF_OMP_TARGET_KIND_REGION: 8289 case GF_OMP_TARGET_KIND_DATA: 8290 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 8291 case GF_OMP_TARGET_KIND_OACC_KERNELS: 8292 case GF_OMP_TARGET_KIND_OACC_DATA: 8293 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 8294 break; 8295 case GF_OMP_TARGET_KIND_UPDATE: 8296 case GF_OMP_TARGET_KIND_ENTER_DATA: 8297 case GF_OMP_TARGET_KIND_EXIT_DATA: 8298 case GF_OMP_TARGET_KIND_OACC_UPDATE: 8299 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 8300 case GF_OMP_TARGET_KIND_OACC_DECLARE: 8301 /* ..., other than for those stand-alone directives... */ 8302 region = NULL; 8303 break; 8304 default: 8305 gcc_unreachable (); 8306 } 8307 } 8308 else if (code == GIMPLE_OMP_ORDERED 8309 && omp_find_clause (gimple_omp_ordered_clauses 8310 (as_a <gomp_ordered *> (stmt)), 8311 OMP_CLAUSE_DEPEND)) 8312 /* #pragma omp ordered depend is also just a stand-alone 8313 directive. */ 8314 region = NULL; 8315 else if (code == GIMPLE_OMP_TASK 8316 && gimple_omp_task_taskwait_p (stmt)) 8317 /* #pragma omp taskwait depend(...) is a stand-alone directive. */ 8318 region = NULL; 8319 /* ..., this directive becomes the parent for a new region. */ 8320 if (region) 8321 parent = region; 8322 } 8323 } 8324 8325 if (single_tree && !parent) 8326 return; 8327 8328 for (son = first_dom_son (CDI_DOMINATORS, bb); 8329 son; 8330 son = next_dom_son (CDI_DOMINATORS, son)) 8331 build_omp_regions_1 (son, parent, single_tree); 8332 } 8333 8334 /* Builds the tree of OMP regions rooted at ROOT, storing it to 8335 root_omp_region. */ 8336 8337 static void 8338 build_omp_regions_root (basic_block root) 8339 { 8340 gcc_assert (root_omp_region == NULL); 8341 build_omp_regions_1 (root, NULL, true); 8342 gcc_assert (root_omp_region != NULL); 8343 } 8344 8345 /* Expands omp construct (and its subconstructs) starting in HEAD. */ 8346 8347 void 8348 omp_expand_local (basic_block head) 8349 { 8350 build_omp_regions_root (head); 8351 if (dump_file && (dump_flags & TDF_DETAILS)) 8352 { 8353 fprintf (dump_file, "\nOMP region tree\n\n"); 8354 dump_omp_region (dump_file, root_omp_region, 0); 8355 fprintf (dump_file, "\n"); 8356 } 8357 8358 remove_exit_barriers (root_omp_region); 8359 expand_omp (root_omp_region); 8360 8361 omp_free_regions (); 8362 } 8363 8364 /* Scan the CFG and build a tree of OMP regions. Return the root of 8365 the OMP region tree. */ 8366 8367 static void 8368 build_omp_regions (void) 8369 { 8370 gcc_assert (root_omp_region == NULL); 8371 calculate_dominance_info (CDI_DOMINATORS); 8372 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false); 8373 } 8374 8375 /* Main entry point for expanding OMP-GIMPLE into runtime calls. 
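The region tree is built first, exit barriers that are no longer needed are
removed, and each region in the tree is then expanded, as the body below
shows.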
*/ 8376 8377 static unsigned int 8378 execute_expand_omp (void) 8379 { 8380 build_omp_regions (); 8381 8382 if (!root_omp_region) 8383 return 0; 8384 8385 if (dump_file) 8386 { 8387 fprintf (dump_file, "\nOMP region tree\n\n"); 8388 dump_omp_region (dump_file, root_omp_region, 0); 8389 fprintf (dump_file, "\n"); 8390 } 8391 8392 remove_exit_barriers (root_omp_region); 8393 8394 expand_omp (root_omp_region); 8395 8396 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 8397 verify_loop_structure (); 8398 cleanup_tree_cfg (); 8399 8400 omp_free_regions (); 8401 8402 return 0; 8403 } 8404 8405 /* OMP expansion -- the default pass, run before creation of SSA form. */ 8406 8407 namespace { 8408 8409 const pass_data pass_data_expand_omp = 8410 { 8411 GIMPLE_PASS, /* type */ 8412 "ompexp", /* name */ 8413 OPTGROUP_OMP, /* optinfo_flags */ 8414 TV_NONE, /* tv_id */ 8415 PROP_gimple_any, /* properties_required */ 8416 PROP_gimple_eomp, /* properties_provided */ 8417 0, /* properties_destroyed */ 8418 0, /* todo_flags_start */ 8419 0, /* todo_flags_finish */ 8420 }; 8421 8422 class pass_expand_omp : public gimple_opt_pass 8423 { 8424 public: 8425 pass_expand_omp (gcc::context *ctxt) 8426 : gimple_opt_pass (pass_data_expand_omp, ctxt) 8427 {} 8428 8429 /* opt_pass methods: */ 8430 virtual unsigned int execute (function *) 8431 { 8432 bool gate = ((flag_openacc != 0 || flag_openmp != 0 8433 || flag_openmp_simd != 0) 8434 && !seen_error ()); 8435 8436 /* This pass always runs, to provide PROP_gimple_eomp. 8437 But often, there is nothing to do. */ 8438 if (!gate) 8439 return 0; 8440 8441 return execute_expand_omp (); 8442 } 8443 8444 }; // class pass_expand_omp 8445 8446 } // anon namespace 8447 8448 gimple_opt_pass * 8449 make_pass_expand_omp (gcc::context *ctxt) 8450 { 8451 return new pass_expand_omp (ctxt); 8452 } 8453 8454 namespace { 8455 8456 const pass_data pass_data_expand_omp_ssa = 8457 { 8458 GIMPLE_PASS, /* type */ 8459 "ompexpssa", /* name */ 8460 OPTGROUP_OMP, /* optinfo_flags */ 8461 TV_NONE, /* tv_id */ 8462 PROP_cfg | PROP_ssa, /* properties_required */ 8463 PROP_gimple_eomp, /* properties_provided */ 8464 0, /* properties_destroyed */ 8465 0, /* todo_flags_start */ 8466 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ 8467 }; 8468 8469 class pass_expand_omp_ssa : public gimple_opt_pass 8470 { 8471 public: 8472 pass_expand_omp_ssa (gcc::context *ctxt) 8473 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) 8474 {} 8475 8476 /* opt_pass methods: */ 8477 virtual bool gate (function *fun) 8478 { 8479 return !(fun->curr_properties & PROP_gimple_eomp); 8480 } 8481 virtual unsigned int execute (function *) { return execute_expand_omp (); } 8482 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); } 8483 8484 }; // class pass_expand_omp_ssa 8485 8486 } // anon namespace 8487 8488 gimple_opt_pass * 8489 make_pass_expand_omp_ssa (gcc::context *ctxt) 8490 { 8491 return new pass_expand_omp_ssa (ctxt); 8492 } 8493 8494 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant 8495 GIMPLE_* codes. 
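Returns true if the last statement of BB should get a fallthru edge to the
next block; REGION and REGION_IDX track the innermost OMP region across
calls.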
/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  That edge is
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark the successor edges of the GIMPLE_OMP_FOR and
	     GIMPLE_OMP_CONTINUE blocks as abnormal to prevent
	     splitting them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);
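	  /* Illustrative summary (not from the original sources): for a
	     GIMPLE_OMP_FOR region the edges built in this case are

	       ENTRY -> body             (existing edge, now EDGE_ABNORMAL)
	       CONT  -> body             (loopback edge made just above)
	       ENTRY -> CONT's next_bb   (zero-iteration case, made below)
	       CONT  -> CONT's next_bb   (fallthru out of the loop, below)

	     where ENTRY is the block ending in GIMPLE_OMP_FOR and CONT the
	     block ending in GIMPLE_OMP_CONTINUE; every one of them carries
	     EDGE_ABNORMAL so later CFG manipulation cannot split it.  */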
	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"