1 /* Expansion pass for OMP directives. Outlines regions of certain OMP 2 directives to separate functions, converts others into explicit calls to the 3 runtime library (libgomp) and so forth 4 5 Copyright (C) 2005-2020 Free Software Foundation, Inc. 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free 11 Software Foundation; either version 3, or (at your option) any later 12 version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15 WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17 for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 #include "config.h" 24 #include "system.h" 25 #include "coretypes.h" 26 #include "memmodel.h" 27 #include "backend.h" 28 #include "target.h" 29 #include "rtl.h" 30 #include "tree.h" 31 #include "gimple.h" 32 #include "cfghooks.h" 33 #include "tree-pass.h" 34 #include "ssa.h" 35 #include "optabs.h" 36 #include "cgraph.h" 37 #include "pretty-print.h" 38 #include "diagnostic-core.h" 39 #include "fold-const.h" 40 #include "stor-layout.h" 41 #include "cfganal.h" 42 #include "internal-fn.h" 43 #include "gimplify.h" 44 #include "gimple-iterator.h" 45 #include "gimplify-me.h" 46 #include "gimple-walk.h" 47 #include "tree-cfg.h" 48 #include "tree-into-ssa.h" 49 #include "tree-ssa.h" 50 #include "splay-tree.h" 51 #include "cfgloop.h" 52 #include "omp-general.h" 53 #include "omp-offload.h" 54 #include "tree-cfgcleanup.h" 55 #include "alloc-pool.h" 56 #include "symbol-summary.h" 57 #include "gomp-constants.h" 58 #include "gimple-pretty-print.h" 59 #include "hsa-common.h" 60 #include "stringpool.h" 61 #include "attribs.h" 62 63 /* OMP region information. Every parallel and workshare 64 directive is enclosed between two markers, the OMP_* directive 65 and a corresponding GIMPLE_OMP_RETURN statement. */ 66 67 struct omp_region 68 { 69 /* The enclosing region. */ 70 struct omp_region *outer; 71 72 /* First child region. */ 73 struct omp_region *inner; 74 75 /* Next peer region. */ 76 struct omp_region *next; 77 78 /* Block containing the omp directive as its last stmt. */ 79 basic_block entry; 80 81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */ 82 basic_block exit; 83 84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */ 85 basic_block cont; 86 87 /* If this is a combined parallel+workshare region, this is a list 88 of additional arguments needed by the combined parallel+workshare 89 library call. */ 90 vec<tree, va_gc> *ws_args; 91 92 /* The code for the omp directive of this region. */ 93 enum gimple_code type; 94 95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */ 96 enum omp_clause_schedule_kind sched_kind; 97 98 /* Schedule modifiers. */ 99 unsigned char sched_modifiers; 100 101 /* True if this is a combined parallel+workshare region. */ 102 bool is_combined_parallel; 103 104 /* Copy of fd.lastprivate_conditional != 0. */ 105 bool has_lastprivate_conditional; 106 107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has 108 a depend clause. 
*/ 109 gomp_ordered *ord_stmt; 110 }; 111 112 static struct omp_region *root_omp_region; 113 static bool omp_any_child_fn_dumped; 114 115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, 116 bool = false); 117 static gphi *find_phi_with_arg_on_edge (tree, edge); 118 static void expand_omp (struct omp_region *region); 119 120 /* Return true if REGION is a combined parallel+workshare region. */ 121 122 static inline bool 123 is_combined_parallel (struct omp_region *region) 124 { 125 return region->is_combined_parallel; 126 } 127 128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB 129 is the immediate dominator of PAR_ENTRY_BB, return true if there 130 are no data dependencies that would prevent expanding the parallel 131 directive at PAR_ENTRY_BB as a combined parallel+workshare region. 132 133 When expanding a combined parallel+workshare region, the call to 134 the child function may need additional arguments in the case of 135 GIMPLE_OMP_FOR regions. In some cases, these arguments are 136 computed out of variables passed in from the parent to the child 137 via 'struct .omp_data_s'. For instance: 138 139 #pragma omp parallel for schedule (guided, i * 4) 140 for (j ...) 141 142 Is lowered into: 143 144 # BLOCK 2 (PAR_ENTRY_BB) 145 .omp_data_o.i = i; 146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) 147 148 # BLOCK 3 (WS_ENTRY_BB) 149 .omp_data_i = &.omp_data_o; 150 D.1667 = .omp_data_i->i; 151 D.1598 = D.1667 * 4; 152 #pragma omp for schedule (guided, D.1598) 153 154 When we outline the parallel region, the call to the child function 155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but 156 that value is computed *after* the call site. So, in principle we 157 cannot do the transformation. 158 159 To see whether the code in WS_ENTRY_BB blocks the combined 160 parallel+workshare call, we collect all the variables used in the 161 GIMPLE_OMP_FOR header check whether they appear on the LHS of any 162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined 163 call. 164 165 FIXME. If we had the SSA form built at this point, we could merely 166 hoist the code in block 3 into block 2 and be done with it. But at 167 this point we don't have dataflow information and though we could 168 hack something up here, it is really not worth the aggravation. */ 169 170 static bool 171 workshare_safe_to_combine_p (basic_block ws_entry_bb) 172 { 173 struct omp_for_data fd; 174 gimple *ws_stmt = last_stmt (ws_entry_bb); 175 176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) 177 return true; 178 179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); 180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR) 181 return false; 182 183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); 184 185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) 186 return false; 187 if (fd.iter_type != long_integer_type_node) 188 return false; 189 190 /* FIXME. We give up too easily here. If any of these arguments 191 are not constants, they will likely involve variables that have 192 been mapped into fields of .omp_data_s for sharing with the child 193 function. With appropriate data flow, it would be possible to 194 see through this. 
*/ 195 if (!is_gimple_min_invariant (fd.loop.n1) 196 || !is_gimple_min_invariant (fd.loop.n2) 197 || !is_gimple_min_invariant (fd.loop.step) 198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) 199 return false; 200 201 return true; 202 } 203 204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier 205 presence (SIMD_SCHEDULE). */ 206 207 static tree 208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) 209 { 210 if (!simd_schedule || integer_zerop (chunk_size)) 211 return chunk_size; 212 213 poly_uint64 vf = omp_max_vf (); 214 if (known_eq (vf, 1U)) 215 return chunk_size; 216 217 tree type = TREE_TYPE (chunk_size); 218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, 219 build_int_cst (type, vf - 1)); 220 return fold_build2 (BIT_AND_EXPR, type, chunk_size, 221 build_int_cst (type, -vf)); 222 } 223 224 /* Collect additional arguments needed to emit a combined 225 parallel+workshare call. WS_STMT is the workshare directive being 226 expanded. */ 227 228 static vec<tree, va_gc> * 229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) 230 { 231 tree t; 232 location_t loc = gimple_location (ws_stmt); 233 vec<tree, va_gc> *ws_args; 234 235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) 236 { 237 struct omp_for_data fd; 238 tree n1, n2; 239 240 omp_extract_for_data (for_stmt, &fd, NULL); 241 n1 = fd.loop.n1; 242 n2 = fd.loop.n2; 243 244 if (gimple_omp_for_combined_into_p (for_stmt)) 245 { 246 tree innerc 247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt), 248 OMP_CLAUSE__LOOPTEMP_); 249 gcc_assert (innerc); 250 n1 = OMP_CLAUSE_DECL (innerc); 251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 252 OMP_CLAUSE__LOOPTEMP_); 253 gcc_assert (innerc); 254 n2 = OMP_CLAUSE_DECL (innerc); 255 } 256 257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0)); 258 259 t = fold_convert_loc (loc, long_integer_type_node, n1); 260 ws_args->quick_push (t); 261 262 t = fold_convert_loc (loc, long_integer_type_node, n2); 263 ws_args->quick_push (t); 264 265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step); 266 ws_args->quick_push (t); 267 268 if (fd.chunk_size) 269 { 270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); 271 t = omp_adjust_chunk_size (t, fd.simd_schedule); 272 ws_args->quick_push (t); 273 } 274 275 return ws_args; 276 } 277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) 278 { 279 /* Number of sections is equal to the number of edges from the 280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to 281 the exit of the sections region. */ 282 basic_block bb = single_succ (gimple_bb (ws_stmt)); 283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); 284 vec_alloc (ws_args, 1); 285 ws_args->quick_push (t); 286 return ws_args; 287 } 288 289 gcc_unreachable (); 290 } 291 292 /* Discover whether REGION is a combined parallel+workshare region. */ 293 294 static void 295 determine_parallel_type (struct omp_region *region) 296 { 297 basic_block par_entry_bb, par_exit_bb; 298 basic_block ws_entry_bb, ws_exit_bb; 299 300 if (region == NULL || region->inner == NULL 301 || region->exit == NULL || region->inner->exit == NULL 302 || region->inner->cont == NULL) 303 return; 304 305 /* We only support parallel+for and parallel+sections. */ 306 if (region->type != GIMPLE_OMP_PARALLEL 307 || (region->inner->type != GIMPLE_OMP_FOR 308 && region->inner->type != GIMPLE_OMP_SECTIONS)) 309 return; 310 311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and 312 WS_EXIT_BB -> PAR_EXIT_BB. 
*/ 313 par_entry_bb = region->entry; 314 par_exit_bb = region->exit; 315 ws_entry_bb = region->inner->entry; 316 ws_exit_bb = region->inner->exit; 317 318 /* Give up for task reductions on the parallel; while it is implementable, 319 adding another big set of APIs or slowing down the normal paths is 320 not acceptable. */ 321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb)); 322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_)) 323 return; 324 325 if (single_succ (par_entry_bb) == ws_entry_bb 326 && single_succ (ws_exit_bb) == par_exit_bb 327 && workshare_safe_to_combine_p (ws_entry_bb) 328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) 329 || (last_and_only_stmt (ws_entry_bb) 330 && last_and_only_stmt (par_exit_bb)))) 331 { 332 gimple *par_stmt = last_stmt (par_entry_bb); 333 gimple *ws_stmt = last_stmt (ws_entry_bb); 334 335 if (region->inner->type == GIMPLE_OMP_FOR) 336 { 337 /* If this is a combined parallel loop, we need to determine 338 whether or not to use the combined library calls. There 339 are two cases where we do not apply the transformation: 340 static loops and any kind of ordered loop. In the first 341 case, we already open code the loop so there is no need 342 to do anything else. In the latter case, the combined 343 parallel loop call would still need extra synchronization 344 to implement ordered semantics, so there would not be any 345 gain in using the combined call. */ 346 tree clauses = gimple_omp_for_clauses (ws_stmt); 347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); 348 if (c == NULL 349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) 350 == OMP_CLAUSE_SCHEDULE_STATIC) 351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED) 352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_) 353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_)) 354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c))))) 355 return; 356 } 357 else if (region->inner->type == GIMPLE_OMP_SECTIONS 358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt), 359 OMP_CLAUSE__REDUCTEMP_) 360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt), 361 OMP_CLAUSE__CONDTEMP_))) 362 return; 363 364 region->is_combined_parallel = true; 365 region->inner->is_combined_parallel = true; 366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt); 367 } 368 } 369 370 /* Debugging dumps for parallel regions. */ 371 void dump_omp_region (FILE *, struct omp_region *, int); 372 void debug_omp_region (struct omp_region *); 373 void debug_all_omp_regions (void); 374 375 /* Dump the parallel region tree rooted at REGION. 
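For instance, a parallel region whose body contains a single workshare loop might be dumped roughly as follows (the basic block numbers are purely illustrative):

  bb 2: gimple_omp_parallel
      bb 4: gimple_omp_for
      bb 6: GIMPLE_OMP_CONTINUE
      bb 7: GIMPLE_OMP_RETURN
  bb 8: GIMPLE_OMP_RETURN

Each level of nesting adds four columns of indentation; a region's GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN lines are printed at the indentation of that region's own entry.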
*/ 376 377 void 378 dump_omp_region (FILE *file, struct omp_region *region, int indent) 379 { 380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, 381 gimple_code_name[region->type]); 382 383 if (region->inner) 384 dump_omp_region (file, region->inner, indent + 4); 385 386 if (region->cont) 387 { 388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", 389 region->cont->index); 390 } 391 392 if (region->exit) 393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", 394 region->exit->index); 395 else 396 fprintf (file, "%*s[no exit marker]\n", indent, ""); 397 398 if (region->next) 399 dump_omp_region (file, region->next, indent); 400 } 401 402 DEBUG_FUNCTION void 403 debug_omp_region (struct omp_region *region) 404 { 405 dump_omp_region (stderr, region, 0); 406 } 407 408 DEBUG_FUNCTION void 409 debug_all_omp_regions (void) 410 { 411 dump_omp_region (stderr, root_omp_region, 0); 412 } 413 414 /* Create a new parallel region starting at STMT inside region PARENT. */ 415 416 static struct omp_region * 417 new_omp_region (basic_block bb, enum gimple_code type, 418 struct omp_region *parent) 419 { 420 struct omp_region *region = XCNEW (struct omp_region); 421 422 region->outer = parent; 423 region->entry = bb; 424 region->type = type; 425 426 if (parent) 427 { 428 /* This is a nested region. Add it to the list of inner 429 regions in PARENT. */ 430 region->next = parent->inner; 431 parent->inner = region; 432 } 433 else 434 { 435 /* This is a toplevel region. Add it to the list of toplevel 436 regions in ROOT_OMP_REGION. */ 437 region->next = root_omp_region; 438 root_omp_region = region; 439 } 440 441 return region; 442 } 443 444 /* Release the memory associated with the region tree rooted at REGION. */ 445 446 static void 447 free_omp_region_1 (struct omp_region *region) 448 { 449 struct omp_region *i, *n; 450 451 for (i = region->inner; i ; i = n) 452 { 453 n = i->next; 454 free_omp_region_1 (i); 455 } 456 457 free (region); 458 } 459 460 /* Release the memory for the entire omp region tree. */ 461 462 void 463 omp_free_regions (void) 464 { 465 struct omp_region *r, *n; 466 for (r = root_omp_region; r ; r = n) 467 { 468 n = r->next; 469 free_omp_region_1 (r); 470 } 471 root_omp_region = NULL; 472 } 473 474 /* A convenience function to build an empty GIMPLE_COND with just the 475 condition. */ 476 477 static gcond * 478 gimple_build_cond_empty (tree cond) 479 { 480 enum tree_code pred_code; 481 tree lhs, rhs; 482 483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); 484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); 485 } 486 487 /* Return true if a parallel REGION is within a declare target function or 488 within a target region and is not a part of a gridified target. */ 489 490 static bool 491 parallel_needs_hsa_kernel_p (struct omp_region *region) 492 { 493 bool indirect = false; 494 for (region = region->outer; region; region = region->outer) 495 { 496 if (region->type == GIMPLE_OMP_PARALLEL) 497 indirect = true; 498 else if (region->type == GIMPLE_OMP_TARGET) 499 { 500 gomp_target *tgt_stmt 501 = as_a <gomp_target *> (last_stmt (region->entry)); 502 503 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 504 OMP_CLAUSE__GRIDDIM_)) 505 return indirect; 506 else 507 return true; 508 } 509 } 510 511 if (lookup_attribute ("omp declare target", 512 DECL_ATTRIBUTES (current_function_decl))) 513 return true; 514 515 return false; 516 } 517 518 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function. 
519 Add CHILD_FNDECL to decl chain of the supercontext of the block 520 ENTRY_BLOCK - this is the block which originally contained the 521 code from which CHILD_FNDECL was created. 522 523 Together, these actions ensure that the debug info for the outlined 524 function will be emitted with the correct lexical scope. */ 525 526 static void 527 adjust_context_and_scope (struct omp_region *region, tree entry_block, 528 tree child_fndecl) 529 { 530 tree parent_fndecl = NULL_TREE; 531 gimple *entry_stmt; 532 /* OMP expansion expands inner regions before outer ones, so if 533 we e.g. have explicit task region nested in parallel region, when 534 expanding the task region current_function_decl will be the original 535 source function, but we actually want to use as context the child 536 function of the parallel. */ 537 for (region = region->outer; 538 region && parent_fndecl == NULL_TREE; region = region->outer) 539 switch (region->type) 540 { 541 case GIMPLE_OMP_PARALLEL: 542 case GIMPLE_OMP_TASK: 543 case GIMPLE_OMP_TEAMS: 544 entry_stmt = last_stmt (region->entry); 545 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt); 546 break; 547 case GIMPLE_OMP_TARGET: 548 entry_stmt = last_stmt (region->entry); 549 parent_fndecl 550 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt)); 551 break; 552 default: 553 break; 554 } 555 556 if (parent_fndecl == NULL_TREE) 557 parent_fndecl = current_function_decl; 558 DECL_CONTEXT (child_fndecl) = parent_fndecl; 559 560 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK) 561 { 562 tree b = BLOCK_SUPERCONTEXT (entry_block); 563 if (TREE_CODE (b) == BLOCK) 564 { 565 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b); 566 BLOCK_VARS (b) = child_fndecl; 567 } 568 } 569 } 570 571 /* Build the function calls to GOMP_parallel etc to actually 572 generate the parallel operation. REGION is the parallel region 573 being expanded. BB is the block where to insert the code. WS_ARGS 574 will be set if this is a call to a combined parallel+workshare 575 construct, it contains the list of additional arguments needed by 576 the workshare construct. */ 577 578 static void 579 expand_parallel_call (struct omp_region *region, basic_block bb, 580 gomp_parallel *entry_stmt, 581 vec<tree, va_gc> *ws_args) 582 { 583 tree t, t1, t2, val, cond, c, clauses, flags; 584 gimple_stmt_iterator gsi; 585 gimple *stmt; 586 enum built_in_function start_ix; 587 int start_ix2; 588 location_t clause_loc; 589 vec<tree, va_gc> *args; 590 591 clauses = gimple_omp_parallel_clauses (entry_stmt); 592 593 /* Determine what flavor of GOMP_parallel we will be 594 emitting. */ 595 start_ix = BUILT_IN_GOMP_PARALLEL; 596 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 597 if (rtmp) 598 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS; 599 else if (is_combined_parallel (region)) 600 { 601 switch (region->inner->type) 602 { 603 case GIMPLE_OMP_FOR: 604 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); 605 switch (region->inner->sched_kind) 606 { 607 case OMP_CLAUSE_SCHEDULE_RUNTIME: 608 /* For lastprivate(conditional:), our implementation 609 requires monotonic behavior. 
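The small integer values assigned to START_IX2 here and below are offsets from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; that base is added back in after the switch, so they select among the GOMP_parallel_loop_* flavors of the library call.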
*/ 610 if (region->inner->has_lastprivate_conditional != 0) 611 start_ix2 = 3; 612 else if ((region->inner->sched_modifiers 613 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) 614 start_ix2 = 6; 615 else if ((region->inner->sched_modifiers 616 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0) 617 start_ix2 = 7; 618 else 619 start_ix2 = 3; 620 break; 621 case OMP_CLAUSE_SCHEDULE_DYNAMIC: 622 case OMP_CLAUSE_SCHEDULE_GUIDED: 623 if ((region->inner->sched_modifiers 624 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 625 && !region->inner->has_lastprivate_conditional) 626 { 627 start_ix2 = 3 + region->inner->sched_kind; 628 break; 629 } 630 /* FALLTHRU */ 631 default: 632 start_ix2 = region->inner->sched_kind; 633 break; 634 } 635 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; 636 start_ix = (enum built_in_function) start_ix2; 637 break; 638 case GIMPLE_OMP_SECTIONS: 639 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; 640 break; 641 default: 642 gcc_unreachable (); 643 } 644 } 645 646 /* By default, the value of NUM_THREADS is zero (selected at run time) 647 and there is no conditional. */ 648 cond = NULL_TREE; 649 val = build_int_cst (unsigned_type_node, 0); 650 flags = build_int_cst (unsigned_type_node, 0); 651 652 c = omp_find_clause (clauses, OMP_CLAUSE_IF); 653 if (c) 654 cond = OMP_CLAUSE_IF_EXPR (c); 655 656 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS); 657 if (c) 658 { 659 val = OMP_CLAUSE_NUM_THREADS_EXPR (c); 660 clause_loc = OMP_CLAUSE_LOCATION (c); 661 } 662 else 663 clause_loc = gimple_location (entry_stmt); 664 665 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND); 666 if (c) 667 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); 668 669 /* Ensure 'val' is of the correct type. */ 670 val = fold_convert_loc (clause_loc, unsigned_type_node, val); 671 672 /* If we found the clause 'if (cond)', build either 673 (cond == 0) or (cond ? val : 1u). 
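When no num_threads expression was supplied (VAL is zero), the first form yields 1 exactly when COND is false, forcing a team of one thread, and 0 otherwise, which leaves the thread count to the runtime. The second form likewise requests a single thread when COND is false; it is built below with an explicit conditional branch and, when in SSA form, a PHI node joining the two values.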
*/ 674 if (cond) 675 { 676 cond = gimple_boolify (cond); 677 678 if (integer_zerop (val)) 679 val = fold_build2_loc (clause_loc, 680 EQ_EXPR, unsigned_type_node, cond, 681 build_int_cst (TREE_TYPE (cond), 0)); 682 else 683 { 684 basic_block cond_bb, then_bb, else_bb; 685 edge e, e_then, e_else; 686 tree tmp_then, tmp_else, tmp_join, tmp_var; 687 688 tmp_var = create_tmp_var (TREE_TYPE (val)); 689 if (gimple_in_ssa_p (cfun)) 690 { 691 tmp_then = make_ssa_name (tmp_var); 692 tmp_else = make_ssa_name (tmp_var); 693 tmp_join = make_ssa_name (tmp_var); 694 } 695 else 696 { 697 tmp_then = tmp_var; 698 tmp_else = tmp_var; 699 tmp_join = tmp_var; 700 } 701 702 e = split_block_after_labels (bb); 703 cond_bb = e->src; 704 bb = e->dest; 705 remove_edge (e); 706 707 then_bb = create_empty_bb (cond_bb); 708 else_bb = create_empty_bb (then_bb); 709 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); 710 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); 711 712 stmt = gimple_build_cond_empty (cond); 713 gsi = gsi_start_bb (cond_bb); 714 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 715 716 gsi = gsi_start_bb (then_bb); 717 expand_omp_build_assign (&gsi, tmp_then, val, true); 718 719 gsi = gsi_start_bb (else_bb); 720 expand_omp_build_assign (&gsi, tmp_else, 721 build_int_cst (unsigned_type_node, 1), 722 true); 723 724 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); 725 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); 726 add_bb_to_loop (then_bb, cond_bb->loop_father); 727 add_bb_to_loop (else_bb, cond_bb->loop_father); 728 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); 729 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); 730 731 if (gimple_in_ssa_p (cfun)) 732 { 733 gphi *phi = create_phi_node (tmp_join, bb); 734 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); 735 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); 736 } 737 738 val = tmp_join; 739 } 740 741 gsi = gsi_start_bb (bb); 742 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, 743 false, GSI_CONTINUE_LINKING); 744 } 745 746 gsi = gsi_last_nondebug_bb (bb); 747 t = gimple_omp_parallel_data_arg (entry_stmt); 748 if (t == NULL) 749 t1 = null_pointer_node; 750 else 751 t1 = build_fold_addr_expr (t); 752 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); 753 t2 = build_fold_addr_expr (child_fndecl); 754 755 vec_alloc (args, 4 + vec_safe_length (ws_args)); 756 args->quick_push (t2); 757 args->quick_push (t1); 758 args->quick_push (val); 759 if (ws_args) 760 args->splice (*ws_args); 761 args->quick_push (flags); 762 763 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, 764 builtin_decl_explicit (start_ix), args); 765 766 if (rtmp) 767 { 768 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp)); 769 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp), 770 fold_convert (type, 771 fold_convert (pointer_sized_int_node, t))); 772 } 773 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 774 false, GSI_CONTINUE_LINKING); 775 776 if (hsa_gen_requested_p () 777 && parallel_needs_hsa_kernel_p (region)) 778 { 779 cgraph_node *child_cnode = cgraph_node::get (child_fndecl); 780 hsa_register_kernel (child_cnode); 781 } 782 } 783 784 /* Build the function call to GOMP_task to actually 785 generate the task operation. BB is the block where to insert the code. 
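For taskloop constructs, GOMP_taskloop or GOMP_taskloop_ull is emitted instead of GOMP_task, with additional num_tasks/grainsize, start, end and step arguments derived from the enclosing GIMPLE_OMP_FOR and the task's _LOOPTEMP_ clauses.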
*/ 786 787 static void 788 expand_task_call (struct omp_region *region, basic_block bb, 789 gomp_task *entry_stmt) 790 { 791 tree t1, t2, t3; 792 gimple_stmt_iterator gsi; 793 location_t loc = gimple_location (entry_stmt); 794 795 tree clauses = gimple_omp_task_clauses (entry_stmt); 796 797 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF); 798 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED); 799 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE); 800 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); 801 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL); 802 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY); 803 804 unsigned int iflags 805 = (untied ? GOMP_TASK_FLAG_UNTIED : 0) 806 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) 807 | (depend ? GOMP_TASK_FLAG_DEPEND : 0); 808 809 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); 810 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; 811 tree num_tasks = NULL_TREE; 812 bool ull = false; 813 if (taskloop_p) 814 { 815 gimple *g = last_stmt (region->outer->entry); 816 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR 817 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); 818 struct omp_for_data fd; 819 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL); 820 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 821 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar), 822 OMP_CLAUSE__LOOPTEMP_); 823 startvar = OMP_CLAUSE_DECL (startvar); 824 endvar = OMP_CLAUSE_DECL (endvar); 825 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); 826 if (fd.loop.cond_code == LT_EXPR) 827 iflags |= GOMP_TASK_FLAG_UP; 828 tree tclauses = gimple_omp_for_clauses (g); 829 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS); 830 if (num_tasks) 831 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); 832 else 833 { 834 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE); 835 if (num_tasks) 836 { 837 iflags |= GOMP_TASK_FLAG_GRAINSIZE; 838 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); 839 } 840 else 841 num_tasks = integer_zero_node; 842 } 843 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); 844 if (ifc == NULL_TREE) 845 iflags |= GOMP_TASK_FLAG_IF; 846 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) 847 iflags |= GOMP_TASK_FLAG_NOGROUP; 848 ull = fd.iter_type == long_long_unsigned_type_node; 849 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION)) 850 iflags |= GOMP_TASK_FLAG_REDUCTION; 851 } 852 else if (priority) 853 iflags |= GOMP_TASK_FLAG_PRIORITY; 854 855 tree flags = build_int_cst (unsigned_type_node, iflags); 856 857 tree cond = boolean_true_node; 858 if (ifc) 859 { 860 if (taskloop_p) 861 { 862 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); 863 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, 864 build_int_cst (unsigned_type_node, 865 GOMP_TASK_FLAG_IF), 866 build_int_cst (unsigned_type_node, 0)); 867 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, 868 flags, t); 869 } 870 else 871 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); 872 } 873 874 if (finalc) 875 { 876 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); 877 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, 878 build_int_cst (unsigned_type_node, 879 GOMP_TASK_FLAG_FINAL), 880 build_int_cst (unsigned_type_node, 0)); 881 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); 882 } 883 if (depend) 884 depend = OMP_CLAUSE_DECL (depend); 885 else 886 depend = build_int_cst (ptr_type_node, 0); 
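/* Lower the PRIORITY clause, if present, to an int argument; without the clause a default priority of 0 is passed.  */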
887 if (priority) 888 priority = fold_convert (integer_type_node, 889 OMP_CLAUSE_PRIORITY_EXPR (priority)); 890 else 891 priority = integer_zero_node; 892 893 gsi = gsi_last_nondebug_bb (bb); 894 tree t = gimple_omp_task_data_arg (entry_stmt); 895 if (t == NULL) 896 t2 = null_pointer_node; 897 else 898 t2 = build_fold_addr_expr_loc (loc, t); 899 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); 900 t = gimple_omp_task_copy_fn (entry_stmt); 901 if (t == NULL) 902 t3 = null_pointer_node; 903 else 904 t3 = build_fold_addr_expr_loc (loc, t); 905 906 if (taskloop_p) 907 t = build_call_expr (ull 908 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) 909 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), 910 11, t1, t2, t3, 911 gimple_omp_task_arg_size (entry_stmt), 912 gimple_omp_task_arg_align (entry_stmt), flags, 913 num_tasks, priority, startvar, endvar, step); 914 else 915 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), 916 9, t1, t2, t3, 917 gimple_omp_task_arg_size (entry_stmt), 918 gimple_omp_task_arg_align (entry_stmt), cond, flags, 919 depend, priority); 920 921 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 922 false, GSI_CONTINUE_LINKING); 923 } 924 925 /* Build the function call to GOMP_taskwait_depend to actually 926 generate the taskwait operation. BB is the block where to insert the 927 code. */ 928 929 static void 930 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt) 931 { 932 tree clauses = gimple_omp_task_clauses (entry_stmt); 933 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); 934 if (depend == NULL_TREE) 935 return; 936 937 depend = OMP_CLAUSE_DECL (depend); 938 939 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); 940 tree t 941 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND), 942 1, depend); 943 944 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 945 false, GSI_CONTINUE_LINKING); 946 } 947 948 /* Build the function call to GOMP_teams_reg to actually 949 generate the host teams operation. REGION is the teams region 950 being expanded. BB is the block where to insert the code. */ 951 952 static void 953 expand_teams_call (basic_block bb, gomp_teams *entry_stmt) 954 { 955 tree clauses = gimple_omp_teams_clauses (entry_stmt); 956 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); 957 if (num_teams == NULL_TREE) 958 num_teams = build_int_cst (unsigned_type_node, 0); 959 else 960 { 961 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams); 962 num_teams = fold_convert (unsigned_type_node, num_teams); 963 } 964 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); 965 if (thread_limit == NULL_TREE) 966 thread_limit = build_int_cst (unsigned_type_node, 0); 967 else 968 { 969 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit); 970 thread_limit = fold_convert (unsigned_type_node, thread_limit); 971 } 972 973 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); 974 tree t = gimple_omp_teams_data_arg (entry_stmt), t1; 975 if (t == NULL) 976 t1 = null_pointer_node; 977 else 978 t1 = build_fold_addr_expr (t); 979 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt); 980 tree t2 = build_fold_addr_expr (child_fndecl); 981 982 vec<tree, va_gc> *args; 983 vec_alloc (args, 5); 984 args->quick_push (t2); 985 args->quick_push (t1); 986 args->quick_push (num_teams); 987 args->quick_push (thread_limit); 988 /* For future extensibility. 
*/ 989 args->quick_push (build_zero_cst (unsigned_type_node)); 990 991 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, 992 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG), 993 args); 994 995 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 996 false, GSI_CONTINUE_LINKING); 997 } 998 999 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ 1000 1001 static tree 1002 vec2chain (vec<tree, va_gc> *v) 1003 { 1004 tree chain = NULL_TREE, t; 1005 unsigned ix; 1006 1007 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t) 1008 { 1009 DECL_CHAIN (t) = chain; 1010 chain = t; 1011 } 1012 1013 return chain; 1014 } 1015 1016 /* Remove barriers in REGION->EXIT's block. Note that this is only 1017 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region 1018 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that 1019 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be 1020 removed. */ 1021 1022 static void 1023 remove_exit_barrier (struct omp_region *region) 1024 { 1025 gimple_stmt_iterator gsi; 1026 basic_block exit_bb; 1027 edge_iterator ei; 1028 edge e; 1029 gimple *stmt; 1030 int any_addressable_vars = -1; 1031 1032 exit_bb = region->exit; 1033 1034 /* If the parallel region doesn't return, we don't have REGION->EXIT 1035 block at all. */ 1036 if (! exit_bb) 1037 return; 1038 1039 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The 1040 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of 1041 statements that can appear in between are extremely limited -- no 1042 memory operations at all. Here, we allow nothing at all, so the 1043 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */ 1044 gsi = gsi_last_nondebug_bb (exit_bb); 1045 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 1046 gsi_prev_nondebug (&gsi); 1047 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) 1048 return; 1049 1050 FOR_EACH_EDGE (e, ei, exit_bb->preds) 1051 { 1052 gsi = gsi_last_nondebug_bb (e->src); 1053 if (gsi_end_p (gsi)) 1054 continue; 1055 stmt = gsi_stmt (gsi); 1056 if (gimple_code (stmt) == GIMPLE_OMP_RETURN 1057 && !gimple_omp_return_nowait_p (stmt)) 1058 { 1059 /* OpenMP 3.0 tasks unfortunately prevent this optimization 1060 in many cases. If there could be tasks queued, the barrier 1061 might be needed to let the tasks run before some local 1062 variable of the parallel that the task uses as shared 1063 runs out of scope. The task can be spawned either 1064 from within current function (this would be easy to check) 1065 or from some function it calls and gets passed an address 1066 of such a variable. 
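For that reason the nowait flag is only set below when neither the child function's local declarations nor the BLOCK variables between the GIMPLE_OMP_RETURN and the parallel's own block contain any TREE_ADDRESSABLE variable.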
*/ 1067 if (any_addressable_vars < 0) 1068 { 1069 gomp_parallel *parallel_stmt 1070 = as_a <gomp_parallel *> (last_stmt (region->entry)); 1071 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); 1072 tree local_decls, block, decl; 1073 unsigned ix; 1074 1075 any_addressable_vars = 0; 1076 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) 1077 if (TREE_ADDRESSABLE (decl)) 1078 { 1079 any_addressable_vars = 1; 1080 break; 1081 } 1082 for (block = gimple_block (stmt); 1083 !any_addressable_vars 1084 && block 1085 && TREE_CODE (block) == BLOCK; 1086 block = BLOCK_SUPERCONTEXT (block)) 1087 { 1088 for (local_decls = BLOCK_VARS (block); 1089 local_decls; 1090 local_decls = DECL_CHAIN (local_decls)) 1091 if (TREE_ADDRESSABLE (local_decls)) 1092 { 1093 any_addressable_vars = 1; 1094 break; 1095 } 1096 if (block == gimple_block (parallel_stmt)) 1097 break; 1098 } 1099 } 1100 if (!any_addressable_vars) 1101 gimple_omp_return_set_nowait (stmt); 1102 } 1103 } 1104 } 1105 1106 static void 1107 remove_exit_barriers (struct omp_region *region) 1108 { 1109 if (region->type == GIMPLE_OMP_PARALLEL) 1110 remove_exit_barrier (region); 1111 1112 if (region->inner) 1113 { 1114 region = region->inner; 1115 remove_exit_barriers (region); 1116 while (region->next) 1117 { 1118 region = region->next; 1119 remove_exit_barriers (region); 1120 } 1121 } 1122 } 1123 1124 /* Optimize omp_get_thread_num () and omp_get_num_threads () 1125 calls. These can't be declared as const functions, but 1126 within one parallel body they are constant, so they can be 1127 transformed there into __builtin_omp_get_{thread_num,num_threads} () 1128 which are declared const. Similarly for task body, except 1129 that in untied task omp_get_thread_num () can change at any task 1130 scheduling point. */ 1131 1132 static void 1133 optimize_omp_library_calls (gimple *entry_stmt) 1134 { 1135 basic_block bb; 1136 gimple_stmt_iterator gsi; 1137 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1138 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); 1139 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1140 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); 1141 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK 1142 && omp_find_clause (gimple_omp_task_clauses (entry_stmt), 1143 OMP_CLAUSE_UNTIED) != NULL); 1144 1145 FOR_EACH_BB_FN (bb, cfun) 1146 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1147 { 1148 gimple *call = gsi_stmt (gsi); 1149 tree decl; 1150 1151 if (is_gimple_call (call) 1152 && (decl = gimple_call_fndecl (call)) 1153 && DECL_EXTERNAL (decl) 1154 && TREE_PUBLIC (decl) 1155 && DECL_INITIAL (decl) == NULL) 1156 { 1157 tree built_in; 1158 1159 if (DECL_NAME (decl) == thr_num_id) 1160 { 1161 /* In #pragma omp task untied omp_get_thread_num () can change 1162 during the execution of the task region. 
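Only for tied tasks is the value constant throughout the body, so the replacement with the constant builtin is skipped for untied tasks below.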
*/ 1163 if (untied_task) 1164 continue; 1165 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1166 } 1167 else if (DECL_NAME (decl) == num_thr_id) 1168 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1169 else 1170 continue; 1171 1172 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) 1173 || gimple_call_num_args (call) != 0) 1174 continue; 1175 1176 if (flag_exceptions && !TREE_NOTHROW (decl)) 1177 continue; 1178 1179 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE 1180 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), 1181 TREE_TYPE (TREE_TYPE (built_in)))) 1182 continue; 1183 1184 gimple_call_set_fndecl (call, built_in); 1185 } 1186 } 1187 } 1188 1189 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be 1190 regimplified. */ 1191 1192 static tree 1193 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) 1194 { 1195 tree t = *tp; 1196 1197 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ 1198 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) 1199 return t; 1200 1201 if (TREE_CODE (t) == ADDR_EXPR) 1202 recompute_tree_invariant_for_addr_expr (t); 1203 1204 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 1205 return NULL_TREE; 1206 } 1207 1208 /* Prepend or append TO = FROM assignment before or after *GSI_P. */ 1209 1210 static void 1211 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, 1212 bool after) 1213 { 1214 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); 1215 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, 1216 !after, after ? GSI_CONTINUE_LINKING 1217 : GSI_SAME_STMT); 1218 gimple *stmt = gimple_build_assign (to, from); 1219 if (after) 1220 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); 1221 else 1222 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); 1223 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) 1224 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) 1225 { 1226 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 1227 gimple_regimplify_operands (stmt, &gsi); 1228 } 1229 } 1230 1231 /* Expand the OpenMP parallel or task directive starting at REGION. */ 1232 1233 static void 1234 expand_omp_taskreg (struct omp_region *region) 1235 { 1236 basic_block entry_bb, exit_bb, new_bb; 1237 struct function *child_cfun; 1238 tree child_fn, block, t; 1239 gimple_stmt_iterator gsi; 1240 gimple *entry_stmt, *stmt; 1241 edge e; 1242 vec<tree, va_gc> *ws_args; 1243 1244 entry_stmt = last_stmt (region->entry); 1245 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK 1246 && gimple_omp_task_taskwait_p (entry_stmt)) 1247 { 1248 new_bb = region->entry; 1249 gsi = gsi_last_nondebug_bb (region->entry); 1250 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); 1251 gsi_remove (&gsi, true); 1252 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt)); 1253 return; 1254 } 1255 1256 child_fn = gimple_omp_taskreg_child_fn (entry_stmt); 1257 child_cfun = DECL_STRUCT_FUNCTION (child_fn); 1258 1259 entry_bb = region->entry; 1260 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) 1261 exit_bb = region->cont; 1262 else 1263 exit_bb = region->exit; 1264 1265 if (is_combined_parallel (region)) 1266 ws_args = region->ws_args; 1267 else 1268 ws_args = NULL; 1269 1270 if (child_cfun->cfg) 1271 { 1272 /* Due to inlining, it may happen that we have already outlined 1273 the region, in which case all we need to do is make the 1274 sub-graph unreachable and emit the parallel call. 
*/ 1275 edge entry_succ_e, exit_succ_e; 1276 1277 entry_succ_e = single_succ_edge (entry_bb); 1278 1279 gsi = gsi_last_nondebug_bb (entry_bb); 1280 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL 1281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK 1282 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS); 1283 gsi_remove (&gsi, true); 1284 1285 new_bb = entry_bb; 1286 if (exit_bb) 1287 { 1288 exit_succ_e = single_succ_edge (exit_bb); 1289 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); 1290 } 1291 remove_edge_and_dominated_blocks (entry_succ_e); 1292 } 1293 else 1294 { 1295 unsigned srcidx, dstidx, num; 1296 1297 /* If the parallel region needs data sent from the parent 1298 function, then the very first statement (except possible 1299 tree profile counter updates) of the parallel body 1300 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since 1301 &.OMP_DATA_O is passed as an argument to the child function, 1302 we need to replace it with the argument as seen by the child 1303 function. 1304 1305 In most cases, this will end up being the identity assignment 1306 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had 1307 a function call that has been inlined, the original PARM_DECL 1308 .OMP_DATA_I may have been converted into a different local 1309 variable. In which case, we need to keep the assignment. */ 1310 if (gimple_omp_taskreg_data_arg (entry_stmt)) 1311 { 1312 basic_block entry_succ_bb 1313 = single_succ_p (entry_bb) ? single_succ (entry_bb) 1314 : FALLTHRU_EDGE (entry_bb)->dest; 1315 tree arg; 1316 gimple *parcopy_stmt = NULL; 1317 1318 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) 1319 { 1320 gimple *stmt; 1321 1322 gcc_assert (!gsi_end_p (gsi)); 1323 stmt = gsi_stmt (gsi); 1324 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1325 continue; 1326 1327 if (gimple_num_ops (stmt) == 2) 1328 { 1329 tree arg = gimple_assign_rhs1 (stmt); 1330 1331 /* We're ignore the subcode because we're 1332 effectively doing a STRIP_NOPS. */ 1333 1334 if (TREE_CODE (arg) == ADDR_EXPR 1335 && (TREE_OPERAND (arg, 0) 1336 == gimple_omp_taskreg_data_arg (entry_stmt))) 1337 { 1338 parcopy_stmt = stmt; 1339 break; 1340 } 1341 } 1342 } 1343 1344 gcc_assert (parcopy_stmt != NULL); 1345 arg = DECL_ARGUMENTS (child_fn); 1346 1347 if (!gimple_in_ssa_p (cfun)) 1348 { 1349 if (gimple_assign_lhs (parcopy_stmt) == arg) 1350 gsi_remove (&gsi, true); 1351 else 1352 { 1353 /* ?? Is setting the subcode really necessary ?? */ 1354 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg)); 1355 gimple_assign_set_rhs1 (parcopy_stmt, arg); 1356 } 1357 } 1358 else 1359 { 1360 tree lhs = gimple_assign_lhs (parcopy_stmt); 1361 gcc_assert (SSA_NAME_VAR (lhs) == arg); 1362 /* We'd like to set the rhs to the default def in the child_fn, 1363 but it's too early to create ssa names in the child_fn. 1364 Instead, we set the rhs to the parm. In 1365 move_sese_region_to_fn, we introduce a default def for the 1366 parm, map the parm to it's default def, and once we encounter 1367 this stmt, replace the parm with the default def. */ 1368 gimple_assign_set_rhs1 (parcopy_stmt, arg); 1369 update_stmt (parcopy_stmt); 1370 } 1371 } 1372 1373 /* Declare local variables needed in CHILD_CFUN. */ 1374 block = DECL_INITIAL (child_fn); 1375 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); 1376 /* The gimplifier could record temporaries in parallel/task block 1377 rather than in containing function's local_decls chain, 1378 which would mean cgraph missed finalizing them. Do it now. 
*/ 1379 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) 1380 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) 1381 varpool_node::finalize_decl (t); 1382 DECL_SAVED_TREE (child_fn) = NULL; 1383 /* We'll create a CFG for child_fn, so no gimple body is needed. */ 1384 gimple_set_body (child_fn, NULL); 1385 TREE_USED (block) = 1; 1386 1387 /* Reset DECL_CONTEXT on function arguments. */ 1388 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) 1389 DECL_CONTEXT (t) = child_fn; 1390 1391 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, 1392 so that it can be moved to the child function. */ 1393 gsi = gsi_last_nondebug_bb (entry_bb); 1394 stmt = gsi_stmt (gsi); 1395 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL 1396 || gimple_code (stmt) == GIMPLE_OMP_TASK 1397 || gimple_code (stmt) == GIMPLE_OMP_TEAMS)); 1398 e = split_block (entry_bb, stmt); 1399 gsi_remove (&gsi, true); 1400 entry_bb = e->dest; 1401 edge e2 = NULL; 1402 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK) 1403 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 1404 else 1405 { 1406 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); 1407 gcc_assert (e2->dest == region->exit); 1408 remove_edge (BRANCH_EDGE (entry_bb)); 1409 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); 1410 gsi = gsi_last_nondebug_bb (region->exit); 1411 gcc_assert (!gsi_end_p (gsi) 1412 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 1413 gsi_remove (&gsi, true); 1414 } 1415 1416 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ 1417 if (exit_bb) 1418 { 1419 gsi = gsi_last_nondebug_bb (exit_bb); 1420 gcc_assert (!gsi_end_p (gsi) 1421 && (gimple_code (gsi_stmt (gsi)) 1422 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); 1423 stmt = gimple_build_return (NULL); 1424 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); 1425 gsi_remove (&gsi, true); 1426 } 1427 1428 /* Move the parallel region into CHILD_CFUN. */ 1429 1430 if (gimple_in_ssa_p (cfun)) 1431 { 1432 init_tree_ssa (child_cfun); 1433 init_ssa_operands (child_cfun); 1434 child_cfun->gimple_df->in_ssa_p = true; 1435 block = NULL_TREE; 1436 } 1437 else 1438 block = gimple_block (entry_stmt); 1439 1440 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); 1441 if (exit_bb) 1442 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; 1443 if (e2) 1444 { 1445 basic_block dest_bb = e2->dest; 1446 if (!exit_bb) 1447 make_edge (new_bb, dest_bb, EDGE_FALLTHRU); 1448 remove_edge (e2); 1449 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); 1450 } 1451 /* When the OMP expansion process cannot guarantee an up-to-date 1452 loop tree arrange for the child function to fixup loops. */ 1453 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 1454 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; 1455 1456 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ 1457 num = vec_safe_length (child_cfun->local_decls); 1458 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) 1459 { 1460 t = (*child_cfun->local_decls)[srcidx]; 1461 if (DECL_CONTEXT (t) == cfun->decl) 1462 continue; 1463 if (srcidx != dstidx) 1464 (*child_cfun->local_decls)[dstidx] = t; 1465 dstidx++; 1466 } 1467 if (dstidx != num) 1468 vec_safe_truncate (child_cfun->local_decls, dstidx); 1469 1470 /* Inform the callgraph about the new function. 
*/ 1471 child_cfun->curr_properties = cfun->curr_properties; 1472 child_cfun->has_simduid_loops |= cfun->has_simduid_loops; 1473 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; 1474 cgraph_node *node = cgraph_node::get_create (child_fn); 1475 node->parallelized_function = 1; 1476 cgraph_node::add_new_function (child_fn, true); 1477 1478 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) 1479 && !DECL_ASSEMBLER_NAME_SET_P (child_fn); 1480 1481 /* Fix the callgraph edges for child_cfun. Those for cfun will be 1482 fixed in a following pass. */ 1483 push_cfun (child_cfun); 1484 if (need_asm) 1485 assign_assembler_name_if_needed (child_fn); 1486 1487 if (optimize) 1488 optimize_omp_library_calls (entry_stmt); 1489 update_max_bb_count (); 1490 cgraph_edge::rebuild_edges (); 1491 1492 /* Some EH regions might become dead, see PR34608. If 1493 pass_cleanup_cfg isn't the first pass to happen with the 1494 new child, these dead EH edges might cause problems. 1495 Clean them up now. */ 1496 if (flag_exceptions) 1497 { 1498 basic_block bb; 1499 bool changed = false; 1500 1501 FOR_EACH_BB_FN (bb, cfun) 1502 changed |= gimple_purge_dead_eh_edges (bb); 1503 if (changed) 1504 cleanup_tree_cfg (); 1505 } 1506 if (gimple_in_ssa_p (cfun)) 1507 update_ssa (TODO_update_ssa); 1508 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 1509 verify_loop_structure (); 1510 pop_cfun (); 1511 1512 if (dump_file && !gimple_in_ssa_p (cfun)) 1513 { 1514 omp_any_child_fn_dumped = true; 1515 dump_function_header (dump_file, child_fn, dump_flags); 1516 dump_function_to_file (child_fn, dump_file, dump_flags); 1517 } 1518 } 1519 1520 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn); 1521 1522 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) 1523 expand_parallel_call (region, new_bb, 1524 as_a <gomp_parallel *> (entry_stmt), ws_args); 1525 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS) 1526 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt)); 1527 else 1528 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); 1529 if (gimple_in_ssa_p (cfun)) 1530 update_ssa (TODO_update_ssa_only_virtuals); 1531 } 1532 1533 /* Information about members of an OpenACC collapsed loop nest. */ 1534 1535 struct oacc_collapse 1536 { 1537 tree base; /* Base value. */ 1538 tree iters; /* Number of steps. */ 1539 tree step; /* Step size. */ 1540 tree tile; /* Tile increment (if tiled). */ 1541 tree outer; /* Tile iterator var. */ 1542 }; 1543 1544 /* Helper for expand_oacc_for. Determine collapsed loop information. 1545 Fill in COUNTS array. Emit any initialization code before GSI. 1546 Return the calculated outer loop bound of BOUND_TYPE. */ 1547 1548 static tree 1549 expand_oacc_collapse_init (const struct omp_for_data *fd, 1550 gimple_stmt_iterator *gsi, 1551 oacc_collapse *counts, tree diff_type, 1552 tree bound_type, location_t loc) 1553 { 1554 tree tiling = fd->tiling; 1555 tree total = build_int_cst (bound_type, 1); 1556 int ix; 1557 1558 gcc_assert (integer_onep (fd->loop.step)); 1559 gcc_assert (integer_zerop (fd->loop.n1)); 1560 1561 /* When tiling, the first operand of the tile clause applies to the 1562 innermost loop, and we work outwards from there. Seems 1563 backwards, but whatever. 
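Concretely, the loop below walks IX from FD->collapse - 1, the innermost member of FD->loops, out to the outermost member, while advancing through the TILE clause operands with TREE_CHAIN in step.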
*/ 1564 for (ix = fd->collapse; ix--;) 1565 { 1566 const omp_for_data_loop *loop = &fd->loops[ix]; 1567 1568 tree iter_type = TREE_TYPE (loop->v); 1569 tree plus_type = iter_type; 1570 1571 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR); 1572 1573 if (POINTER_TYPE_P (iter_type)) 1574 plus_type = sizetype; 1575 1576 if (tiling) 1577 { 1578 tree num = build_int_cst (integer_type_node, fd->collapse); 1579 tree loop_no = build_int_cst (integer_type_node, ix); 1580 tree tile = TREE_VALUE (tiling); 1581 gcall *call 1582 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile, 1583 /* gwv-outer=*/integer_zero_node, 1584 /* gwv-inner=*/integer_zero_node); 1585 1586 counts[ix].outer = create_tmp_var (iter_type, ".outer"); 1587 counts[ix].tile = create_tmp_var (diff_type, ".tile"); 1588 gimple_call_set_lhs (call, counts[ix].tile); 1589 gimple_set_location (call, loc); 1590 gsi_insert_before (gsi, call, GSI_SAME_STMT); 1591 1592 tiling = TREE_CHAIN (tiling); 1593 } 1594 else 1595 { 1596 counts[ix].tile = NULL; 1597 counts[ix].outer = loop->v; 1598 } 1599 1600 tree b = loop->n1; 1601 tree e = loop->n2; 1602 tree s = loop->step; 1603 bool up = loop->cond_code == LT_EXPR; 1604 tree dir = build_int_cst (diff_type, up ? +1 : -1); 1605 bool negating; 1606 tree expr; 1607 1608 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, 1609 true, GSI_SAME_STMT); 1610 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, 1611 true, GSI_SAME_STMT); 1612 1613 /* Convert the step, avoiding possible unsigned->signed overflow. */ 1614 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 1615 if (negating) 1616 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 1617 s = fold_convert (diff_type, s); 1618 if (negating) 1619 s = fold_build1 (NEGATE_EXPR, diff_type, s); 1620 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, 1621 true, GSI_SAME_STMT); 1622 1623 /* Determine the range, avoiding possible unsigned->signed overflow. */ 1624 negating = !up && TYPE_UNSIGNED (iter_type); 1625 expr = fold_build2 (MINUS_EXPR, plus_type, 1626 fold_convert (plus_type, negating ? b : e), 1627 fold_convert (plus_type, negating ? e : b)); 1628 expr = fold_convert (diff_type, expr); 1629 if (negating) 1630 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 1631 tree range = force_gimple_operand_gsi 1632 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); 1633 1634 /* Determine number of iterations. */ 1635 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 1636 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 1637 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 1638 1639 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, 1640 true, GSI_SAME_STMT); 1641 1642 counts[ix].base = b; 1643 counts[ix].iters = iters; 1644 counts[ix].step = s; 1645 1646 total = fold_build2 (MULT_EXPR, bound_type, total, 1647 fold_convert (bound_type, iters)); 1648 } 1649 1650 return total; 1651 } 1652 1653 /* Emit initializers for collapsed loop members. INNER is true if 1654 this is for the element loop of a TILE. IVAR is the outer 1655 loop iteration variable, from which collapsed loop iteration values 1656 are calculated. COUNTS array has been initialized by 1657 expand_oacc_collapse_inits. 
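Each member's value is recovered by taking IVAR modulo that member's iteration count; IVAR is then divided by the same count before moving on to the next outer member, working from the innermost loop outwards (the remaining quotient is used directly for the outermost member).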
*/ 1658 1659 static void 1660 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, 1661 gimple_stmt_iterator *gsi, 1662 const oacc_collapse *counts, tree ivar, 1663 tree diff_type) 1664 { 1665 tree ivar_type = TREE_TYPE (ivar); 1666 1667 /* The most rapidly changing iteration variable is the innermost 1668 one. */ 1669 for (int ix = fd->collapse; ix--;) 1670 { 1671 const omp_for_data_loop *loop = &fd->loops[ix]; 1672 const oacc_collapse *collapse = &counts[ix]; 1673 tree v = inner ? loop->v : collapse->outer; 1674 tree iter_type = TREE_TYPE (v); 1675 tree plus_type = iter_type; 1676 enum tree_code plus_code = PLUS_EXPR; 1677 tree expr; 1678 1679 if (POINTER_TYPE_P (iter_type)) 1680 { 1681 plus_code = POINTER_PLUS_EXPR; 1682 plus_type = sizetype; 1683 } 1684 1685 expr = ivar; 1686 if (ix) 1687 { 1688 tree mod = fold_convert (ivar_type, collapse->iters); 1689 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod); 1690 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod); 1691 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE, 1692 true, GSI_SAME_STMT); 1693 } 1694 1695 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), 1696 fold_convert (diff_type, collapse->step)); 1697 expr = fold_build2 (plus_code, iter_type, 1698 inner ? collapse->outer : collapse->base, 1699 fold_convert (plus_type, expr)); 1700 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, 1701 true, GSI_SAME_STMT); 1702 gassign *ass = gimple_build_assign (v, expr); 1703 gsi_insert_before (gsi, ass, GSI_SAME_STMT); 1704 } 1705 } 1706 1707 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost 1708 of the combined collapse > 1 loop constructs, generate code like: 1709 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; 1710 if (cond3 is <) 1711 adj = STEP3 - 1; 1712 else 1713 adj = STEP3 + 1; 1714 count3 = (adj + N32 - N31) / STEP3; 1715 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; 1716 if (cond2 is <) 1717 adj = STEP2 - 1; 1718 else 1719 adj = STEP2 + 1; 1720 count2 = (adj + N22 - N21) / STEP2; 1721 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; 1722 if (cond1 is <) 1723 adj = STEP1 - 1; 1724 else 1725 adj = STEP1 + 1; 1726 count1 = (adj + N12 - N11) / STEP1; 1727 count = count1 * count2 * count3; 1728 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: 1729 count = 0; 1730 and set ZERO_ITER_BB to that bb. If this isn't the outermost 1731 of the combined loop constructs, just initialize COUNTS array 1732 from the _looptemp_ clauses. */ 1733 1734 /* NOTE: It *could* be better to moosh all of the BBs together, 1735 creating one larger BB with all the computation and the unexpected 1736 jump at the end. I.e. 1737 1738 bool zero3, zero2, zero1, zero; 1739 1740 zero3 = N32 c3 N31; 1741 count3 = (N32 - N31) /[cl] STEP3; 1742 zero2 = N22 c2 N21; 1743 count2 = (N22 - N21) /[cl] STEP2; 1744 zero1 = N12 c1 N11; 1745 count1 = (N12 - N11) /[cl] STEP1; 1746 zero = zero3 || zero2 || zero1; 1747 count = count1 * count2 * count3; 1748 if (__builtin_expect(zero, false)) goto zero_iter_bb; 1749 1750 After all, we expect the zero=false, and thus we expect to have to 1751 evaluate all of the comparison expressions, so short-circuiting 1752 oughtn't be a win. Since the condition isn't protecting a 1753 denominator, we're not concerned about divide-by-zero, so we can 1754 fully evaluate count even if a numerator turned out to be wrong. 
1755 1756 It seems like putting this all together would create much better 1757 scheduling opportunities, and less pressure on the chip's branch 1758 predictor. */ 1759 1760 static void 1761 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1762 basic_block &entry_bb, tree *counts, 1763 basic_block &zero_iter1_bb, int &first_zero_iter1, 1764 basic_block &zero_iter2_bb, int &first_zero_iter2, 1765 basic_block &l2_dom_bb) 1766 { 1767 tree t, type = TREE_TYPE (fd->loop.v); 1768 edge e, ne; 1769 int i; 1770 1771 /* Collapsed loops need work for expansion into SSA form. */ 1772 gcc_assert (!gimple_in_ssa_p (cfun)); 1773 1774 if (gimple_omp_for_combined_into_p (fd->for_stmt) 1775 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 1776 { 1777 gcc_assert (fd->ordered == 0); 1778 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1779 isn't supposed to be handled, as the inner loop doesn't 1780 use it. */ 1781 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 1782 OMP_CLAUSE__LOOPTEMP_); 1783 gcc_assert (innerc); 1784 for (i = 0; i < fd->collapse; i++) 1785 { 1786 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1787 OMP_CLAUSE__LOOPTEMP_); 1788 gcc_assert (innerc); 1789 if (i) 1790 counts[i] = OMP_CLAUSE_DECL (innerc); 1791 else 1792 counts[0] = NULL_TREE; 1793 } 1794 return; 1795 } 1796 1797 for (i = fd->collapse; i < fd->ordered; i++) 1798 { 1799 tree itype = TREE_TYPE (fd->loops[i].v); 1800 counts[i] = NULL_TREE; 1801 t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1802 fold_convert (itype, fd->loops[i].n1), 1803 fold_convert (itype, fd->loops[i].n2)); 1804 if (t && integer_zerop (t)) 1805 { 1806 for (i = fd->collapse; i < fd->ordered; i++) 1807 counts[i] = build_int_cst (type, 0); 1808 break; 1809 } 1810 } 1811 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) 1812 { 1813 tree itype = TREE_TYPE (fd->loops[i].v); 1814 1815 if (i >= fd->collapse && counts[i]) 1816 continue; 1817 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) 1818 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1819 fold_convert (itype, fd->loops[i].n1), 1820 fold_convert (itype, fd->loops[i].n2))) 1821 == NULL_TREE || !integer_onep (t))) 1822 { 1823 gcond *cond_stmt; 1824 tree n1, n2; 1825 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 1826 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, 1827 true, GSI_SAME_STMT); 1828 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 1829 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, 1830 true, GSI_SAME_STMT); 1831 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, 1832 NULL_TREE, NULL_TREE); 1833 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); 1834 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 1835 expand_omp_regimplify_p, NULL, NULL) 1836 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 1837 expand_omp_regimplify_p, NULL, NULL)) 1838 { 1839 *gsi = gsi_for_stmt (cond_stmt); 1840 gimple_regimplify_operands (cond_stmt, gsi); 1841 } 1842 e = split_block (entry_bb, cond_stmt); 1843 basic_block &zero_iter_bb 1844 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; 1845 int &first_zero_iter 1846 = i < fd->collapse ? 
first_zero_iter1 : first_zero_iter2; 1847 if (zero_iter_bb == NULL) 1848 { 1849 gassign *assign_stmt; 1850 first_zero_iter = i; 1851 zero_iter_bb = create_empty_bb (entry_bb); 1852 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); 1853 *gsi = gsi_after_labels (zero_iter_bb); 1854 if (i < fd->collapse) 1855 assign_stmt = gimple_build_assign (fd->loop.n2, 1856 build_zero_cst (type)); 1857 else 1858 { 1859 counts[i] = create_tmp_reg (type, ".count"); 1860 assign_stmt 1861 = gimple_build_assign (counts[i], build_zero_cst (type)); 1862 } 1863 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); 1864 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, 1865 entry_bb); 1866 } 1867 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); 1868 ne->probability = profile_probability::very_unlikely (); 1869 e->flags = EDGE_TRUE_VALUE; 1870 e->probability = ne->probability.invert (); 1871 if (l2_dom_bb == NULL) 1872 l2_dom_bb = entry_bb; 1873 entry_bb = e->dest; 1874 *gsi = gsi_last_nondebug_bb (entry_bb); 1875 } 1876 1877 if (POINTER_TYPE_P (itype)) 1878 itype = signed_type_for (itype); 1879 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR 1880 ? -1 : 1)); 1881 t = fold_build2 (PLUS_EXPR, itype, 1882 fold_convert (itype, fd->loops[i].step), t); 1883 t = fold_build2 (PLUS_EXPR, itype, t, 1884 fold_convert (itype, fd->loops[i].n2)); 1885 t = fold_build2 (MINUS_EXPR, itype, t, 1886 fold_convert (itype, fd->loops[i].n1)); 1887 /* ?? We could probably use CEIL_DIV_EXPR instead of 1888 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't 1889 generate the same code in the end because generically we 1890 don't know that the values involved must be negative for 1891 GT?? */ 1892 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 1893 t = fold_build2 (TRUNC_DIV_EXPR, itype, 1894 fold_build1 (NEGATE_EXPR, itype, t), 1895 fold_build1 (NEGATE_EXPR, itype, 1896 fold_convert (itype, 1897 fd->loops[i].step))); 1898 else 1899 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, 1900 fold_convert (itype, fd->loops[i].step)); 1901 t = fold_convert (type, t); 1902 if (TREE_CODE (t) == INTEGER_CST) 1903 counts[i] = t; 1904 else 1905 { 1906 if (i < fd->collapse || i != first_zero_iter2) 1907 counts[i] = create_tmp_reg (type, ".count"); 1908 expand_omp_build_assign (gsi, counts[i], t); 1909 } 1910 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) 1911 { 1912 if (i == 0) 1913 t = counts[0]; 1914 else 1915 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); 1916 expand_omp_build_assign (gsi, fd->loop.n2, t); 1917 } 1918 } 1919 } 1920 1921 /* Helper function for expand_omp_{for_*,simd}. Generate code like: 1922 T = V; 1923 V3 = N31 + (T % count3) * STEP3; 1924 T = T / count3; 1925 V2 = N21 + (T % count2) * STEP2; 1926 T = T / count2; 1927 V1 = N11 + T * STEP1; 1928 if this loop doesn't have an inner loop construct combined with it. 1929 If it does have an inner loop construct combined with it and the 1930 iteration count isn't known constant, store values from counts array 1931 into its _looptemp_ temporaries instead. */ 1932 1933 static void 1934 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1935 tree *counts, gimple *inner_stmt, tree startvar) 1936 { 1937 int i; 1938 if (gimple_omp_for_combined_p (fd->for_stmt)) 1939 { 1940 /* If fd->loop.n2 is constant, then no propagation of the counts 1941 is needed, they are constant. */ 1942 if (TREE_CODE (fd->loop.n2) == INTEGER_CST) 1943 return; 1944 1945 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR 1946 ? 
gimple_omp_taskreg_clauses (inner_stmt) 1947 : gimple_omp_for_clauses (inner_stmt); 1948 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1949 isn't supposed to be handled, as the inner loop doesn't 1950 use it. */ 1951 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 1952 gcc_assert (innerc); 1953 for (i = 0; i < fd->collapse; i++) 1954 { 1955 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1956 OMP_CLAUSE__LOOPTEMP_); 1957 gcc_assert (innerc); 1958 if (i) 1959 { 1960 tree tem = OMP_CLAUSE_DECL (innerc); 1961 tree t = fold_convert (TREE_TYPE (tem), counts[i]); 1962 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 1963 false, GSI_CONTINUE_LINKING); 1964 gassign *stmt = gimple_build_assign (tem, t); 1965 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1966 } 1967 } 1968 return; 1969 } 1970 1971 tree type = TREE_TYPE (fd->loop.v); 1972 tree tem = create_tmp_reg (type, ".tem"); 1973 gassign *stmt = gimple_build_assign (tem, startvar); 1974 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1975 1976 for (i = fd->collapse - 1; i >= 0; i--) 1977 { 1978 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; 1979 itype = vtype; 1980 if (POINTER_TYPE_P (vtype)) 1981 itype = signed_type_for (vtype); 1982 if (i != 0) 1983 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); 1984 else 1985 t = tem; 1986 t = fold_convert (itype, t); 1987 t = fold_build2 (MULT_EXPR, itype, t, 1988 fold_convert (itype, fd->loops[i].step)); 1989 if (POINTER_TYPE_P (vtype)) 1990 t = fold_build_pointer_plus (fd->loops[i].n1, t); 1991 else 1992 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); 1993 t = force_gimple_operand_gsi (gsi, t, 1994 DECL_P (fd->loops[i].v) 1995 && TREE_ADDRESSABLE (fd->loops[i].v), 1996 NULL_TREE, false, 1997 GSI_CONTINUE_LINKING); 1998 stmt = gimple_build_assign (fd->loops[i].v, t); 1999 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 2000 if (i != 0) 2001 { 2002 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); 2003 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 2004 false, GSI_CONTINUE_LINKING); 2005 stmt = gimple_build_assign (tem, t); 2006 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 2007 } 2008 } 2009 } 2010 2011 /* Helper function for expand_omp_for_*. 
Generate code like: 2012 L10: 2013 V3 += STEP3; 2014 if (V3 cond3 N32) goto BODY_BB; else goto L11; 2015 L11: 2016 V3 = N31; 2017 V2 += STEP2; 2018 if (V2 cond2 N22) goto BODY_BB; else goto L12; 2019 L12: 2020 V2 = N21; 2021 V1 += STEP1; 2022 goto BODY_BB; */ 2023 2024 static basic_block 2025 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, 2026 basic_block body_bb) 2027 { 2028 basic_block last_bb, bb, collapse_bb = NULL; 2029 int i; 2030 gimple_stmt_iterator gsi; 2031 edge e; 2032 tree t; 2033 gimple *stmt; 2034 2035 last_bb = cont_bb; 2036 for (i = fd->collapse - 1; i >= 0; i--) 2037 { 2038 tree vtype = TREE_TYPE (fd->loops[i].v); 2039 2040 bb = create_empty_bb (last_bb); 2041 add_bb_to_loop (bb, last_bb->loop_father); 2042 gsi = gsi_start_bb (bb); 2043 2044 if (i < fd->collapse - 1) 2045 { 2046 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); 2047 e->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2048 2049 t = fd->loops[i + 1].n1; 2050 t = force_gimple_operand_gsi (&gsi, t, 2051 DECL_P (fd->loops[i + 1].v) 2052 && TREE_ADDRESSABLE (fd->loops[i 2053 + 1].v), 2054 NULL_TREE, false, 2055 GSI_CONTINUE_LINKING); 2056 stmt = gimple_build_assign (fd->loops[i + 1].v, t); 2057 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2058 } 2059 else 2060 collapse_bb = bb; 2061 2062 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); 2063 2064 if (POINTER_TYPE_P (vtype)) 2065 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); 2066 else 2067 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); 2068 t = force_gimple_operand_gsi (&gsi, t, 2069 DECL_P (fd->loops[i].v) 2070 && TREE_ADDRESSABLE (fd->loops[i].v), 2071 NULL_TREE, false, GSI_CONTINUE_LINKING); 2072 stmt = gimple_build_assign (fd->loops[i].v, t); 2073 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2074 2075 if (i > 0) 2076 { 2077 t = fd->loops[i].n2; 2078 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2079 false, GSI_CONTINUE_LINKING); 2080 tree v = fd->loops[i].v; 2081 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 2082 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 2083 false, GSI_CONTINUE_LINKING); 2084 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 2085 stmt = gimple_build_cond_empty (t); 2086 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2087 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)), 2088 expand_omp_regimplify_p, NULL, NULL) 2089 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)), 2090 expand_omp_regimplify_p, NULL, NULL)) 2091 gimple_regimplify_operands (stmt, &gsi); 2092 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); 2093 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 2094 } 2095 else 2096 make_edge (bb, body_bb, EDGE_FALLTHRU); 2097 last_bb = bb; 2098 } 2099 2100 return collapse_bb; 2101 } 2102 2103 /* Expand #pragma omp ordered depend(source). */ 2104 2105 static void 2106 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 2107 tree *counts, location_t loc) 2108 { 2109 enum built_in_function source_ix 2110 = fd->iter_type == long_integer_type_node 2111 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; 2112 gimple *g 2113 = gimple_build_call (builtin_decl_explicit (source_ix), 1, 2114 build_fold_addr_expr (counts[fd->ordered])); 2115 gimple_set_location (g, loc); 2116 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2117 } 2118 2119 /* Expand a single depend from #pragma omp ordered depend(sink:...). 
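   For instance (purely as an illustration), given a doacross loop like

     #pragma omp for ordered(1)
     for (i = 0; i < n; i++)
       {
         #pragma omp ordered depend(sink: i - 1)
         ...
         #pragma omp ordered depend(source)
       }

   the sink clause is expanded into a call to GOMP_doacross_wait (GOMP_doacross_ull_wait for unsigned long long iterators) passing the iteration the current one waits for, guarded by a condition so that waits on iterations outside of the iteration space are skipped.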
*/ 2120 2121 static void 2122 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 2123 tree *counts, tree c, location_t loc) 2124 { 2125 auto_vec<tree, 10> args; 2126 enum built_in_function sink_ix 2127 = fd->iter_type == long_integer_type_node 2128 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; 2129 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; 2130 int i; 2131 gimple_stmt_iterator gsi2 = *gsi; 2132 bool warned_step = false; 2133 2134 for (i = 0; i < fd->ordered; i++) 2135 { 2136 tree step = NULL_TREE; 2137 off = TREE_PURPOSE (deps); 2138 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2139 { 2140 step = TREE_OPERAND (off, 1); 2141 off = TREE_OPERAND (off, 0); 2142 } 2143 if (!integer_zerop (off)) 2144 { 2145 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2146 || fd->loops[i].cond_code == GT_EXPR); 2147 bool forward = fd->loops[i].cond_code == LT_EXPR; 2148 if (step) 2149 { 2150 /* Non-simple Fortran DO loops. If step is variable, 2151 we don't know at compile even the direction, so can't 2152 warn. */ 2153 if (TREE_CODE (step) != INTEGER_CST) 2154 break; 2155 forward = tree_int_cst_sgn (step) != -1; 2156 } 2157 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2158 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 2159 "waiting for lexically later iteration"); 2160 break; 2161 } 2162 deps = TREE_CHAIN (deps); 2163 } 2164 /* If all offsets corresponding to the collapsed loops are zero, 2165 this depend clause can be ignored. FIXME: but there is still a 2166 flush needed. We need to emit one __sync_synchronize () for it 2167 though (perhaps conditionally)? Solve this together with the 2168 conservative dependence folding optimization. 2169 if (i >= fd->collapse) 2170 return; */ 2171 2172 deps = OMP_CLAUSE_DECL (c); 2173 gsi_prev (&gsi2); 2174 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); 2175 edge e2 = split_block_after_labels (e1->dest); 2176 2177 gsi2 = gsi_after_labels (e1->dest); 2178 *gsi = gsi_last_bb (e1->src); 2179 for (i = 0; i < fd->ordered; i++) 2180 { 2181 tree itype = TREE_TYPE (fd->loops[i].v); 2182 tree step = NULL_TREE; 2183 tree orig_off = NULL_TREE; 2184 if (POINTER_TYPE_P (itype)) 2185 itype = sizetype; 2186 if (i) 2187 deps = TREE_CHAIN (deps); 2188 off = TREE_PURPOSE (deps); 2189 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2190 { 2191 step = TREE_OPERAND (off, 1); 2192 off = TREE_OPERAND (off, 0); 2193 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2194 && integer_onep (fd->loops[i].step) 2195 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); 2196 } 2197 tree s = fold_convert_loc (loc, itype, step ? 
step : fd->loops[i].step); 2198 if (step) 2199 { 2200 off = fold_convert_loc (loc, itype, off); 2201 orig_off = off; 2202 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2203 } 2204 2205 if (integer_zerop (off)) 2206 t = boolean_true_node; 2207 else 2208 { 2209 tree a; 2210 tree co = fold_convert_loc (loc, itype, off); 2211 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 2212 { 2213 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2214 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); 2215 a = fold_build2_loc (loc, POINTER_PLUS_EXPR, 2216 TREE_TYPE (fd->loops[i].v), fd->loops[i].v, 2217 co); 2218 } 2219 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2220 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2221 fd->loops[i].v, co); 2222 else 2223 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 2224 fd->loops[i].v, co); 2225 if (step) 2226 { 2227 tree t1, t2; 2228 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2229 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2230 fd->loops[i].n1); 2231 else 2232 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2233 fd->loops[i].n2); 2234 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2235 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2236 fd->loops[i].n2); 2237 else 2238 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2239 fd->loops[i].n1); 2240 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, 2241 step, build_int_cst (TREE_TYPE (step), 0)); 2242 if (TREE_CODE (step) != INTEGER_CST) 2243 { 2244 t1 = unshare_expr (t1); 2245 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, 2246 false, GSI_CONTINUE_LINKING); 2247 t2 = unshare_expr (t2); 2248 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, 2249 false, GSI_CONTINUE_LINKING); 2250 } 2251 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, 2252 t, t2, t1); 2253 } 2254 else if (fd->loops[i].cond_code == LT_EXPR) 2255 { 2256 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2257 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2258 fd->loops[i].n1); 2259 else 2260 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2261 fd->loops[i].n2); 2262 } 2263 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2264 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, 2265 fd->loops[i].n2); 2266 else 2267 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, 2268 fd->loops[i].n1); 2269 } 2270 if (cond) 2271 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); 2272 else 2273 cond = t; 2274 2275 off = fold_convert_loc (loc, itype, off); 2276 2277 if (step 2278 || (fd->loops[i].cond_code == LT_EXPR 2279 ? !integer_onep (fd->loops[i].step) 2280 : !integer_minus_onep (fd->loops[i].step))) 2281 { 2282 if (step == NULL_TREE 2283 && TYPE_UNSIGNED (itype) 2284 && fd->loops[i].cond_code == GT_EXPR) 2285 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, 2286 fold_build1_loc (loc, NEGATE_EXPR, itype, 2287 s)); 2288 else 2289 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, 2290 orig_off ? 
orig_off : off, s); 2291 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, 2292 build_int_cst (itype, 0)); 2293 if (integer_zerop (t) && !warned_step) 2294 { 2295 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 2296 "refers to iteration never in the iteration " 2297 "space"); 2298 warned_step = true; 2299 } 2300 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, 2301 cond, t); 2302 } 2303 2304 if (i <= fd->collapse - 1 && fd->collapse > 1) 2305 t = fd->loop.v; 2306 else if (counts[i]) 2307 t = counts[i]; 2308 else 2309 { 2310 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2311 fd->loops[i].v, fd->loops[i].n1); 2312 t = fold_convert_loc (loc, fd->iter_type, t); 2313 } 2314 if (step) 2315 /* We have divided off by step already earlier. */; 2316 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 2317 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, 2318 fold_build1_loc (loc, NEGATE_EXPR, itype, 2319 s)); 2320 else 2321 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2322 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2323 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); 2324 off = fold_convert_loc (loc, fd->iter_type, off); 2325 if (i <= fd->collapse - 1 && fd->collapse > 1) 2326 { 2327 if (i) 2328 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, 2329 off); 2330 if (i < fd->collapse - 1) 2331 { 2332 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, 2333 counts[i]); 2334 continue; 2335 } 2336 } 2337 off = unshare_expr (off); 2338 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); 2339 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2340 true, GSI_SAME_STMT); 2341 args.safe_push (t); 2342 } 2343 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); 2344 gimple_set_location (g, loc); 2345 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 2346 2347 cond = unshare_expr (cond); 2348 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, 2349 GSI_CONTINUE_LINKING); 2350 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 2351 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 2352 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2353 e1->probability = e3->probability.invert (); 2354 e1->flags = EDGE_TRUE_VALUE; 2355 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 2356 2357 *gsi = gsi_after_labels (e2->dest); 2358 } 2359 2360 /* Expand all #pragma omp ordered depend(source) and 2361 #pragma omp ordered depend(sink:...) constructs in the current 2362 #pragma omp for ordered(n) region. 
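   Each such GIMPLE_OMP_ORDERED statement is replaced by the corresponding GOMP_doacross_post or GOMP_doacross_wait call built by the two helpers above, and then removed.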
*/ 2363 2364 static void 2365 expand_omp_ordered_source_sink (struct omp_region *region, 2366 struct omp_for_data *fd, tree *counts, 2367 basic_block cont_bb) 2368 { 2369 struct omp_region *inner; 2370 int i; 2371 for (i = fd->collapse - 1; i < fd->ordered; i++) 2372 if (i == fd->collapse - 1 && fd->collapse > 1) 2373 counts[i] = NULL_TREE; 2374 else if (i >= fd->collapse && !cont_bb) 2375 counts[i] = build_zero_cst (fd->iter_type); 2376 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 2377 && integer_onep (fd->loops[i].step)) 2378 counts[i] = NULL_TREE; 2379 else 2380 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 2381 tree atype 2382 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 2383 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 2384 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 2385 2386 for (inner = region->inner; inner; inner = inner->next) 2387 if (inner->type == GIMPLE_OMP_ORDERED) 2388 { 2389 gomp_ordered *ord_stmt = inner->ord_stmt; 2390 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 2391 location_t loc = gimple_location (ord_stmt); 2392 tree c; 2393 for (c = gimple_omp_ordered_clauses (ord_stmt); 2394 c; c = OMP_CLAUSE_CHAIN (c)) 2395 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 2396 break; 2397 if (c) 2398 expand_omp_ordered_source (&gsi, fd, counts, loc); 2399 for (c = gimple_omp_ordered_clauses (ord_stmt); 2400 c; c = OMP_CLAUSE_CHAIN (c)) 2401 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 2402 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 2403 gsi_remove (&gsi, true); 2404 } 2405 } 2406 2407 /* Wrap the body into fd->ordered - fd->collapse loops that aren't 2408 collapsed. */ 2409 2410 static basic_block 2411 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 2412 basic_block cont_bb, basic_block body_bb, 2413 bool ordered_lastprivate) 2414 { 2415 if (fd->ordered == fd->collapse) 2416 return cont_bb; 2417 2418 if (!cont_bb) 2419 { 2420 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2421 for (int i = fd->collapse; i < fd->ordered; i++) 2422 { 2423 tree type = TREE_TYPE (fd->loops[i].v); 2424 tree n1 = fold_convert (type, fd->loops[i].n1); 2425 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 2426 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2427 size_int (i - fd->collapse + 1), 2428 NULL_TREE, NULL_TREE); 2429 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2430 } 2431 return NULL; 2432 } 2433 2434 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 2435 { 2436 tree t, type = TREE_TYPE (fd->loops[i].v); 2437 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2438 expand_omp_build_assign (&gsi, fd->loops[i].v, 2439 fold_convert (type, fd->loops[i].n1)); 2440 if (counts[i]) 2441 expand_omp_build_assign (&gsi, counts[i], 2442 build_zero_cst (fd->iter_type)); 2443 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2444 size_int (i - fd->collapse + 1), 2445 NULL_TREE, NULL_TREE); 2446 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2447 if (!gsi_end_p (gsi)) 2448 gsi_prev (&gsi); 2449 else 2450 gsi = gsi_last_bb (body_bb); 2451 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 2452 basic_block new_body = e1->dest; 2453 if (body_bb == cont_bb) 2454 cont_bb = new_body; 2455 edge e2 = NULL; 2456 basic_block new_header; 2457 if (EDGE_COUNT (cont_bb->preds) > 0) 2458 { 2459 gsi = gsi_last_bb (cont_bb); 2460 if (POINTER_TYPE_P (type)) 2461 t = fold_build_pointer_plus (fd->loops[i].v, 
2462 fold_convert (sizetype, 2463 fd->loops[i].step)); 2464 else 2465 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 2466 fold_convert (type, fd->loops[i].step)); 2467 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 2468 if (counts[i]) 2469 { 2470 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 2471 build_int_cst (fd->iter_type, 1)); 2472 expand_omp_build_assign (&gsi, counts[i], t); 2473 t = counts[i]; 2474 } 2475 else 2476 { 2477 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2478 fd->loops[i].v, fd->loops[i].n1); 2479 t = fold_convert (fd->iter_type, t); 2480 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2481 true, GSI_SAME_STMT); 2482 } 2483 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2484 size_int (i - fd->collapse + 1), 2485 NULL_TREE, NULL_TREE); 2486 expand_omp_build_assign (&gsi, aref, t); 2487 gsi_prev (&gsi); 2488 e2 = split_block (cont_bb, gsi_stmt (gsi)); 2489 new_header = e2->dest; 2490 } 2491 else 2492 new_header = cont_bb; 2493 gsi = gsi_after_labels (new_header); 2494 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 2495 true, GSI_SAME_STMT); 2496 tree n2 2497 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 2498 true, NULL_TREE, true, GSI_SAME_STMT); 2499 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 2500 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 2501 edge e3 = split_block (new_header, gsi_stmt (gsi)); 2502 cont_bb = e3->dest; 2503 remove_edge (e1); 2504 make_edge (body_bb, new_header, EDGE_FALLTHRU); 2505 e3->flags = EDGE_FALSE_VALUE; 2506 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2507 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 2508 e1->probability = e3->probability.invert (); 2509 2510 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); 2511 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 2512 2513 if (e2) 2514 { 2515 class loop *loop = alloc_loop (); 2516 loop->header = new_header; 2517 loop->latch = e2->src; 2518 add_loop (loop, body_bb->loop_father); 2519 } 2520 } 2521 2522 /* If there are any lastprivate clauses and it is possible some loops 2523 might have zero iterations, ensure all the decls are initialized, 2524 otherwise we could crash evaluating C++ class iterators with lastprivate 2525 clauses. */ 2526 bool need_inits = false; 2527 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 2528 if (need_inits) 2529 { 2530 tree type = TREE_TYPE (fd->loops[i].v); 2531 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2532 expand_omp_build_assign (&gsi, fd->loops[i].v, 2533 fold_convert (type, fd->loops[i].n1)); 2534 } 2535 else 2536 { 2537 tree type = TREE_TYPE (fd->loops[i].v); 2538 tree this_cond = fold_build2 (fd->loops[i].cond_code, 2539 boolean_type_node, 2540 fold_convert (type, fd->loops[i].n1), 2541 fold_convert (type, fd->loops[i].n2)); 2542 if (!integer_onep (this_cond)) 2543 need_inits = true; 2544 } 2545 2546 return cont_bb; 2547 } 2548 2549 /* A subroutine of expand_omp_for. Generate code for a parallel 2550 loop with any schedule. 
Given parameters: 2551 2552 for (V = N1; V cond N2; V += STEP) BODY; 2553 2554 where COND is "<" or ">", we generate pseudocode 2555 2556 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 2557 if (more) goto L0; else goto L3; 2558 L0: 2559 V = istart0; 2560 iend = iend0; 2561 L1: 2562 BODY; 2563 V += STEP; 2564 if (V cond iend) goto L1; else goto L2; 2565 L2: 2566 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2567 L3: 2568 2569 If this is a combined omp parallel loop, instead of the call to 2570 GOMP_loop_foo_start, we call GOMP_loop_foo_next. 2571 If this is gimple_omp_for_combined_p loop, then instead of assigning 2572 V and iend in L0 we assign the first two _looptemp_ clause decls of the 2573 inner GIMPLE_OMP_FOR and V += STEP; and 2574 if (V cond iend) goto L1; else goto L2; are removed. 2575 2576 For collapsed loops, given parameters: 2577 collapse(3) 2578 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 2579 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 2580 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 2581 BODY; 2582 2583 we generate pseudocode 2584 2585 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 2586 if (cond3 is <) 2587 adj = STEP3 - 1; 2588 else 2589 adj = STEP3 + 1; 2590 count3 = (adj + N32 - N31) / STEP3; 2591 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 2592 if (cond2 is <) 2593 adj = STEP2 - 1; 2594 else 2595 adj = STEP2 + 1; 2596 count2 = (adj + N22 - N21) / STEP2; 2597 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 2598 if (cond1 is <) 2599 adj = STEP1 - 1; 2600 else 2601 adj = STEP1 + 1; 2602 count1 = (adj + N12 - N11) / STEP1; 2603 count = count1 * count2 * count3; 2604 goto Z1; 2605 Z0: 2606 count = 0; 2607 Z1: 2608 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 2609 if (more) goto L0; else goto L3; 2610 L0: 2611 V = istart0; 2612 T = V; 2613 V3 = N31 + (T % count3) * STEP3; 2614 T = T / count3; 2615 V2 = N21 + (T % count2) * STEP2; 2616 T = T / count2; 2617 V1 = N11 + T * STEP1; 2618 iend = iend0; 2619 L1: 2620 BODY; 2621 V += 1; 2622 if (V < iend) goto L10; else goto L2; 2623 L10: 2624 V3 += STEP3; 2625 if (V3 cond3 N32) goto L1; else goto L11; 2626 L11: 2627 V3 = N31; 2628 V2 += STEP2; 2629 if (V2 cond2 N22) goto L1; else goto L12; 2630 L12: 2631 V2 = N21; 2632 V1 += STEP1; 2633 goto L1; 2634 L2: 2635 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2636 L3: 2637 2638 */ 2639 2640 static void 2641 expand_omp_for_generic (struct omp_region *region, 2642 struct omp_for_data *fd, 2643 enum built_in_function start_fn, 2644 enum built_in_function next_fn, 2645 tree sched_arg, 2646 gimple *inner_stmt) 2647 { 2648 tree type, istart0, iend0, iend; 2649 tree t, vmain, vback, bias = NULL_TREE; 2650 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; 2651 basic_block l2_bb = NULL, l3_bb = NULL; 2652 gimple_stmt_iterator gsi; 2653 gassign *assign_stmt; 2654 bool in_combined_parallel = is_combined_parallel (region); 2655 bool broken_loop = region->cont == NULL; 2656 edge e, ne; 2657 tree *counts = NULL; 2658 int i; 2659 bool ordered_lastprivate = false; 2660 2661 gcc_assert (!broken_loop || !in_combined_parallel); 2662 gcc_assert (fd->iter_type == long_integer_type_node 2663 || !in_combined_parallel); 2664 2665 entry_bb = region->entry; 2666 cont_bb = region->cont; 2667 collapse_bb = NULL; 2668 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 2669 gcc_assert (broken_loop 2670 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 2671 l0_bb = split_edge (FALLTHRU_EDGE 
(entry_bb)); 2672 l1_bb = single_succ (l0_bb); 2673 if (!broken_loop) 2674 { 2675 l2_bb = create_empty_bb (cont_bb); 2676 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb 2677 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest 2678 == l1_bb)); 2679 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 2680 } 2681 else 2682 l2_bb = NULL; 2683 l3_bb = BRANCH_EDGE (entry_bb)->dest; 2684 exit_bb = region->exit; 2685 2686 gsi = gsi_last_nondebug_bb (entry_bb); 2687 2688 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 2689 if (fd->ordered 2690 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2691 OMP_CLAUSE_LASTPRIVATE)) 2692 ordered_lastprivate = true; 2693 tree reductions = NULL_TREE; 2694 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE; 2695 tree memv = NULL_TREE; 2696 if (fd->lastprivate_conditional) 2697 { 2698 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2699 OMP_CLAUSE__CONDTEMP_); 2700 if (fd->have_pointer_condtemp) 2701 condtemp = OMP_CLAUSE_DECL (c); 2702 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 2703 cond_var = OMP_CLAUSE_DECL (c); 2704 } 2705 if (sched_arg) 2706 { 2707 if (fd->have_reductemp) 2708 { 2709 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2710 OMP_CLAUSE__REDUCTEMP_); 2711 reductions = OMP_CLAUSE_DECL (c); 2712 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 2713 gimple *g = SSA_NAME_DEF_STMT (reductions); 2714 reductions = gimple_assign_rhs1 (g); 2715 OMP_CLAUSE_DECL (c) = reductions; 2716 entry_bb = gimple_bb (g); 2717 edge e = split_block (entry_bb, g); 2718 if (region->entry == entry_bb) 2719 region->entry = e->dest; 2720 gsi = gsi_last_bb (entry_bb); 2721 } 2722 else 2723 reductions = null_pointer_node; 2724 if (fd->have_pointer_condtemp) 2725 { 2726 tree type = TREE_TYPE (condtemp); 2727 memv = create_tmp_var (type); 2728 TREE_ADDRESSABLE (memv) = 1; 2729 unsigned HOST_WIDE_INT sz 2730 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 2731 sz *= fd->lastprivate_conditional; 2732 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), 2733 false); 2734 mem = build_fold_addr_expr (memv); 2735 } 2736 else 2737 mem = null_pointer_node; 2738 } 2739 if (fd->collapse > 1 || fd->ordered) 2740 { 2741 int first_zero_iter1 = -1, first_zero_iter2 = -1; 2742 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; 2743 2744 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); 2745 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 2746 zero_iter1_bb, first_zero_iter1, 2747 zero_iter2_bb, first_zero_iter2, l2_dom_bb); 2748 2749 if (zero_iter1_bb) 2750 { 2751 /* Some counts[i] vars might be uninitialized if 2752 some loop has zero iterations. But the body shouldn't 2753 be executed in that case, so just avoid uninit warnings. */ 2754 for (i = first_zero_iter1; 2755 i < (fd->ordered ? fd->ordered : fd->collapse); i++) 2756 if (SSA_VAR_P (counts[i])) 2757 TREE_NO_WARNING (counts[i]) = 1; 2758 gsi_prev (&gsi); 2759 e = split_block (entry_bb, gsi_stmt (gsi)); 2760 entry_bb = e->dest; 2761 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); 2762 gsi = gsi_last_nondebug_bb (entry_bb); 2763 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2764 get_immediate_dominator (CDI_DOMINATORS, 2765 zero_iter1_bb)); 2766 } 2767 if (zero_iter2_bb) 2768 { 2769 /* Some counts[i] vars might be uninitialized if 2770 some loop has zero iterations. But the body shouldn't 2771 be executed in that case, so just avoid uninit warnings.
*/ 2772 for (i = first_zero_iter2; i < fd->ordered; i++) 2773 if (SSA_VAR_P (counts[i])) 2774 TREE_NO_WARNING (counts[i]) = 1; 2775 if (zero_iter1_bb) 2776 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2777 else 2778 { 2779 gsi_prev (&gsi); 2780 e = split_block (entry_bb, gsi_stmt (gsi)); 2781 entry_bb = e->dest; 2782 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2783 gsi = gsi_last_nondebug_bb (entry_bb); 2784 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2785 get_immediate_dominator 2786 (CDI_DOMINATORS, zero_iter2_bb)); 2787 } 2788 } 2789 if (fd->collapse == 1) 2790 { 2791 counts[0] = fd->loop.n2; 2792 fd->loop = fd->loops[0]; 2793 } 2794 } 2795 2796 type = TREE_TYPE (fd->loop.v); 2797 istart0 = create_tmp_var (fd->iter_type, ".istart0"); 2798 iend0 = create_tmp_var (fd->iter_type, ".iend0"); 2799 TREE_ADDRESSABLE (istart0) = 1; 2800 TREE_ADDRESSABLE (iend0) = 1; 2801 2802 /* See if we need to bias by LLONG_MIN. */ 2803 if (fd->iter_type == long_long_unsigned_type_node 2804 && TREE_CODE (type) == INTEGER_TYPE 2805 && !TYPE_UNSIGNED (type) 2806 && fd->ordered == 0) 2807 { 2808 tree n1, n2; 2809 2810 if (fd->loop.cond_code == LT_EXPR) 2811 { 2812 n1 = fd->loop.n1; 2813 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 2814 } 2815 else 2816 { 2817 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 2818 n2 = fd->loop.n1; 2819 } 2820 if (TREE_CODE (n1) != INTEGER_CST 2821 || TREE_CODE (n2) != INTEGER_CST 2822 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 2823 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 2824 } 2825 2826 gimple_stmt_iterator gsif = gsi; 2827 gsi_prev (&gsif); 2828 2829 tree arr = NULL_TREE; 2830 if (in_combined_parallel) 2831 { 2832 gcc_assert (fd->ordered == 0); 2833 /* In a combined parallel loop, emit a call to 2834 GOMP_loop_foo_next. */ 2835 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 2836 build_fold_addr_expr (istart0), 2837 build_fold_addr_expr (iend0)); 2838 } 2839 else 2840 { 2841 tree t0, t1, t2, t3, t4; 2842 /* If this is not a combined parallel loop, emit a call to 2843 GOMP_loop_foo_start in ENTRY_BB. 
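	 E.g. for schedule(dynamic,chunk) this is typically GOMP_loop_dynamic_start (n1, n2, step, chunk, &istart0, &iend0) and for schedule(runtime) GOMP_loop_runtime_start (n1, n2, step, &istart0, &iend0); the exact start builtin is supplied by the caller as START_FN.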
*/ 2844 t4 = build_fold_addr_expr (iend0); 2845 t3 = build_fold_addr_expr (istart0); 2846 if (fd->ordered) 2847 { 2848 t0 = build_int_cst (unsigned_type_node, 2849 fd->ordered - fd->collapse + 1); 2850 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 2851 fd->ordered 2852 - fd->collapse + 1), 2853 ".omp_counts"); 2854 DECL_NAMELESS (arr) = 1; 2855 TREE_ADDRESSABLE (arr) = 1; 2856 TREE_STATIC (arr) = 1; 2857 vec<constructor_elt, va_gc> *v; 2858 vec_alloc (v, fd->ordered - fd->collapse + 1); 2859 int idx; 2860 2861 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 2862 { 2863 tree c; 2864 if (idx == 0 && fd->collapse > 1) 2865 c = fd->loop.n2; 2866 else 2867 c = counts[idx + fd->collapse - 1]; 2868 tree purpose = size_int (idx); 2869 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 2870 if (TREE_CODE (c) != INTEGER_CST) 2871 TREE_STATIC (arr) = 0; 2872 } 2873 2874 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 2875 if (!TREE_STATIC (arr)) 2876 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 2877 void_type_node, arr), 2878 true, NULL_TREE, true, GSI_SAME_STMT); 2879 t1 = build_fold_addr_expr (arr); 2880 t2 = NULL_TREE; 2881 } 2882 else 2883 { 2884 t2 = fold_convert (fd->iter_type, fd->loop.step); 2885 t1 = fd->loop.n2; 2886 t0 = fd->loop.n1; 2887 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 2888 { 2889 tree innerc 2890 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2891 OMP_CLAUSE__LOOPTEMP_); 2892 gcc_assert (innerc); 2893 t0 = OMP_CLAUSE_DECL (innerc); 2894 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2895 OMP_CLAUSE__LOOPTEMP_); 2896 gcc_assert (innerc); 2897 t1 = OMP_CLAUSE_DECL (innerc); 2898 } 2899 if (POINTER_TYPE_P (TREE_TYPE (t0)) 2900 && TYPE_PRECISION (TREE_TYPE (t0)) 2901 != TYPE_PRECISION (fd->iter_type)) 2902 { 2903 /* Avoid casting pointers to integer of a different size. */ 2904 tree itype = signed_type_for (type); 2905 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 2906 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 2907 } 2908 else 2909 { 2910 t1 = fold_convert (fd->iter_type, t1); 2911 t0 = fold_convert (fd->iter_type, t0); 2912 } 2913 if (bias) 2914 { 2915 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 2916 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 2917 } 2918 } 2919 if (fd->iter_type == long_integer_type_node || fd->ordered) 2920 { 2921 if (fd->chunk_size) 2922 { 2923 t = fold_convert (fd->iter_type, fd->chunk_size); 2924 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2925 if (sched_arg) 2926 { 2927 if (fd->ordered) 2928 t = build_call_expr (builtin_decl_explicit (start_fn), 2929 8, t0, t1, sched_arg, t, t3, t4, 2930 reductions, mem); 2931 else 2932 t = build_call_expr (builtin_decl_explicit (start_fn), 2933 9, t0, t1, t2, sched_arg, t, t3, t4, 2934 reductions, mem); 2935 } 2936 else if (fd->ordered) 2937 t = build_call_expr (builtin_decl_explicit (start_fn), 2938 5, t0, t1, t, t3, t4); 2939 else 2940 t = build_call_expr (builtin_decl_explicit (start_fn), 2941 6, t0, t1, t2, t, t3, t4); 2942 } 2943 else if (fd->ordered) 2944 t = build_call_expr (builtin_decl_explicit (start_fn), 2945 4, t0, t1, t3, t4); 2946 else 2947 t = build_call_expr (builtin_decl_explicit (start_fn), 2948 5, t0, t1, t2, t3, t4); 2949 } 2950 else 2951 { 2952 tree t5; 2953 tree c_bool_type; 2954 tree bfn_decl; 2955 2956 /* The GOMP_loop_ull_*start functions have additional boolean 2957 argument, true for < loops and false for > loops. 
2958 In Fortran, the C bool type can be different from 2959 boolean_type_node. */ 2960 bfn_decl = builtin_decl_explicit (start_fn); 2961 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 2962 t5 = build_int_cst (c_bool_type, 2963 fd->loop.cond_code == LT_EXPR ? 1 : 0); 2964 if (fd->chunk_size) 2965 { 2966 tree bfn_decl = builtin_decl_explicit (start_fn); 2967 t = fold_convert (fd->iter_type, fd->chunk_size); 2968 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2969 if (sched_arg) 2970 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg, 2971 t, t3, t4, reductions, mem); 2972 else 2973 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 2974 } 2975 else 2976 t = build_call_expr (builtin_decl_explicit (start_fn), 2977 6, t5, t0, t1, t2, t3, t4); 2978 } 2979 } 2980 if (TREE_TYPE (t) != boolean_type_node) 2981 t = fold_build2 (NE_EXPR, boolean_type_node, 2982 t, build_int_cst (TREE_TYPE (t), 0)); 2983 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2984 true, GSI_SAME_STMT); 2985 if (arr && !TREE_STATIC (arr)) 2986 { 2987 tree clobber = build_clobber (TREE_TYPE (arr)); 2988 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 2989 GSI_SAME_STMT); 2990 } 2991 if (fd->have_pointer_condtemp) 2992 expand_omp_build_assign (&gsi, condtemp, memv, false); 2993 if (fd->have_reductemp) 2994 { 2995 gimple *g = gsi_stmt (gsi); 2996 gsi_remove (&gsi, true); 2997 release_ssa_name (gimple_assign_lhs (g)); 2998 2999 entry_bb = region->entry; 3000 gsi = gsi_last_nondebug_bb (entry_bb); 3001 3002 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3003 } 3004 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3005 3006 /* Remove the GIMPLE_OMP_FOR statement. */ 3007 gsi_remove (&gsi, true); 3008 3009 if (gsi_end_p (gsif)) 3010 gsif = gsi_after_labels (gsi_bb (gsif)); 3011 gsi_next (&gsif); 3012 3013 /* Iteration setup for sequential loop goes in L0_BB. 
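     That is, the V = istart0 and iend = iend0 assignments from the pseudocode above, converted to the types of the iteration variables and with the LLONG_MIN bias subtracted again if one was applied.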
*/ 3014 tree startvar = fd->loop.v; 3015 tree endvar = NULL_TREE; 3016 3017 if (gimple_omp_for_combined_p (fd->for_stmt)) 3018 { 3019 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 3020 && gimple_omp_for_kind (inner_stmt) 3021 == GF_OMP_FOR_KIND_SIMD); 3022 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 3023 OMP_CLAUSE__LOOPTEMP_); 3024 gcc_assert (innerc); 3025 startvar = OMP_CLAUSE_DECL (innerc); 3026 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3027 OMP_CLAUSE__LOOPTEMP_); 3028 gcc_assert (innerc); 3029 endvar = OMP_CLAUSE_DECL (innerc); 3030 } 3031 3032 gsi = gsi_start_bb (l0_bb); 3033 t = istart0; 3034 if (fd->ordered && fd->collapse == 1) 3035 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 3036 fold_convert (fd->iter_type, fd->loop.step)); 3037 else if (bias) 3038 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 3039 if (fd->ordered && fd->collapse == 1) 3040 { 3041 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3042 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 3043 fd->loop.n1, fold_convert (sizetype, t)); 3044 else 3045 { 3046 t = fold_convert (TREE_TYPE (startvar), t); 3047 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 3048 fd->loop.n1, t); 3049 } 3050 } 3051 else 3052 { 3053 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3054 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 3055 t = fold_convert (TREE_TYPE (startvar), t); 3056 } 3057 t = force_gimple_operand_gsi (&gsi, t, 3058 DECL_P (startvar) 3059 && TREE_ADDRESSABLE (startvar), 3060 NULL_TREE, false, GSI_CONTINUE_LINKING); 3061 assign_stmt = gimple_build_assign (startvar, t); 3062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3063 if (cond_var) 3064 { 3065 tree itype = TREE_TYPE (cond_var); 3066 /* For lastprivate(conditional:) itervar, we need some iteration 3067 counter that starts at unsigned non-zero and increases. 3068 Prefer as few IVs as possible, so if we can use startvar 3069 itself, use that, or startvar + constant (those would be 3070 incremented with step), and as last resort use the s0 + 1 3071 incremented by 1. 
*/ 3072 if ((fd->ordered && fd->collapse == 1) 3073 || bias 3074 || POINTER_TYPE_P (type) 3075 || TREE_CODE (fd->loop.n1) != INTEGER_CST 3076 || fd->loop.cond_code != LT_EXPR) 3077 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0), 3078 build_int_cst (itype, 1)); 3079 else if (tree_int_cst_sgn (fd->loop.n1) == 1) 3080 t = fold_convert (itype, t); 3081 else 3082 { 3083 tree c = fold_convert (itype, fd->loop.n1); 3084 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 3085 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 3086 } 3087 t = force_gimple_operand_gsi (&gsi, t, false, 3088 NULL_TREE, false, GSI_CONTINUE_LINKING); 3089 assign_stmt = gimple_build_assign (cond_var, t); 3090 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3091 } 3092 3093 t = iend0; 3094 if (fd->ordered && fd->collapse == 1) 3095 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 3096 fold_convert (fd->iter_type, fd->loop.step)); 3097 else if (bias) 3098 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 3099 if (fd->ordered && fd->collapse == 1) 3100 { 3101 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3102 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 3103 fd->loop.n1, fold_convert (sizetype, t)); 3104 else 3105 { 3106 t = fold_convert (TREE_TYPE (startvar), t); 3107 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 3108 fd->loop.n1, t); 3109 } 3110 } 3111 else 3112 { 3113 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3114 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 3115 t = fold_convert (TREE_TYPE (startvar), t); 3116 } 3117 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3118 false, GSI_CONTINUE_LINKING); 3119 if (endvar) 3120 { 3121 assign_stmt = gimple_build_assign (endvar, iend); 3122 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3123 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 3124 assign_stmt = gimple_build_assign (fd->loop.v, iend); 3125 else 3126 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3128 } 3129 /* Handle linear clause adjustments. 
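     For a linear(x:stride) clause the privatized copy must start at the value x would have at this thread's first iteration, i.e. x = x_orig + ((startvar - N1) / STEP) * stride; the division is exact because startvar is always N1 plus a multiple of STEP.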
*/ 3130 tree itercnt = NULL_TREE; 3131 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 3132 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 3133 c; c = OMP_CLAUSE_CHAIN (c)) 3134 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 3135 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 3136 { 3137 tree d = OMP_CLAUSE_DECL (c); 3138 bool is_ref = omp_is_reference (d); 3139 tree t = d, a, dest; 3140 if (is_ref) 3141 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 3142 tree type = TREE_TYPE (t); 3143 if (POINTER_TYPE_P (type)) 3144 type = sizetype; 3145 dest = unshare_expr (t); 3146 tree v = create_tmp_var (TREE_TYPE (t), NULL); 3147 expand_omp_build_assign (&gsif, v, t); 3148 if (itercnt == NULL_TREE) 3149 { 3150 itercnt = startvar; 3151 tree n1 = fd->loop.n1; 3152 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 3153 { 3154 itercnt 3155 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 3156 itercnt); 3157 n1 = fold_convert (TREE_TYPE (itercnt), n1); 3158 } 3159 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 3160 itercnt, n1); 3161 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 3162 itercnt, fd->loop.step); 3163 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 3164 NULL_TREE, false, 3165 GSI_CONTINUE_LINKING); 3166 } 3167 a = fold_build2 (MULT_EXPR, type, 3168 fold_convert (type, itercnt), 3169 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 3170 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 3171 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 3172 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3173 false, GSI_CONTINUE_LINKING); 3174 assign_stmt = gimple_build_assign (dest, t); 3175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3176 } 3177 if (fd->collapse > 1) 3178 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 3179 3180 if (fd->ordered) 3181 { 3182 /* Until now, counts array contained number of iterations or 3183 variable containing it for ith loop. From now on, we need 3184 those counts only for collapsed loops, and only for the 2nd 3185 till the last collapsed one. Move those one element earlier, 3186 we'll use counts[fd->collapse - 1] for the first source/sink 3187 iteration counter and so on and counts[fd->ordered] 3188 as the array holding the current counter values for 3189 depend(source). 
*/ 3190 if (fd->collapse > 1) 3191 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 3192 if (broken_loop) 3193 { 3194 int i; 3195 for (i = fd->collapse; i < fd->ordered; i++) 3196 { 3197 tree type = TREE_TYPE (fd->loops[i].v); 3198 tree this_cond 3199 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 3200 fold_convert (type, fd->loops[i].n1), 3201 fold_convert (type, fd->loops[i].n2)); 3202 if (!integer_onep (this_cond)) 3203 break; 3204 } 3205 if (i < fd->ordered) 3206 { 3207 cont_bb 3208 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 3209 add_bb_to_loop (cont_bb, l1_bb->loop_father); 3210 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 3211 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 3212 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3213 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 3214 make_edge (cont_bb, l1_bb, 0); 3215 l2_bb = create_empty_bb (cont_bb); 3216 broken_loop = false; 3217 } 3218 } 3219 expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 3220 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 3221 ordered_lastprivate); 3222 if (counts[fd->collapse - 1]) 3223 { 3224 gcc_assert (fd->collapse == 1); 3225 gsi = gsi_last_bb (l0_bb); 3226 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 3227 istart0, true); 3228 if (cont_bb) 3229 { 3230 gsi = gsi_last_bb (cont_bb); 3231 t = fold_build2 (PLUS_EXPR, fd->iter_type, 3232 counts[fd->collapse - 1], 3233 build_int_cst (fd->iter_type, 1)); 3234 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 3235 tree aref = build4 (ARRAY_REF, fd->iter_type, 3236 counts[fd->ordered], size_zero_node, 3237 NULL_TREE, NULL_TREE); 3238 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 3239 } 3240 t = counts[fd->collapse - 1]; 3241 } 3242 else if (fd->collapse > 1) 3243 t = fd->loop.v; 3244 else 3245 { 3246 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3247 fd->loops[0].v, fd->loops[0].n1); 3248 t = fold_convert (fd->iter_type, t); 3249 } 3250 gsi = gsi_last_bb (l0_bb); 3251 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3252 size_zero_node, NULL_TREE, NULL_TREE); 3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3254 false, GSI_CONTINUE_LINKING); 3255 expand_omp_build_assign (&gsi, aref, t, true); 3256 } 3257 3258 if (!broken_loop) 3259 { 3260 /* Code to control the increment and predicate for the sequential 3261 loop goes in the CONT_BB. 
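	 This is the V += STEP; if (V cond iend) goto L1; else goto L2; sequence from the pseudocode above; it is omitted for gimple_omp_for_combined_p loops, where the inner construct does the stepping.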
*/ 3262 gsi = gsi_last_nondebug_bb (cont_bb); 3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3265 vmain = gimple_omp_continue_control_use (cont_stmt); 3266 vback = gimple_omp_continue_control_def (cont_stmt); 3267 3268 if (cond_var) 3269 { 3270 tree itype = TREE_TYPE (cond_var); 3271 tree t2; 3272 if ((fd->ordered && fd->collapse == 1) 3273 || bias 3274 || POINTER_TYPE_P (type) 3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST 3276 || fd->loop.cond_code != LT_EXPR) 3277 t2 = build_int_cst (itype, 1); 3278 else 3279 t2 = fold_convert (itype, fd->loop.step); 3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 3281 t2 = force_gimple_operand_gsi (&gsi, t2, false, 3282 NULL_TREE, true, GSI_SAME_STMT); 3283 assign_stmt = gimple_build_assign (cond_var, t2); 3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3285 } 3286 3287 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3288 { 3289 if (POINTER_TYPE_P (type)) 3290 t = fold_build_pointer_plus (vmain, fd->loop.step); 3291 else 3292 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 3293 t = force_gimple_operand_gsi (&gsi, t, 3294 DECL_P (vback) 3295 && TREE_ADDRESSABLE (vback), 3296 NULL_TREE, true, GSI_SAME_STMT); 3297 assign_stmt = gimple_build_assign (vback, t); 3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3299 3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 3301 { 3302 tree tem; 3303 if (fd->collapse > 1) 3304 tem = fd->loop.v; 3305 else 3306 { 3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3308 fd->loops[0].v, fd->loops[0].n1); 3309 tem = fold_convert (fd->iter_type, tem); 3310 } 3311 tree aref = build4 (ARRAY_REF, fd->iter_type, 3312 counts[fd->ordered], size_zero_node, 3313 NULL_TREE, NULL_TREE); 3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, 3315 true, GSI_SAME_STMT); 3316 expand_omp_build_assign (&gsi, aref, tem); 3317 } 3318 3319 t = build2 (fd->loop.cond_code, boolean_type_node, 3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 3321 iend); 3322 gcond *cond_stmt = gimple_build_cond_empty (t); 3323 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3324 } 3325 3326 /* Remove GIMPLE_OMP_CONTINUE. */ 3327 gsi_remove (&gsi, true); 3328 3329 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3330 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); 3331 3332 /* Emit code to get the next parallel iteration in L2_BB. */ 3333 gsi = gsi_start_bb (l2_bb); 3334 3335 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 3336 build_fold_addr_expr (istart0), 3337 build_fold_addr_expr (iend0)); 3338 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3339 false, GSI_CONTINUE_LINKING); 3340 if (TREE_TYPE (t) != boolean_type_node) 3341 t = fold_build2 (NE_EXPR, boolean_type_node, 3342 t, build_int_cst (TREE_TYPE (t), 0)); 3343 gcond *cond_stmt = gimple_build_cond_empty (t); 3344 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 3345 } 3346 3347 /* Add the loop cleanup function. 
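     This is GOMP_loop_end, GOMP_loop_end_nowait or GOMP_loop_end_cancel, depending on whether the region has a nowait clause or whether its return value is needed for cancellation.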
*/ 3348 gsi = gsi_last_nondebug_bb (exit_bb); 3349 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3350 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3353 else 3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3355 gcall *call_stmt = gimple_build_call (t, 0); 3356 if (fd->ordered) 3357 { 3358 tree arr = counts[fd->ordered]; 3359 tree clobber = build_clobber (TREE_TYPE (arr)); 3360 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 3361 GSI_SAME_STMT); 3362 } 3363 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3364 { 3365 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 3366 if (fd->have_reductemp) 3367 { 3368 gimple *g = gimple_build_assign (reductions, NOP_EXPR, 3369 gimple_call_lhs (call_stmt)); 3370 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 3371 } 3372 } 3373 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 3374 gsi_remove (&gsi, true); 3375 3376 /* Connect the new blocks. */ 3377 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 3378 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 3379 3380 if (!broken_loop) 3381 { 3382 gimple_seq phis; 3383 3384 e = find_edge (cont_bb, l3_bb); 3385 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 3386 3387 phis = phi_nodes (l3_bb); 3388 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 3389 { 3390 gimple *phi = gsi_stmt (gsi); 3391 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 3392 PHI_ARG_DEF_FROM_EDGE (phi, e)); 3393 } 3394 remove_edge (e); 3395 3396 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 3397 e = find_edge (cont_bb, l1_bb); 3398 if (e == NULL) 3399 { 3400 e = BRANCH_EDGE (cont_bb); 3401 gcc_assert (single_succ (e->dest) == l1_bb); 3402 } 3403 if (gimple_omp_for_combined_p (fd->for_stmt)) 3404 { 3405 remove_edge (e); 3406 e = NULL; 3407 } 3408 else if (fd->collapse > 1) 3409 { 3410 remove_edge (e); 3411 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3412 } 3413 else 3414 e->flags = EDGE_TRUE_VALUE; 3415 if (e) 3416 { 3417 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 3418 find_edge (cont_bb, l2_bb)->probability = e->probability.invert (); 3419 } 3420 else 3421 { 3422 e = find_edge (cont_bb, l2_bb); 3423 e->flags = EDGE_FALLTHRU; 3424 } 3425 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 3426 3427 if (gimple_in_ssa_p (cfun)) 3428 { 3429 /* Add phis to the outer loop that connect to the phis in the inner, 3430 original loop, and move the loop entry value of the inner phi to 3431 the loop entry value of the outer phi. 
*/ 3432 gphi_iterator psi; 3433 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 3434 { 3435 location_t locus; 3436 gphi *nphi; 3437 gphi *exit_phi = psi.phi (); 3438 3439 if (virtual_operand_p (gimple_phi_result (exit_phi))) 3440 continue; 3441 3442 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 3443 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 3444 3445 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 3446 edge latch_to_l1 = find_edge (latch, l1_bb); 3447 gphi *inner_phi 3448 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 3449 3450 tree t = gimple_phi_result (exit_phi); 3451 tree new_res = copy_ssa_name (t, NULL); 3452 nphi = create_phi_node (new_res, l0_bb); 3453 3454 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 3455 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 3456 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 3457 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 3458 add_phi_arg (nphi, t, entry_to_l0, locus); 3459 3460 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 3461 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 3462 3463 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 3464 } 3465 } 3466 3467 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 3468 recompute_dominator (CDI_DOMINATORS, l2_bb)); 3469 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 3470 recompute_dominator (CDI_DOMINATORS, l3_bb)); 3471 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 3472 recompute_dominator (CDI_DOMINATORS, l0_bb)); 3473 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 3474 recompute_dominator (CDI_DOMINATORS, l1_bb)); 3475 3476 /* We enter expand_omp_for_generic with a loop. This original loop may 3477 have its own loop struct, or it may be part of an outer loop struct 3478 (which may be the fake loop). */ 3479 class loop *outer_loop = entry_bb->loop_father; 3480 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 3481 3482 add_bb_to_loop (l2_bb, outer_loop); 3483 3484 /* We've added a new loop around the original loop. Allocate the 3485 corresponding loop struct. */ 3486 class loop *new_loop = alloc_loop (); 3487 new_loop->header = l0_bb; 3488 new_loop->latch = l2_bb; 3489 add_loop (new_loop, outer_loop); 3490 3491 /* Allocate a loop structure for the original loop unless we already 3492 had one. */ 3493 if (!orig_loop_has_loop_struct 3494 && !gimple_omp_for_combined_p (fd->for_stmt)) 3495 { 3496 class loop *orig_loop = alloc_loop (); 3497 orig_loop->header = l1_bb; 3498 /* The loop may have multiple latches. */ 3499 add_loop (orig_loop, new_loop); 3500 } 3501 } 3502 } 3503 3504 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, 3505 compute needed allocation size. If !ALLOC of team allocations, 3506 if ALLOC of thread allocation. SZ is the initial needed size for 3507 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, 3508 CNT number of elements of each array, for !ALLOC this is 3509 omp_get_num_threads (), for ALLOC number of iterations handled by the 3510 current thread. If PTR is non-NULL, it is the start of the allocation 3511 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ 3512 clauses pointers to the corresponding arrays. 
*/ 3513 3514 static tree 3515 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz, 3516 unsigned HOST_WIDE_INT alloc_align, tree cnt, 3517 gimple_stmt_iterator *gsi, bool alloc) 3518 { 3519 tree eltsz = NULL_TREE; 3520 unsigned HOST_WIDE_INT preval = 0; 3521 if (ptr && sz) 3522 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), 3523 ptr, size_int (sz)); 3524 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 3525 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 3526 && !OMP_CLAUSE__SCANTEMP__CONTROL (c) 3527 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc) 3528 { 3529 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); 3530 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type); 3531 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) 3532 { 3533 unsigned HOST_WIDE_INT szl 3534 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type)); 3535 szl = least_bit_hwi (szl); 3536 if (szl) 3537 al = MIN (al, szl); 3538 } 3539 if (ptr == NULL_TREE) 3540 { 3541 if (eltsz == NULL_TREE) 3542 eltsz = TYPE_SIZE_UNIT (pointee_type); 3543 else 3544 eltsz = size_binop (PLUS_EXPR, eltsz, 3545 TYPE_SIZE_UNIT (pointee_type)); 3546 } 3547 if (preval == 0 && al <= alloc_align) 3548 { 3549 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz; 3550 sz += diff; 3551 if (diff && ptr) 3552 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), 3553 ptr, size_int (diff)); 3554 } 3555 else if (al > preval) 3556 { 3557 if (ptr) 3558 { 3559 ptr = fold_convert (pointer_sized_int_node, ptr); 3560 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr, 3561 build_int_cst (pointer_sized_int_node, 3562 al - 1)); 3563 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr, 3564 build_int_cst (pointer_sized_int_node, 3565 -(HOST_WIDE_INT) al)); 3566 ptr = fold_convert (ptr_type_node, ptr); 3567 } 3568 else 3569 sz += al - 1; 3570 } 3571 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) 3572 preval = al; 3573 else 3574 preval = 1; 3575 if (ptr) 3576 { 3577 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false); 3578 ptr = OMP_CLAUSE_DECL (c); 3579 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, 3580 size_binop (MULT_EXPR, cnt, 3581 TYPE_SIZE_UNIT (pointee_type))); 3582 } 3583 } 3584 3585 if (ptr == NULL_TREE) 3586 { 3587 eltsz = size_binop (MULT_EXPR, eltsz, cnt); 3588 if (sz) 3589 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz)); 3590 return eltsz; 3591 } 3592 else 3593 return ptr; 3594 } 3595 3596 /* A subroutine of expand_omp_for. Generate code for a parallel 3597 loop with static schedule and no specified chunk size. 
Given 3598 parameters: 3599 3600 for (V = N1; V cond N2; V += STEP) BODY; 3601 3602 where COND is "<" or ">", we generate pseudocode 3603 3604 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3605 if (cond is <) 3606 adj = STEP - 1; 3607 else 3608 adj = STEP + 1; 3609 if ((__typeof (V)) -1 > 0 && cond is >) 3610 n = -(adj + N2 - N1) / -STEP; 3611 else 3612 n = (adj + N2 - N1) / STEP; 3613 q = n / nthreads; 3614 tt = n % nthreads; 3615 if (threadid < tt) goto L3; else goto L4; 3616 L3: 3617 tt = 0; 3618 q = q + 1; 3619 L4: 3620 s0 = q * threadid + tt; 3621 e0 = s0 + q; 3622 V = s0 * STEP + N1; 3623 if (s0 >= e0) goto L2; else goto L0; 3624 L0: 3625 e = e0 * STEP + N1; 3626 L1: 3627 BODY; 3628 V += STEP; 3629 if (V cond e) goto L1; 3630 L2: 3631 */ 3632 3633 static void 3634 expand_omp_for_static_nochunk (struct omp_region *region, 3635 struct omp_for_data *fd, 3636 gimple *inner_stmt) 3637 { 3638 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid; 3639 tree type, itype, vmain, vback; 3640 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; 3641 basic_block body_bb, cont_bb, collapse_bb = NULL; 3642 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL; 3643 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL; 3644 gimple_stmt_iterator gsi, gsip; 3645 edge ep; 3646 bool broken_loop = region->cont == NULL; 3647 tree *counts = NULL; 3648 tree n1, n2, step; 3649 tree reductions = NULL_TREE; 3650 tree cond_var = NULL_TREE, condtemp = NULL_TREE; 3651 3652 itype = type = TREE_TYPE (fd->loop.v); 3653 if (POINTER_TYPE_P (type)) 3654 itype = signed_type_for (type); 3655 3656 entry_bb = region->entry; 3657 cont_bb = region->cont; 3658 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 3659 fin_bb = BRANCH_EDGE (entry_bb)->dest; 3660 gcc_assert (broken_loop 3661 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 3662 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 3663 body_bb = single_succ (seq_start_bb); 3664 if (!broken_loop) 3665 { 3666 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3667 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3668 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3669 } 3670 exit_bb = region->exit; 3671 3672 /* Iteration space partitioning goes in ENTRY_BB. 
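     Each thread gets a contiguous block of iterations: q = n / nthreads
     iterations per thread, with the remaining n % nthreads iterations
     handed out one each to the lowest-numbered threads.  For example,
     with n = 10 and nthreads = 4, threads 0 and 1 each run 3 iterations
     and threads 2 and 3 each run 2.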
*/ 3673 gsi = gsi_last_nondebug_bb (entry_bb); 3674 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3675 gsip = gsi; 3676 gsi_prev (&gsip); 3677 3678 if (fd->collapse > 1) 3679 { 3680 int first_zero_iter = -1, dummy = -1; 3681 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3682 3683 counts = XALLOCAVEC (tree, fd->collapse); 3684 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3685 fin_bb, first_zero_iter, 3686 dummy_bb, dummy, l2_dom_bb); 3687 t = NULL_TREE; 3688 } 3689 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3690 t = integer_one_node; 3691 else 3692 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3693 fold_convert (type, fd->loop.n1), 3694 fold_convert (type, fd->loop.n2)); 3695 if (fd->collapse == 1 3696 && TYPE_UNSIGNED (type) 3697 && (t == NULL_TREE || !integer_onep (t))) 3698 { 3699 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3700 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3701 true, GSI_SAME_STMT); 3702 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3703 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3704 true, GSI_SAME_STMT); 3705 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 3706 NULL_TREE, NULL_TREE); 3707 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3708 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3709 expand_omp_regimplify_p, NULL, NULL) 3710 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3711 expand_omp_regimplify_p, NULL, NULL)) 3712 { 3713 gsi = gsi_for_stmt (cond_stmt); 3714 gimple_regimplify_operands (cond_stmt, &gsi); 3715 } 3716 ep = split_block (entry_bb, cond_stmt); 3717 ep->flags = EDGE_TRUE_VALUE; 3718 entry_bb = ep->dest; 3719 ep->probability = profile_probability::very_likely (); 3720 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); 3721 ep->probability = profile_probability::very_unlikely (); 3722 if (gimple_in_ssa_p (cfun)) 3723 { 3724 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; 3725 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3726 !gsi_end_p (gpi); gsi_next (&gpi)) 3727 { 3728 gphi *phi = gpi.phi (); 3729 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3730 ep, UNKNOWN_LOCATION); 3731 } 3732 } 3733 gsi = gsi_last_bb (entry_bb); 3734 } 3735 3736 if (fd->lastprivate_conditional) 3737 { 3738 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3739 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 3740 if (fd->have_pointer_condtemp) 3741 condtemp = OMP_CLAUSE_DECL (c); 3742 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 3743 cond_var = OMP_CLAUSE_DECL (c); 3744 } 3745 if (fd->have_reductemp 3746 /* For scan, we don't want to reinitialize condtemp before the 3747 second loop. 
*/ 3748 || (fd->have_pointer_condtemp && !fd->have_scantemp) 3749 || fd->have_nonctrl_scantemp) 3750 { 3751 tree t1 = build_int_cst (long_integer_type_node, 0); 3752 tree t2 = build_int_cst (long_integer_type_node, 1); 3753 tree t3 = build_int_cstu (long_integer_type_node, 3754 (HOST_WIDE_INT_1U << 31) + 1); 3755 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3756 gimple_stmt_iterator gsi2 = gsi_none (); 3757 gimple *g = NULL; 3758 tree mem = null_pointer_node, memv = NULL_TREE; 3759 unsigned HOST_WIDE_INT condtemp_sz = 0; 3760 unsigned HOST_WIDE_INT alloc_align = 0; 3761 if (fd->have_reductemp) 3762 { 3763 gcc_assert (!fd->have_nonctrl_scantemp); 3764 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 3765 reductions = OMP_CLAUSE_DECL (c); 3766 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 3767 g = SSA_NAME_DEF_STMT (reductions); 3768 reductions = gimple_assign_rhs1 (g); 3769 OMP_CLAUSE_DECL (c) = reductions; 3770 gsi2 = gsi_for_stmt (g); 3771 } 3772 else 3773 { 3774 if (gsi_end_p (gsip)) 3775 gsi2 = gsi_after_labels (region->entry); 3776 else 3777 gsi2 = gsip; 3778 reductions = null_pointer_node; 3779 } 3780 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp) 3781 { 3782 tree type; 3783 if (fd->have_pointer_condtemp) 3784 type = TREE_TYPE (condtemp); 3785 else 3786 type = ptr_type_node; 3787 memv = create_tmp_var (type); 3788 TREE_ADDRESSABLE (memv) = 1; 3789 unsigned HOST_WIDE_INT sz = 0; 3790 tree size = NULL_TREE; 3791 if (fd->have_pointer_condtemp) 3792 { 3793 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 3794 sz *= fd->lastprivate_conditional; 3795 condtemp_sz = sz; 3796 } 3797 if (fd->have_nonctrl_scantemp) 3798 { 3799 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3800 gimple *g = gimple_build_call (nthreads, 0); 3801 nthreads = create_tmp_var (integer_type_node); 3802 gimple_call_set_lhs (g, nthreads); 3803 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 3804 nthreads = fold_convert (sizetype, nthreads); 3805 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node); 3806 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, 3807 alloc_align, nthreads, NULL, 3808 false); 3809 size = fold_convert (type, size); 3810 } 3811 else 3812 size = build_int_cst (type, sz); 3813 expand_omp_build_assign (&gsi2, memv, size, false); 3814 mem = build_fold_addr_expr (memv); 3815 } 3816 tree t 3817 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 3818 9, t1, t2, t2, t3, t1, null_pointer_node, 3819 null_pointer_node, reductions, mem); 3820 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 3821 true, GSI_SAME_STMT); 3822 if (fd->have_pointer_condtemp) 3823 expand_omp_build_assign (&gsi2, condtemp, memv, false); 3824 if (fd->have_nonctrl_scantemp) 3825 { 3826 tree ptr = fd->have_pointer_condtemp ? 
condtemp : memv; 3827 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz, 3828 alloc_align, nthreads, &gsi2, false); 3829 } 3830 if (fd->have_reductemp) 3831 { 3832 gsi_remove (&gsi2, true); 3833 release_ssa_name (gimple_assign_lhs (g)); 3834 } 3835 } 3836 switch (gimple_omp_for_kind (fd->for_stmt)) 3837 { 3838 case GF_OMP_FOR_KIND_FOR: 3839 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3840 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3841 break; 3842 case GF_OMP_FOR_KIND_DISTRIBUTE: 3843 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 3844 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3845 break; 3846 default: 3847 gcc_unreachable (); 3848 } 3849 nthreads = build_call_expr (nthreads, 0); 3850 nthreads = fold_convert (itype, nthreads); 3851 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3852 true, GSI_SAME_STMT); 3853 threadid = build_call_expr (threadid, 0); 3854 threadid = fold_convert (itype, threadid); 3855 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3856 true, GSI_SAME_STMT); 3857 3858 n1 = fd->loop.n1; 3859 n2 = fd->loop.n2; 3860 step = fd->loop.step; 3861 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3862 { 3863 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3864 OMP_CLAUSE__LOOPTEMP_); 3865 gcc_assert (innerc); 3866 n1 = OMP_CLAUSE_DECL (innerc); 3867 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3868 OMP_CLAUSE__LOOPTEMP_); 3869 gcc_assert (innerc); 3870 n2 = OMP_CLAUSE_DECL (innerc); 3871 } 3872 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3873 true, NULL_TREE, true, GSI_SAME_STMT); 3874 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3875 true, NULL_TREE, true, GSI_SAME_STMT); 3876 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3877 true, NULL_TREE, true, GSI_SAME_STMT); 3878 3879 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); 3880 t = fold_build2 (PLUS_EXPR, itype, step, t); 3881 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3882 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3883 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3884 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3885 fold_build1 (NEGATE_EXPR, itype, t), 3886 fold_build1 (NEGATE_EXPR, itype, step)); 3887 else 3888 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3889 t = fold_convert (itype, t); 3890 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3891 3892 q = create_tmp_reg (itype, "q"); 3893 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); 3894 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3895 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); 3896 3897 tt = create_tmp_reg (itype, "tt"); 3898 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); 3899 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3900 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); 3901 3902 t = build2 (LT_EXPR, boolean_type_node, threadid, tt); 3903 gcond *cond_stmt = gimple_build_cond_empty (t); 3904 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3905 3906 second_bb = split_block (entry_bb, cond_stmt)->dest; 3907 gsi = gsi_last_nondebug_bb (second_bb); 3908 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3909 3910 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), 3911 GSI_SAME_STMT); 3912 gassign *assign_stmt 3913 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); 3914 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3915 3916 third_bb = split_block (second_bb, assign_stmt)->dest; 3917 gsi = gsi_last_nondebug_bb (third_bb); 3918 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3919 3920 if (fd->have_nonctrl_scantemp) 3921 { 3922 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3923 tree controlp = NULL_TREE, controlb = NULL_TREE; 3924 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 3925 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 3926 && OMP_CLAUSE__SCANTEMP__CONTROL (c)) 3927 { 3928 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) 3929 controlb = OMP_CLAUSE_DECL (c); 3930 else 3931 controlp = OMP_CLAUSE_DECL (c); 3932 if (controlb && controlp) 3933 break; 3934 } 3935 gcc_assert (controlp && controlb); 3936 tree cnt = create_tmp_var (sizetype); 3937 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q); 3938 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3939 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node); 3940 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, 3941 alloc_align, cnt, NULL, true); 3942 tree size = create_tmp_var (sizetype); 3943 expand_omp_build_assign (&gsi, size, sz, false); 3944 tree cmp = fold_build2 (GT_EXPR, boolean_type_node, 3945 size, size_int (16384)); 3946 expand_omp_build_assign (&gsi, controlb, cmp); 3947 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, 3948 NULL_TREE, NULL_TREE); 3949 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3950 fourth_bb = split_block (third_bb, g)->dest; 3951 gsi = gsi_last_nondebug_bb (fourth_bb); 3952 /* FIXME: Once we have allocators, this should use allocator. 
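	 Until then, an allocation larger than 16384 bytes is done with
	 malloc (and released with free at the end of the region), while a
	 smaller one uses alloca after saving the stack pointer so it can
	 be restored on exit.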
*/ 3953 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size); 3954 gimple_call_set_lhs (g, controlp); 3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3956 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt, 3957 &gsi, true); 3958 gsi_prev (&gsi); 3959 g = gsi_stmt (gsi); 3960 fifth_bb = split_block (fourth_bb, g)->dest; 3961 gsi = gsi_last_nondebug_bb (fifth_bb); 3962 3963 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0); 3964 gimple_call_set_lhs (g, controlp); 3965 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3966 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); 3967 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 3968 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 3969 && OMP_CLAUSE__SCANTEMP__ALLOC (c)) 3970 { 3971 tree tmp = create_tmp_var (sizetype); 3972 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); 3973 g = gimple_build_assign (tmp, MULT_EXPR, cnt, 3974 TYPE_SIZE_UNIT (pointee_type)); 3975 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3976 g = gimple_build_call (alloca_decl, 2, tmp, 3977 size_int (TYPE_ALIGN (pointee_type))); 3978 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c)); 3979 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3980 } 3981 3982 sixth_bb = split_block (fifth_bb, g)->dest; 3983 gsi = gsi_last_nondebug_bb (sixth_bb); 3984 } 3985 3986 t = build2 (MULT_EXPR, itype, q, threadid); 3987 t = build2 (PLUS_EXPR, itype, t, tt); 3988 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3989 3990 t = fold_build2 (PLUS_EXPR, itype, s0, q); 3991 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3992 3993 t = build2 (GE_EXPR, boolean_type_node, s0, e0); 3994 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3995 3996 /* Remove the GIMPLE_OMP_FOR statement. */ 3997 gsi_remove (&gsi, true); 3998 3999 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 4000 gsi = gsi_start_bb (seq_start_bb); 4001 4002 tree startvar = fd->loop.v; 4003 tree endvar = NULL_TREE; 4004 4005 if (gimple_omp_for_combined_p (fd->for_stmt)) 4006 { 4007 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 4008 ? gimple_omp_parallel_clauses (inner_stmt) 4009 : gimple_omp_for_clauses (inner_stmt); 4010 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 4011 gcc_assert (innerc); 4012 startvar = OMP_CLAUSE_DECL (innerc); 4013 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4014 OMP_CLAUSE__LOOPTEMP_); 4015 gcc_assert (innerc); 4016 endvar = OMP_CLAUSE_DECL (innerc); 4017 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 4018 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 4019 { 4020 int i; 4021 for (i = 1; i < fd->collapse; i++) 4022 { 4023 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4024 OMP_CLAUSE__LOOPTEMP_); 4025 gcc_assert (innerc); 4026 } 4027 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4028 OMP_CLAUSE__LOOPTEMP_); 4029 if (innerc) 4030 { 4031 /* If needed (distribute parallel for with lastprivate), 4032 propagate down the total number of iterations. 
*/ 4033 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4034 fd->loop.n2); 4035 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4036 GSI_CONTINUE_LINKING); 4037 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4038 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4039 } 4040 } 4041 } 4042 t = fold_convert (itype, s0); 4043 t = fold_build2 (MULT_EXPR, itype, t, step); 4044 if (POINTER_TYPE_P (type)) 4045 { 4046 t = fold_build_pointer_plus (n1, t); 4047 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4048 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4049 t = fold_convert (signed_type_for (type), t); 4050 } 4051 else 4052 t = fold_build2 (PLUS_EXPR, type, t, n1); 4053 t = fold_convert (TREE_TYPE (startvar), t); 4054 t = force_gimple_operand_gsi (&gsi, t, 4055 DECL_P (startvar) 4056 && TREE_ADDRESSABLE (startvar), 4057 NULL_TREE, false, GSI_CONTINUE_LINKING); 4058 assign_stmt = gimple_build_assign (startvar, t); 4059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4060 if (cond_var) 4061 { 4062 tree itype = TREE_TYPE (cond_var); 4063 /* For lastprivate(conditional:) itervar, we need some iteration 4064 counter that starts at unsigned non-zero and increases. 4065 Prefer as few IVs as possible, so if we can use startvar 4066 itself, use that, or startvar + constant (those would be 4067 incremented with step), and as last resort use the s0 + 1 4068 incremented by 1. */ 4069 if (POINTER_TYPE_P (type) 4070 || TREE_CODE (n1) != INTEGER_CST 4071 || fd->loop.cond_code != LT_EXPR) 4072 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), 4073 build_int_cst (itype, 1)); 4074 else if (tree_int_cst_sgn (n1) == 1) 4075 t = fold_convert (itype, t); 4076 else 4077 { 4078 tree c = fold_convert (itype, n1); 4079 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 4080 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 4081 } 4082 t = force_gimple_operand_gsi (&gsi, t, false, 4083 NULL_TREE, false, GSI_CONTINUE_LINKING); 4084 assign_stmt = gimple_build_assign (cond_var, t); 4085 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4086 } 4087 4088 t = fold_convert (itype, e0); 4089 t = fold_build2 (MULT_EXPR, itype, t, step); 4090 if (POINTER_TYPE_P (type)) 4091 { 4092 t = fold_build_pointer_plus (n1, t); 4093 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4094 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4095 t = fold_convert (signed_type_for (type), t); 4096 } 4097 else 4098 t = fold_build2 (PLUS_EXPR, type, t, n1); 4099 t = fold_convert (TREE_TYPE (startvar), t); 4100 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4101 false, GSI_CONTINUE_LINKING); 4102 if (endvar) 4103 { 4104 assign_stmt = gimple_build_assign (endvar, e); 4105 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4106 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4107 assign_stmt = gimple_build_assign (fd->loop.v, e); 4108 else 4109 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4110 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4111 } 4112 /* Handle linear clause adjustments. 
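     For each linear clause with a copy-in value, advance the variable by
     its linear step times the thread's first logical iteration number
     (s0, biased for a combined construct by the offset between the inner
     and outer loop start), so each thread begins from the value the
     variable would have reached after all preceding iterations.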
*/ 4113 tree itercnt = NULL_TREE; 4114 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4115 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4116 c; c = OMP_CLAUSE_CHAIN (c)) 4117 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4118 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4119 { 4120 tree d = OMP_CLAUSE_DECL (c); 4121 bool is_ref = omp_is_reference (d); 4122 tree t = d, a, dest; 4123 if (is_ref) 4124 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4125 if (itercnt == NULL_TREE) 4126 { 4127 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4128 { 4129 itercnt = fold_build2 (MINUS_EXPR, itype, 4130 fold_convert (itype, n1), 4131 fold_convert (itype, fd->loop.n1)); 4132 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); 4133 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); 4134 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4135 NULL_TREE, false, 4136 GSI_CONTINUE_LINKING); 4137 } 4138 else 4139 itercnt = s0; 4140 } 4141 tree type = TREE_TYPE (t); 4142 if (POINTER_TYPE_P (type)) 4143 type = sizetype; 4144 a = fold_build2 (MULT_EXPR, type, 4145 fold_convert (type, itercnt), 4146 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4147 dest = unshare_expr (t); 4148 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 4149 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); 4150 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4151 false, GSI_CONTINUE_LINKING); 4152 assign_stmt = gimple_build_assign (dest, t); 4153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4154 } 4155 if (fd->collapse > 1) 4156 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4157 4158 if (!broken_loop) 4159 { 4160 /* The code controlling the sequential loop replaces the 4161 GIMPLE_OMP_CONTINUE. */ 4162 gsi = gsi_last_nondebug_bb (cont_bb); 4163 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4164 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 4165 vmain = gimple_omp_continue_control_use (cont_stmt); 4166 vback = gimple_omp_continue_control_def (cont_stmt); 4167 4168 if (cond_var) 4169 { 4170 tree itype = TREE_TYPE (cond_var); 4171 tree t2; 4172 if (POINTER_TYPE_P (type) 4173 || TREE_CODE (n1) != INTEGER_CST 4174 || fd->loop.cond_code != LT_EXPR) 4175 t2 = build_int_cst (itype, 1); 4176 else 4177 t2 = fold_convert (itype, step); 4178 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 4179 t2 = force_gimple_operand_gsi (&gsi, t2, false, 4180 NULL_TREE, true, GSI_SAME_STMT); 4181 assign_stmt = gimple_build_assign (cond_var, t2); 4182 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4183 } 4184 4185 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4186 { 4187 if (POINTER_TYPE_P (type)) 4188 t = fold_build_pointer_plus (vmain, step); 4189 else 4190 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4191 t = force_gimple_operand_gsi (&gsi, t, 4192 DECL_P (vback) 4193 && TREE_ADDRESSABLE (vback), 4194 NULL_TREE, true, GSI_SAME_STMT); 4195 assign_stmt = gimple_build_assign (vback, t); 4196 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4197 4198 t = build2 (fd->loop.cond_code, boolean_type_node, 4199 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4200 ? t : vback, e); 4201 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4202 } 4203 4204 /* Remove the GIMPLE_OMP_CONTINUE statement. 
*/ 4205 gsi_remove (&gsi, true); 4206 4207 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4208 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4209 } 4210 4211 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4212 gsi = gsi_last_nondebug_bb (exit_bb); 4213 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4214 { 4215 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4216 if (fd->have_reductemp 4217 || ((fd->have_pointer_condtemp || fd->have_scantemp) 4218 && !fd->have_nonctrl_scantemp)) 4219 { 4220 tree fn; 4221 if (t) 4222 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 4223 else 4224 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 4225 gcall *g = gimple_build_call (fn, 0); 4226 if (t) 4227 { 4228 gimple_call_set_lhs (g, t); 4229 if (fd->have_reductemp) 4230 gsi_insert_after (&gsi, gimple_build_assign (reductions, 4231 NOP_EXPR, t), 4232 GSI_SAME_STMT); 4233 } 4234 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4235 } 4236 else 4237 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4238 } 4239 else if ((fd->have_pointer_condtemp || fd->have_scantemp) 4240 && !fd->have_nonctrl_scantemp) 4241 { 4242 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 4243 gcall *g = gimple_build_call (fn, 0); 4244 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4245 } 4246 if (fd->have_scantemp && !fd->have_nonctrl_scantemp) 4247 { 4248 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4249 tree controlp = NULL_TREE, controlb = NULL_TREE; 4250 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 4251 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 4252 && OMP_CLAUSE__SCANTEMP__CONTROL (c)) 4253 { 4254 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) 4255 controlb = OMP_CLAUSE_DECL (c); 4256 else 4257 controlp = OMP_CLAUSE_DECL (c); 4258 if (controlb && controlp) 4259 break; 4260 } 4261 gcc_assert (controlp && controlb); 4262 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, 4263 NULL_TREE, NULL_TREE); 4264 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4265 exit1_bb = split_block (exit_bb, g)->dest; 4266 gsi = gsi_after_labels (exit1_bb); 4267 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1, 4268 controlp); 4269 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4270 exit2_bb = split_block (exit1_bb, g)->dest; 4271 gsi = gsi_after_labels (exit2_bb); 4272 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1, 4273 controlp); 4274 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4275 exit3_bb = split_block (exit2_bb, g)->dest; 4276 gsi = gsi_after_labels (exit3_bb); 4277 } 4278 gsi_remove (&gsi, true); 4279 4280 /* Connect all the blocks. 
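     ENTRY_BB branches on threadid < tt: the true edge goes to SECOND_BB,
     which hands this thread one of the remainder iterations, the false
     edge to THIRD_BB.  The final s0 >= e0 test sends threads with nothing
     to do straight to FIN_BB and the others to SEQ_START_BB.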
*/ 4281 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); 4282 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4); 4283 ep = find_edge (entry_bb, second_bb); 4284 ep->flags = EDGE_TRUE_VALUE; 4285 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); 4286 if (fourth_bb) 4287 { 4288 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE); 4289 ep->probability 4290 = profile_probability::guessed_always ().apply_scale (1, 2); 4291 ep = find_edge (third_bb, fourth_bb); 4292 ep->flags = EDGE_TRUE_VALUE; 4293 ep->probability 4294 = profile_probability::guessed_always ().apply_scale (1, 2); 4295 ep = find_edge (fourth_bb, fifth_bb); 4296 redirect_edge_and_branch (ep, sixth_bb); 4297 } 4298 else 4299 sixth_bb = third_bb; 4300 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; 4301 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE; 4302 if (exit1_bb) 4303 { 4304 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE); 4305 ep->probability 4306 = profile_probability::guessed_always ().apply_scale (1, 2); 4307 ep = find_edge (exit_bb, exit1_bb); 4308 ep->flags = EDGE_TRUE_VALUE; 4309 ep->probability 4310 = profile_probability::guessed_always ().apply_scale (1, 2); 4311 ep = find_edge (exit1_bb, exit2_bb); 4312 redirect_edge_and_branch (ep, exit3_bb); 4313 } 4314 4315 if (!broken_loop) 4316 { 4317 ep = find_edge (cont_bb, body_bb); 4318 if (ep == NULL) 4319 { 4320 ep = BRANCH_EDGE (cont_bb); 4321 gcc_assert (single_succ (ep->dest) == body_bb); 4322 } 4323 if (gimple_omp_for_combined_p (fd->for_stmt)) 4324 { 4325 remove_edge (ep); 4326 ep = NULL; 4327 } 4328 else if (fd->collapse > 1) 4329 { 4330 remove_edge (ep); 4331 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4332 } 4333 else 4334 ep->flags = EDGE_TRUE_VALUE; 4335 find_edge (cont_bb, fin_bb)->flags 4336 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 4337 } 4338 4339 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); 4340 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); 4341 if (fourth_bb) 4342 { 4343 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb); 4344 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb); 4345 } 4346 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb); 4347 4348 set_immediate_dominator (CDI_DOMINATORS, body_bb, 4349 recompute_dominator (CDI_DOMINATORS, body_bb)); 4350 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 4351 recompute_dominator (CDI_DOMINATORS, fin_bb)); 4352 if (exit1_bb) 4353 { 4354 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb); 4355 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb); 4356 } 4357 4358 class loop *loop = body_bb->loop_father; 4359 if (loop != entry_bb->loop_father) 4360 { 4361 gcc_assert (broken_loop || loop->header == body_bb); 4362 gcc_assert (broken_loop 4363 || loop->latch == region->cont 4364 || single_pred (loop->latch) == region->cont); 4365 return; 4366 } 4367 4368 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 4369 { 4370 loop = alloc_loop (); 4371 loop->header = body_bb; 4372 if (collapse_bb == NULL) 4373 loop->latch = cont_bb; 4374 add_loop (loop, body_bb->loop_father); 4375 } 4376 } 4377 4378 /* Return phi in E->DEST with ARG on edge E. 
*/ 4379 4380 static gphi * 4381 find_phi_with_arg_on_edge (tree arg, edge e) 4382 { 4383 basic_block bb = e->dest; 4384 4385 for (gphi_iterator gpi = gsi_start_phis (bb); 4386 !gsi_end_p (gpi); 4387 gsi_next (&gpi)) 4388 { 4389 gphi *phi = gpi.phi (); 4390 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) 4391 return phi; 4392 } 4393 4394 return NULL; 4395 } 4396 4397 /* A subroutine of expand_omp_for. Generate code for a parallel 4398 loop with static schedule and a specified chunk size. Given 4399 parameters: 4400 4401 for (V = N1; V cond N2; V += STEP) BODY; 4402 4403 where COND is "<" or ">", we generate pseudocode 4404 4405 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 4406 if (cond is <) 4407 adj = STEP - 1; 4408 else 4409 adj = STEP + 1; 4410 if ((__typeof (V)) -1 > 0 && cond is >) 4411 n = -(adj + N2 - N1) / -STEP; 4412 else 4413 n = (adj + N2 - N1) / STEP; 4414 trip = 0; 4415 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is 4416 here so that V is defined 4417 if the loop is not entered 4418 L0: 4419 s0 = (trip * nthreads + threadid) * CHUNK; 4420 e0 = min (s0 + CHUNK, n); 4421 if (s0 < n) goto L1; else goto L4; 4422 L1: 4423 V = s0 * STEP + N1; 4424 e = e0 * STEP + N1; 4425 L2: 4426 BODY; 4427 V += STEP; 4428 if (V cond e) goto L2; else goto L3; 4429 L3: 4430 trip += 1; 4431 goto L0; 4432 L4: 4433 */ 4434 4435 static void 4436 expand_omp_for_static_chunk (struct omp_region *region, 4437 struct omp_for_data *fd, gimple *inner_stmt) 4438 { 4439 tree n, s0, e0, e, t; 4440 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; 4441 tree type, itype, vmain, vback, vextra; 4442 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; 4443 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; 4444 gimple_stmt_iterator gsi, gsip; 4445 edge se; 4446 bool broken_loop = region->cont == NULL; 4447 tree *counts = NULL; 4448 tree n1, n2, step; 4449 tree reductions = NULL_TREE; 4450 tree cond_var = NULL_TREE, condtemp = NULL_TREE; 4451 4452 itype = type = TREE_TYPE (fd->loop.v); 4453 if (POINTER_TYPE_P (type)) 4454 itype = signed_type_for (type); 4455 4456 entry_bb = region->entry; 4457 se = split_block (entry_bb, last_stmt (entry_bb)); 4458 entry_bb = se->src; 4459 iter_part_bb = se->dest; 4460 cont_bb = region->cont; 4461 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 4462 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 4463 gcc_assert (broken_loop 4464 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 4465 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 4466 body_bb = single_succ (seq_start_bb); 4467 if (!broken_loop) 4468 { 4469 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 4470 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 4471 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4472 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 4473 } 4474 exit_bb = region->exit; 4475 4476 /* Trip and adjustment setup goes in ENTRY_BB. 
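     Chunks of CHUNK consecutive iterations are handed out round-robin:
     on trip T, thread I runs logical iterations
     [(T * nthreads + I) * CHUNK, min ((T * nthreads + I + 1) * CHUNK, n)).
     For example, with n = 10, nthreads = 2 and CHUNK = 2, thread 0 runs
     iterations [0,2), [4,6) and [8,10) while thread 1 runs [2,4) and [6,8).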
*/ 4477 gsi = gsi_last_nondebug_bb (entry_bb); 4478 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4479 gsip = gsi; 4480 gsi_prev (&gsip); 4481 4482 if (fd->collapse > 1) 4483 { 4484 int first_zero_iter = -1, dummy = -1; 4485 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 4486 4487 counts = XALLOCAVEC (tree, fd->collapse); 4488 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4489 fin_bb, first_zero_iter, 4490 dummy_bb, dummy, l2_dom_bb); 4491 t = NULL_TREE; 4492 } 4493 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4494 t = integer_one_node; 4495 else 4496 t = fold_binary (fd->loop.cond_code, boolean_type_node, 4497 fold_convert (type, fd->loop.n1), 4498 fold_convert (type, fd->loop.n2)); 4499 if (fd->collapse == 1 4500 && TYPE_UNSIGNED (type) 4501 && (t == NULL_TREE || !integer_onep (t))) 4502 { 4503 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 4504 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 4505 true, GSI_SAME_STMT); 4506 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 4507 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 4508 true, GSI_SAME_STMT); 4509 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 4510 NULL_TREE, NULL_TREE); 4511 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 4512 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 4513 expand_omp_regimplify_p, NULL, NULL) 4514 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 4515 expand_omp_regimplify_p, NULL, NULL)) 4516 { 4517 gsi = gsi_for_stmt (cond_stmt); 4518 gimple_regimplify_operands (cond_stmt, &gsi); 4519 } 4520 se = split_block (entry_bb, cond_stmt); 4521 se->flags = EDGE_TRUE_VALUE; 4522 entry_bb = se->dest; 4523 se->probability = profile_probability::very_likely (); 4524 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 4525 se->probability = profile_probability::very_unlikely (); 4526 if (gimple_in_ssa_p (cfun)) 4527 { 4528 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 4529 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 4530 !gsi_end_p (gpi); gsi_next (&gpi)) 4531 { 4532 gphi *phi = gpi.phi (); 4533 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 4534 se, UNKNOWN_LOCATION); 4535 } 4536 } 4537 gsi = gsi_last_bb (entry_bb); 4538 } 4539 4540 if (fd->lastprivate_conditional) 4541 { 4542 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4543 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 4544 if (fd->have_pointer_condtemp) 4545 condtemp = OMP_CLAUSE_DECL (c); 4546 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 4547 cond_var = OMP_CLAUSE_DECL (c); 4548 } 4549 if (fd->have_reductemp || fd->have_pointer_condtemp) 4550 { 4551 tree t1 = build_int_cst (long_integer_type_node, 0); 4552 tree t2 = build_int_cst (long_integer_type_node, 1); 4553 tree t3 = build_int_cstu (long_integer_type_node, 4554 (HOST_WIDE_INT_1U << 31) + 1); 4555 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4556 gimple_stmt_iterator gsi2 = gsi_none (); 4557 gimple *g = NULL; 4558 tree mem = null_pointer_node, memv = NULL_TREE; 4559 if (fd->have_reductemp) 4560 { 4561 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 4562 reductions = OMP_CLAUSE_DECL (c); 4563 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 4564 g = SSA_NAME_DEF_STMT (reductions); 4565 reductions = gimple_assign_rhs1 (g); 4566 OMP_CLAUSE_DECL (c) = reductions; 4567 gsi2 = gsi_for_stmt (g); 4568 } 4569 else 4570 { 4571 if (gsi_end_p (gsip)) 4572 gsi2 = gsi_after_labels (region->entry); 4573 else 4574 gsi2 = gsip; 4575 reductions = 
null_pointer_node; 4576 } 4577 if (fd->have_pointer_condtemp) 4578 { 4579 tree type = TREE_TYPE (condtemp); 4580 memv = create_tmp_var (type); 4581 TREE_ADDRESSABLE (memv) = 1; 4582 unsigned HOST_WIDE_INT sz 4583 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 4584 sz *= fd->lastprivate_conditional; 4585 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), 4586 false); 4587 mem = build_fold_addr_expr (memv); 4588 } 4589 tree t 4590 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 4591 9, t1, t2, t2, t3, t1, null_pointer_node, 4592 null_pointer_node, reductions, mem); 4593 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 4594 true, GSI_SAME_STMT); 4595 if (fd->have_pointer_condtemp) 4596 expand_omp_build_assign (&gsi2, condtemp, memv, false); 4597 if (fd->have_reductemp) 4598 { 4599 gsi_remove (&gsi2, true); 4600 release_ssa_name (gimple_assign_lhs (g)); 4601 } 4602 } 4603 switch (gimple_omp_for_kind (fd->for_stmt)) 4604 { 4605 case GF_OMP_FOR_KIND_FOR: 4606 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 4607 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 4608 break; 4609 case GF_OMP_FOR_KIND_DISTRIBUTE: 4610 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 4611 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 4612 break; 4613 default: 4614 gcc_unreachable (); 4615 } 4616 nthreads = build_call_expr (nthreads, 0); 4617 nthreads = fold_convert (itype, nthreads); 4618 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 4619 true, GSI_SAME_STMT); 4620 threadid = build_call_expr (threadid, 0); 4621 threadid = fold_convert (itype, threadid); 4622 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 4623 true, GSI_SAME_STMT); 4624 4625 n1 = fd->loop.n1; 4626 n2 = fd->loop.n2; 4627 step = fd->loop.step; 4628 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4629 { 4630 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4631 OMP_CLAUSE__LOOPTEMP_); 4632 gcc_assert (innerc); 4633 n1 = OMP_CLAUSE_DECL (innerc); 4634 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4635 OMP_CLAUSE__LOOPTEMP_); 4636 gcc_assert (innerc); 4637 n2 = OMP_CLAUSE_DECL (innerc); 4638 } 4639 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 4640 true, NULL_TREE, true, GSI_SAME_STMT); 4641 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 4642 true, NULL_TREE, true, GSI_SAME_STMT); 4643 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 4644 true, NULL_TREE, true, GSI_SAME_STMT); 4645 tree chunk_size = fold_convert (itype, fd->chunk_size); 4646 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 4647 chunk_size 4648 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, 4649 GSI_SAME_STMT); 4650 4651 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); 4652 t = fold_build2 (PLUS_EXPR, itype, step, t); 4653 t = fold_build2 (PLUS_EXPR, itype, t, n2); 4654 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 4655 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 4656 t = fold_build2 (TRUNC_DIV_EXPR, itype, 4657 fold_build1 (NEGATE_EXPR, itype, t), 4658 fold_build1 (NEGATE_EXPR, itype, step)); 4659 else 4660 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 4661 t = fold_convert (itype, t); 4662 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4663 true, GSI_SAME_STMT); 4664 4665 trip_var = create_tmp_reg (itype, ".trip"); 4666 if (gimple_in_ssa_p (cfun)) 4667 { 4668 trip_init = make_ssa_name (trip_var); 4669 trip_main = make_ssa_name (trip_var); 4670 trip_back = make_ssa_name (trip_var); 4671 } 4672 else 4673 { 4674 trip_init = trip_var; 4675 trip_main = trip_var; 4676 trip_back = trip_var; 4677 } 4678 4679 gassign *assign_stmt 4680 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 4681 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4682 4683 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 4684 t = fold_build2 (MULT_EXPR, itype, t, step); 4685 if (POINTER_TYPE_P (type)) 4686 t = fold_build_pointer_plus (n1, t); 4687 else 4688 t = fold_build2 (PLUS_EXPR, type, t, n1); 4689 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4690 true, GSI_SAME_STMT); 4691 4692 /* Remove the GIMPLE_OMP_FOR. */ 4693 gsi_remove (&gsi, true); 4694 4695 gimple_stmt_iterator gsif = gsi; 4696 4697 /* Iteration space partitioning goes in ITER_PART_BB. */ 4698 gsi = gsi_last_bb (iter_part_bb); 4699 4700 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 4701 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 4702 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 4703 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4704 false, GSI_CONTINUE_LINKING); 4705 4706 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 4707 t = fold_build2 (MIN_EXPR, itype, t, n); 4708 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4709 false, GSI_CONTINUE_LINKING); 4710 4711 t = build2 (LT_EXPR, boolean_type_node, s0, n); 4712 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 4713 4714 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 4715 gsi = gsi_start_bb (seq_start_bb); 4716 4717 tree startvar = fd->loop.v; 4718 tree endvar = NULL_TREE; 4719 4720 if (gimple_omp_for_combined_p (fd->for_stmt)) 4721 { 4722 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 4723 ? gimple_omp_parallel_clauses (inner_stmt) 4724 : gimple_omp_for_clauses (inner_stmt); 4725 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 4726 gcc_assert (innerc); 4727 startvar = OMP_CLAUSE_DECL (innerc); 4728 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4729 OMP_CLAUSE__LOOPTEMP_); 4730 gcc_assert (innerc); 4731 endvar = OMP_CLAUSE_DECL (innerc); 4732 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 4733 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 4734 { 4735 int i; 4736 for (i = 1; i < fd->collapse; i++) 4737 { 4738 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4739 OMP_CLAUSE__LOOPTEMP_); 4740 gcc_assert (innerc); 4741 } 4742 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4743 OMP_CLAUSE__LOOPTEMP_); 4744 if (innerc) 4745 { 4746 /* If needed (distribute parallel for with lastprivate), 4747 propagate down the total number of iterations. 
*/ 4748 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4749 fd->loop.n2); 4750 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4751 GSI_CONTINUE_LINKING); 4752 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4753 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4754 } 4755 } 4756 } 4757 4758 t = fold_convert (itype, s0); 4759 t = fold_build2 (MULT_EXPR, itype, t, step); 4760 if (POINTER_TYPE_P (type)) 4761 { 4762 t = fold_build_pointer_plus (n1, t); 4763 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4764 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4765 t = fold_convert (signed_type_for (type), t); 4766 } 4767 else 4768 t = fold_build2 (PLUS_EXPR, type, t, n1); 4769 t = fold_convert (TREE_TYPE (startvar), t); 4770 t = force_gimple_operand_gsi (&gsi, t, 4771 DECL_P (startvar) 4772 && TREE_ADDRESSABLE (startvar), 4773 NULL_TREE, false, GSI_CONTINUE_LINKING); 4774 assign_stmt = gimple_build_assign (startvar, t); 4775 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4776 if (cond_var) 4777 { 4778 tree itype = TREE_TYPE (cond_var); 4779 /* For lastprivate(conditional:) itervar, we need some iteration 4780 counter that starts at unsigned non-zero and increases. 4781 Prefer as few IVs as possible, so if we can use startvar 4782 itself, use that, or startvar + constant (those would be 4783 incremented with step), and as last resort use the s0 + 1 4784 incremented by 1. */ 4785 if (POINTER_TYPE_P (type) 4786 || TREE_CODE (n1) != INTEGER_CST 4787 || fd->loop.cond_code != LT_EXPR) 4788 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), 4789 build_int_cst (itype, 1)); 4790 else if (tree_int_cst_sgn (n1) == 1) 4791 t = fold_convert (itype, t); 4792 else 4793 { 4794 tree c = fold_convert (itype, n1); 4795 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 4796 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 4797 } 4798 t = force_gimple_operand_gsi (&gsi, t, false, 4799 NULL_TREE, false, GSI_CONTINUE_LINKING); 4800 assign_stmt = gimple_build_assign (cond_var, t); 4801 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4802 } 4803 4804 t = fold_convert (itype, e0); 4805 t = fold_build2 (MULT_EXPR, itype, t, step); 4806 if (POINTER_TYPE_P (type)) 4807 { 4808 t = fold_build_pointer_plus (n1, t); 4809 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4810 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4811 t = fold_convert (signed_type_for (type), t); 4812 } 4813 else 4814 t = fold_build2 (PLUS_EXPR, type, t, n1); 4815 t = fold_convert (TREE_TYPE (startvar), t); 4816 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4817 false, GSI_CONTINUE_LINKING); 4818 if (endvar) 4819 { 4820 assign_stmt = gimple_build_assign (endvar, e); 4821 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4822 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4823 assign_stmt = gimple_build_assign (fd->loop.v, e); 4824 else 4825 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4826 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4827 } 4828 /* Handle linear clause adjustments. 
*/ 4829 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 4830 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4831 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4832 c; c = OMP_CLAUSE_CHAIN (c)) 4833 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4834 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4835 { 4836 tree d = OMP_CLAUSE_DECL (c); 4837 bool is_ref = omp_is_reference (d); 4838 tree t = d, a, dest; 4839 if (is_ref) 4840 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4841 tree type = TREE_TYPE (t); 4842 if (POINTER_TYPE_P (type)) 4843 type = sizetype; 4844 dest = unshare_expr (t); 4845 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4846 expand_omp_build_assign (&gsif, v, t); 4847 if (itercnt == NULL_TREE) 4848 { 4849 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4850 { 4851 itercntbias 4852 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 4853 fold_convert (itype, fd->loop.n1)); 4854 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 4855 itercntbias, step); 4856 itercntbias 4857 = force_gimple_operand_gsi (&gsif, itercntbias, true, 4858 NULL_TREE, true, 4859 GSI_SAME_STMT); 4860 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 4861 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4862 NULL_TREE, false, 4863 GSI_CONTINUE_LINKING); 4864 } 4865 else 4866 itercnt = s0; 4867 } 4868 a = fold_build2 (MULT_EXPR, type, 4869 fold_convert (type, itercnt), 4870 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4871 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 4872 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4873 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4874 false, GSI_CONTINUE_LINKING); 4875 assign_stmt = gimple_build_assign (dest, t); 4876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4877 } 4878 if (fd->collapse > 1) 4879 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4880 4881 if (!broken_loop) 4882 { 4883 /* The code controlling the sequential loop goes in CONT_BB, 4884 replacing the GIMPLE_OMP_CONTINUE. 
*/ 4885 gsi = gsi_last_nondebug_bb (cont_bb); 4886 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4887 vmain = gimple_omp_continue_control_use (cont_stmt); 4888 vback = gimple_omp_continue_control_def (cont_stmt); 4889 4890 if (cond_var) 4891 { 4892 tree itype = TREE_TYPE (cond_var); 4893 tree t2; 4894 if (POINTER_TYPE_P (type) 4895 || TREE_CODE (n1) != INTEGER_CST 4896 || fd->loop.cond_code != LT_EXPR) 4897 t2 = build_int_cst (itype, 1); 4898 else 4899 t2 = fold_convert (itype, step); 4900 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 4901 t2 = force_gimple_operand_gsi (&gsi, t2, false, 4902 NULL_TREE, true, GSI_SAME_STMT); 4903 assign_stmt = gimple_build_assign (cond_var, t2); 4904 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4905 } 4906 4907 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4908 { 4909 if (POINTER_TYPE_P (type)) 4910 t = fold_build_pointer_plus (vmain, step); 4911 else 4912 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4913 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 4914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4915 true, GSI_SAME_STMT); 4916 assign_stmt = gimple_build_assign (vback, t); 4917 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4918 4919 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 4920 t = build2 (EQ_EXPR, boolean_type_node, 4921 build_int_cst (itype, 0), 4922 build_int_cst (itype, 1)); 4923 else 4924 t = build2 (fd->loop.cond_code, boolean_type_node, 4925 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4926 ? t : vback, e); 4927 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4928 } 4929 4930 /* Remove GIMPLE_OMP_CONTINUE. */ 4931 gsi_remove (&gsi, true); 4932 4933 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4934 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4935 4936 /* Trip update code goes into TRIP_UPDATE_BB. */ 4937 gsi = gsi_start_bb (trip_update_bb); 4938 4939 t = build_int_cst (itype, 1); 4940 t = build2 (PLUS_EXPR, itype, trip_main, t); 4941 assign_stmt = gimple_build_assign (trip_back, t); 4942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4943 } 4944 4945 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4946 gsi = gsi_last_nondebug_bb (exit_bb); 4947 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4948 { 4949 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4950 if (fd->have_reductemp || fd->have_pointer_condtemp) 4951 { 4952 tree fn; 4953 if (t) 4954 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 4955 else 4956 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 4957 gcall *g = gimple_build_call (fn, 0); 4958 if (t) 4959 { 4960 gimple_call_set_lhs (g, t); 4961 if (fd->have_reductemp) 4962 gsi_insert_after (&gsi, gimple_build_assign (reductions, 4963 NOP_EXPR, t), 4964 GSI_SAME_STMT); 4965 } 4966 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4967 } 4968 else 4969 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4970 } 4971 else if (fd->have_pointer_condtemp) 4972 { 4973 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 4974 gcall *g = gimple_build_call (fn, 0); 4975 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4976 } 4977 gsi_remove (&gsi, true); 4978 4979 /* Connect the new blocks. 
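     ITER_PART_BB tests s0 < n: the true edge enters SEQ_START_BB to run
     the next chunk, the false edge leaves for FIN_BB.  TRIP_UPDATE_BB
     increments the trip count and loops back to ITER_PART_BB.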
*/ 4980 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 4981 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 4982 4983 if (!broken_loop) 4984 { 4985 se = find_edge (cont_bb, body_bb); 4986 if (se == NULL) 4987 { 4988 se = BRANCH_EDGE (cont_bb); 4989 gcc_assert (single_succ (se->dest) == body_bb); 4990 } 4991 if (gimple_omp_for_combined_p (fd->for_stmt)) 4992 { 4993 remove_edge (se); 4994 se = NULL; 4995 } 4996 else if (fd->collapse > 1) 4997 { 4998 remove_edge (se); 4999 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 5000 } 5001 else 5002 se->flags = EDGE_TRUE_VALUE; 5003 find_edge (cont_bb, trip_update_bb)->flags 5004 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 5005 5006 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 5007 iter_part_bb); 5008 } 5009 5010 if (gimple_in_ssa_p (cfun)) 5011 { 5012 gphi_iterator psi; 5013 gphi *phi; 5014 edge re, ene; 5015 edge_var_map *vm; 5016 size_t i; 5017 5018 gcc_assert (fd->collapse == 1 && !broken_loop); 5019 5020 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 5021 remove arguments of the phi nodes in fin_bb. We need to create 5022 appropriate phi nodes in iter_part_bb instead. */ 5023 se = find_edge (iter_part_bb, fin_bb); 5024 re = single_succ_edge (trip_update_bb); 5025 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 5026 ene = single_succ_edge (entry_bb); 5027 5028 psi = gsi_start_phis (fin_bb); 5029 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 5030 gsi_next (&psi), ++i) 5031 { 5032 gphi *nphi; 5033 location_t locus; 5034 5035 phi = psi.phi (); 5036 if (operand_equal_p (gimple_phi_arg_def (phi, 0), 5037 redirect_edge_var_map_def (vm), 0)) 5038 continue; 5039 5040 t = gimple_phi_result (phi); 5041 gcc_assert (t == redirect_edge_var_map_result (vm)); 5042 5043 if (!single_pred_p (fin_bb)) 5044 t = copy_ssa_name (t, phi); 5045 5046 nphi = create_phi_node (t, iter_part_bb); 5047 5048 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 5049 locus = gimple_phi_arg_location_from_edge (phi, se); 5050 5051 /* A special case -- fd->loop.v is not yet computed in 5052 iter_part_bb, we need to use vextra instead. */ 5053 if (t == fd->loop.v) 5054 t = vextra; 5055 add_phi_arg (nphi, t, ene, locus); 5056 locus = redirect_edge_var_map_location (vm); 5057 tree back_arg = redirect_edge_var_map_def (vm); 5058 add_phi_arg (nphi, back_arg, re, locus); 5059 edge ce = find_edge (cont_bb, body_bb); 5060 if (ce == NULL) 5061 { 5062 ce = BRANCH_EDGE (cont_bb); 5063 gcc_assert (single_succ (ce->dest) == body_bb); 5064 ce = single_succ_edge (ce->dest); 5065 } 5066 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 5067 gcc_assert (inner_loop_phi != NULL); 5068 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 5069 find_edge (seq_start_bb, body_bb), locus); 5070 5071 if (!single_pred_p (fin_bb)) 5072 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 5073 } 5074 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 5075 redirect_edge_var_map_clear (re); 5076 if (single_pred_p (fin_bb)) 5077 while (1) 5078 { 5079 psi = gsi_start_phis (fin_bb); 5080 if (gsi_end_p (psi)) 5081 break; 5082 remove_phi_node (&psi, false); 5083 } 5084 5085 /* Make phi node for trip. 
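	 It merges TRIP_INIT (zero, coming from ENTRY_BB) and TRIP_BACK (the
	 value incremented in TRIP_UPDATE_BB) into TRIP_MAIN at the head of
	 the chunk-partitioning loop.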
*/ 5086 phi = create_phi_node (trip_main, iter_part_bb); 5087 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 5088 UNKNOWN_LOCATION); 5089 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 5090 UNKNOWN_LOCATION); 5091 } 5092 5093 if (!broken_loop) 5094 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 5095 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 5096 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 5097 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 5098 recompute_dominator (CDI_DOMINATORS, fin_bb)); 5099 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 5100 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 5101 set_immediate_dominator (CDI_DOMINATORS, body_bb, 5102 recompute_dominator (CDI_DOMINATORS, body_bb)); 5103 5104 if (!broken_loop) 5105 { 5106 class loop *loop = body_bb->loop_father; 5107 class loop *trip_loop = alloc_loop (); 5108 trip_loop->header = iter_part_bb; 5109 trip_loop->latch = trip_update_bb; 5110 add_loop (trip_loop, iter_part_bb->loop_father); 5111 5112 if (loop != entry_bb->loop_father) 5113 { 5114 gcc_assert (loop->header == body_bb); 5115 gcc_assert (loop->latch == region->cont 5116 || single_pred (loop->latch) == region->cont); 5117 trip_loop->inner = loop; 5118 return; 5119 } 5120 5121 if (!gimple_omp_for_combined_p (fd->for_stmt)) 5122 { 5123 loop = alloc_loop (); 5124 loop->header = body_bb; 5125 if (collapse_bb == NULL) 5126 loop->latch = cont_bb; 5127 add_loop (loop, trip_loop); 5128 } 5129 } 5130 } 5131 5132 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 5133 loop. Given parameters: 5134 5135 for (V = N1; V cond N2; V += STEP) BODY; 5136 5137 where COND is "<" or ">", we generate pseudocode 5138 5139 V = N1; 5140 goto L1; 5141 L0: 5142 BODY; 5143 V += STEP; 5144 L1: 5145 if (V cond N2) goto L0; else goto L2; 5146 L2: 5147 5148 For collapsed loops, given parameters: 5149 collapse(3) 5150 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 5151 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 5152 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 5153 BODY; 5154 5155 we generate pseudocode 5156 5157 if (cond3 is <) 5158 adj = STEP3 - 1; 5159 else 5160 adj = STEP3 + 1; 5161 count3 = (adj + N32 - N31) / STEP3; 5162 if (cond2 is <) 5163 adj = STEP2 - 1; 5164 else 5165 adj = STEP2 + 1; 5166 count2 = (adj + N22 - N21) / STEP2; 5167 if (cond1 is <) 5168 adj = STEP1 - 1; 5169 else 5170 adj = STEP1 + 1; 5171 count1 = (adj + N12 - N11) / STEP1; 5172 count = count1 * count2 * count3; 5173 V = 0; 5174 V1 = N11; 5175 V2 = N21; 5176 V3 = N31; 5177 goto L1; 5178 L0: 5179 BODY; 5180 V += 1; 5181 V3 += STEP3; 5182 V2 += (V3 cond3 N32) ? 0 : STEP2; 5183 V3 = (V3 cond3 N32) ? V3 : N31; 5184 V1 += (V2 cond2 N22) ? 0 : STEP1; 5185 V2 = (V2 cond2 N22) ? 
V2 : N21; 5186 L1: 5187 if (V < count) goto L0; else goto L2; 5188 L2: 5189 5190 */ 5191 5192 static void 5193 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) 5194 { 5195 tree type, t; 5196 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; 5197 gimple_stmt_iterator gsi; 5198 gimple *stmt; 5199 gcond *cond_stmt; 5200 bool broken_loop = region->cont == NULL; 5201 edge e, ne; 5202 tree *counts = NULL; 5203 int i; 5204 int safelen_int = INT_MAX; 5205 bool dont_vectorize = false; 5206 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5207 OMP_CLAUSE_SAFELEN); 5208 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5209 OMP_CLAUSE__SIMDUID_); 5210 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5211 OMP_CLAUSE_IF); 5212 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5213 OMP_CLAUSE_SIMDLEN); 5214 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5215 OMP_CLAUSE__CONDTEMP_); 5216 tree n1, n2; 5217 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE; 5218 5219 if (safelen) 5220 { 5221 poly_uint64 val; 5222 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); 5223 if (!poly_int_tree_p (safelen, &val)) 5224 safelen_int = 0; 5225 else 5226 safelen_int = MIN (constant_lower_bound (val), INT_MAX); 5227 if (safelen_int == 1) 5228 safelen_int = 0; 5229 } 5230 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc))) 5231 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))) 5232 { 5233 safelen_int = 0; 5234 dont_vectorize = true; 5235 } 5236 type = TREE_TYPE (fd->loop.v); 5237 entry_bb = region->entry; 5238 cont_bb = region->cont; 5239 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5240 gcc_assert (broken_loop 5241 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 5242 l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 5243 if (!broken_loop) 5244 { 5245 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 5246 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 5247 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 5248 l2_bb = BRANCH_EDGE (entry_bb)->dest; 5249 } 5250 else 5251 { 5252 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 5253 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 5254 l2_bb = single_succ (l1_bb); 5255 } 5256 exit_bb = region->exit; 5257 l2_dom_bb = NULL; 5258 5259 gsi = gsi_last_nondebug_bb (entry_bb); 5260 5261 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 5262 /* Not needed in SSA form right now. 
*/ 5263 gcc_assert (!gimple_in_ssa_p (cfun)); 5264 if (fd->collapse > 1) 5265 { 5266 int first_zero_iter = -1, dummy = -1; 5267 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; 5268 5269 counts = XALLOCAVEC (tree, fd->collapse); 5270 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5271 zero_iter_bb, first_zero_iter, 5272 dummy_bb, dummy, l2_dom_bb); 5273 } 5274 if (l2_dom_bb == NULL) 5275 l2_dom_bb = l1_bb; 5276 5277 n1 = fd->loop.n1; 5278 n2 = fd->loop.n2; 5279 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 5280 { 5281 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5282 OMP_CLAUSE__LOOPTEMP_); 5283 gcc_assert (innerc); 5284 n1 = OMP_CLAUSE_DECL (innerc); 5285 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5286 OMP_CLAUSE__LOOPTEMP_); 5287 gcc_assert (innerc); 5288 n2 = OMP_CLAUSE_DECL (innerc); 5289 } 5290 tree step = fd->loop.step; 5291 5292 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5293 OMP_CLAUSE__SIMT_); 5294 if (is_simt) 5295 { 5296 cfun->curr_properties &= ~PROP_gimple_lomp_dev; 5297 is_simt = safelen_int > 1; 5298 } 5299 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; 5300 if (is_simt) 5301 { 5302 simt_lane = create_tmp_var (unsigned_type_node); 5303 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); 5304 gimple_call_set_lhs (g, simt_lane); 5305 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 5306 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, 5307 fold_convert (TREE_TYPE (step), simt_lane)); 5308 n1 = fold_convert (type, n1); 5309 if (POINTER_TYPE_P (type)) 5310 n1 = fold_build_pointer_plus (n1, offset); 5311 else 5312 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); 5313 5314 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */ 5315 if (fd->collapse > 1) 5316 simt_maxlane = build_one_cst (unsigned_type_node); 5317 else if (safelen_int < omp_max_simt_vf ()) 5318 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); 5319 tree vf 5320 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, 5321 unsigned_type_node, 0); 5322 if (simt_maxlane) 5323 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); 5324 vf = fold_convert (TREE_TYPE (step), vf); 5325 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); 5326 } 5327 5328 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 5329 if (fd->collapse > 1) 5330 { 5331 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 5332 { 5333 gsi_prev (&gsi); 5334 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); 5335 gsi_next (&gsi); 5336 } 5337 else 5338 for (i = 0; i < fd->collapse; i++) 5339 { 5340 tree itype = TREE_TYPE (fd->loops[i].v); 5341 if (POINTER_TYPE_P (itype)) 5342 itype = signed_type_for (itype); 5343 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); 5344 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 5345 } 5346 } 5347 if (cond_var) 5348 { 5349 if (POINTER_TYPE_P (type) 5350 || TREE_CODE (n1) != INTEGER_CST 5351 || fd->loop.cond_code != LT_EXPR 5352 || tree_int_cst_sgn (n1) != 1) 5353 expand_omp_build_assign (&gsi, cond_var, 5354 build_one_cst (TREE_TYPE (cond_var))); 5355 else 5356 expand_omp_build_assign (&gsi, cond_var, 5357 fold_convert (TREE_TYPE (cond_var), n1)); 5358 } 5359 5360 /* Remove the GIMPLE_OMP_FOR statement. */ 5361 gsi_remove (&gsi, true); 5362 5363 if (!broken_loop) 5364 { 5365 /* Code to control the increment goes in the CONT_BB. 
*/ 5366 gsi = gsi_last_nondebug_bb (cont_bb); 5367 stmt = gsi_stmt (gsi); 5368 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 5369 5370 if (POINTER_TYPE_P (type)) 5371 t = fold_build_pointer_plus (fd->loop.v, step); 5372 else 5373 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 5374 expand_omp_build_assign (&gsi, fd->loop.v, t); 5375 5376 if (fd->collapse > 1) 5377 { 5378 i = fd->collapse - 1; 5379 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 5380 { 5381 t = fold_convert (sizetype, fd->loops[i].step); 5382 t = fold_build_pointer_plus (fd->loops[i].v, t); 5383 } 5384 else 5385 { 5386 t = fold_convert (TREE_TYPE (fd->loops[i].v), 5387 fd->loops[i].step); 5388 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 5389 fd->loops[i].v, t); 5390 } 5391 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 5392 5393 for (i = fd->collapse - 1; i > 0; i--) 5394 { 5395 tree itype = TREE_TYPE (fd->loops[i].v); 5396 tree itype2 = TREE_TYPE (fd->loops[i - 1].v); 5397 if (POINTER_TYPE_P (itype2)) 5398 itype2 = signed_type_for (itype2); 5399 t = fold_convert (itype2, fd->loops[i - 1].step); 5400 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 5401 GSI_SAME_STMT); 5402 t = build3 (COND_EXPR, itype2, 5403 build2 (fd->loops[i].cond_code, boolean_type_node, 5404 fd->loops[i].v, 5405 fold_convert (itype, fd->loops[i].n2)), 5406 build_int_cst (itype2, 0), t); 5407 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) 5408 t = fold_build_pointer_plus (fd->loops[i - 1].v, t); 5409 else 5410 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); 5411 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); 5412 5413 t = fold_convert (itype, fd->loops[i].n1); 5414 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 5415 GSI_SAME_STMT); 5416 t = build3 (COND_EXPR, itype, 5417 build2 (fd->loops[i].cond_code, boolean_type_node, 5418 fd->loops[i].v, 5419 fold_convert (itype, fd->loops[i].n2)), 5420 fd->loops[i].v, t); 5421 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 5422 } 5423 } 5424 if (cond_var) 5425 { 5426 if (POINTER_TYPE_P (type) 5427 || TREE_CODE (n1) != INTEGER_CST 5428 || fd->loop.cond_code != LT_EXPR 5429 || tree_int_cst_sgn (n1) != 1) 5430 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var, 5431 build_one_cst (TREE_TYPE (cond_var))); 5432 else 5433 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var, 5434 fold_convert (TREE_TYPE (cond_var), step)); 5435 expand_omp_build_assign (&gsi, cond_var, t); 5436 } 5437 5438 /* Remove GIMPLE_OMP_CONTINUE. */ 5439 gsi_remove (&gsi, true); 5440 } 5441 5442 /* Emit the condition in L1_BB. */ 5443 gsi = gsi_start_bb (l1_bb); 5444 5445 t = fold_convert (type, n2); 5446 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5447 false, GSI_CONTINUE_LINKING); 5448 tree v = fd->loop.v; 5449 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 5450 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 5451 false, GSI_CONTINUE_LINKING); 5452 t = build2 (fd->loop.cond_code, boolean_type_node, v, t); 5453 cond_stmt = gimple_build_cond_empty (t); 5454 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 5455 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 5456 NULL, NULL) 5457 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 5458 NULL, NULL)) 5459 { 5460 gsi = gsi_for_stmt (cond_stmt); 5461 gimple_regimplify_operands (cond_stmt, &gsi); 5462 } 5463 5464 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. 
*/ 5465 if (is_simt) 5466 { 5467 gsi = gsi_start_bb (l2_bb); 5468 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); 5469 if (POINTER_TYPE_P (type)) 5470 t = fold_build_pointer_plus (fd->loop.v, step); 5471 else 5472 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 5473 expand_omp_build_assign (&gsi, fd->loop.v, t); 5474 } 5475 5476 /* Remove GIMPLE_OMP_RETURN. */ 5477 gsi = gsi_last_nondebug_bb (exit_bb); 5478 gsi_remove (&gsi, true); 5479 5480 /* Connect the new blocks. */ 5481 remove_edge (FALLTHRU_EDGE (entry_bb)); 5482 5483 if (!broken_loop) 5484 { 5485 remove_edge (BRANCH_EDGE (entry_bb)); 5486 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 5487 5488 e = BRANCH_EDGE (l1_bb); 5489 ne = FALLTHRU_EDGE (l1_bb); 5490 e->flags = EDGE_TRUE_VALUE; 5491 } 5492 else 5493 { 5494 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 5495 5496 ne = single_succ_edge (l1_bb); 5497 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 5498 5499 } 5500 ne->flags = EDGE_FALSE_VALUE; 5501 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 5502 ne->probability = e->probability.invert (); 5503 5504 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 5505 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 5506 5507 if (simt_maxlane) 5508 { 5509 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, 5510 NULL_TREE, NULL_TREE); 5511 gsi = gsi_last_bb (entry_bb); 5512 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 5513 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); 5514 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; 5515 FALLTHRU_EDGE (entry_bb)->probability 5516 = profile_probability::guessed_always ().apply_scale (7, 8); 5517 BRANCH_EDGE (entry_bb)->probability 5518 = FALLTHRU_EDGE (entry_bb)->probability.invert (); 5519 l2_dom_bb = entry_bb; 5520 } 5521 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 5522 5523 if (!broken_loop) 5524 { 5525 class loop *loop = alloc_loop (); 5526 loop->header = l1_bb; 5527 loop->latch = cont_bb; 5528 add_loop (loop, l1_bb->loop_father); 5529 loop->safelen = safelen_int; 5530 if (simduid) 5531 { 5532 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); 5533 cfun->has_simduid_loops = true; 5534 } 5535 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize 5536 the loop. */ 5537 if ((flag_tree_loop_vectorize 5538 || !global_options_set.x_flag_tree_loop_vectorize) 5539 && flag_tree_loop_optimize 5540 && loop->safelen > 1) 5541 { 5542 loop->force_vectorize = true; 5543 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))) 5544 { 5545 unsigned HOST_WIDE_INT v 5546 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)); 5547 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen) 5548 loop->simdlen = v; 5549 } 5550 cfun->has_force_vectorize_loops = true; 5551 } 5552 else if (dont_vectorize) 5553 loop->dont_vectorize = true; 5554 } 5555 else if (simduid) 5556 cfun->has_simduid_loops = true; 5557 } 5558 5559 /* Taskloop construct is represented after gimplification with 5560 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 5561 in between them. This routine expands the outer GIMPLE_OMP_FOR, 5562 which should just compute all the needed loop temporaries 5563 for GIMPLE_OMP_TASK. 
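   As a rough illustration (a sketch, not literal compiler output), a user-level

     #pragma omp taskloop
     for (i = 0; i < N; i++)
       BODY;

   is at this point conceptually

     GIMPLE_OMP_FOR        <- expanded here; only evaluates the iteration
                              bounds into _looptemp_ variables
       GIMPLE_OMP_TASK     <- becomes the GOMP_taskloop{,_ull} call
         GIMPLE_OMP_FOR    <- expanded by expand_omp_taskloop_for_inner
           BODY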
*/ 5564 5565 static void 5566 expand_omp_taskloop_for_outer (struct omp_region *region, 5567 struct omp_for_data *fd, 5568 gimple *inner_stmt) 5569 { 5570 tree type, bias = NULL_TREE; 5571 basic_block entry_bb, cont_bb, exit_bb; 5572 gimple_stmt_iterator gsi; 5573 gassign *assign_stmt; 5574 tree *counts = NULL; 5575 int i; 5576 5577 gcc_assert (inner_stmt); 5578 gcc_assert (region->cont); 5579 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK 5580 && gimple_omp_task_taskloop_p (inner_stmt)); 5581 type = TREE_TYPE (fd->loop.v); 5582 5583 /* See if we need to bias by LLONG_MIN. */ 5584 if (fd->iter_type == long_long_unsigned_type_node 5585 && TREE_CODE (type) == INTEGER_TYPE 5586 && !TYPE_UNSIGNED (type)) 5587 { 5588 tree n1, n2; 5589 5590 if (fd->loop.cond_code == LT_EXPR) 5591 { 5592 n1 = fd->loop.n1; 5593 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 5594 } 5595 else 5596 { 5597 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 5598 n2 = fd->loop.n1; 5599 } 5600 if (TREE_CODE (n1) != INTEGER_CST 5601 || TREE_CODE (n2) != INTEGER_CST 5602 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 5603 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 5604 } 5605 5606 entry_bb = region->entry; 5607 cont_bb = region->cont; 5608 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5609 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 5610 exit_bb = region->exit; 5611 5612 gsi = gsi_last_nondebug_bb (entry_bb); 5613 gimple *for_stmt = gsi_stmt (gsi); 5614 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); 5615 if (fd->collapse > 1) 5616 { 5617 int first_zero_iter = -1, dummy = -1; 5618 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; 5619 5620 counts = XALLOCAVEC (tree, fd->collapse); 5621 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5622 zero_iter_bb, first_zero_iter, 5623 dummy_bb, dummy, l2_dom_bb); 5624 5625 if (zero_iter_bb) 5626 { 5627 /* Some counts[i] vars might be uninitialized if 5628 some loop has zero iterations. But the body shouldn't 5629 be executed in that case, so just avoid uninit warnings. */ 5630 for (i = first_zero_iter; i < fd->collapse; i++) 5631 if (SSA_VAR_P (counts[i])) 5632 TREE_NO_WARNING (counts[i]) = 1; 5633 gsi_prev (&gsi); 5634 edge e = split_block (entry_bb, gsi_stmt (gsi)); 5635 entry_bb = e->dest; 5636 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); 5637 gsi = gsi_last_bb (entry_bb); 5638 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 5639 get_immediate_dominator (CDI_DOMINATORS, 5640 zero_iter_bb)); 5641 } 5642 } 5643 5644 tree t0, t1; 5645 t1 = fd->loop.n2; 5646 t0 = fd->loop.n1; 5647 if (POINTER_TYPE_P (TREE_TYPE (t0)) 5648 && TYPE_PRECISION (TREE_TYPE (t0)) 5649 != TYPE_PRECISION (fd->iter_type)) 5650 { 5651 /* Avoid casting pointers to integer of a different size. 
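   For instance (an illustrative case, not tied to any particular target), with
   a 32-bit pointer iterator and a 64-bit iter_type the conversions below go
   through the same-sized signed type first, roughly

     t0 = (unsigned long long) (int) low_bound;

   rather than widening the pointer in one step; low_bound is just a
   placeholder name here.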
*/ 5652 tree itype = signed_type_for (type); 5653 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 5654 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 5655 } 5656 else 5657 { 5658 t1 = fold_convert (fd->iter_type, t1); 5659 t0 = fold_convert (fd->iter_type, t0); 5660 } 5661 if (bias) 5662 { 5663 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 5664 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 5665 } 5666 5667 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt), 5668 OMP_CLAUSE__LOOPTEMP_); 5669 gcc_assert (innerc); 5670 tree startvar = OMP_CLAUSE_DECL (innerc); 5671 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 5672 gcc_assert (innerc); 5673 tree endvar = OMP_CLAUSE_DECL (innerc); 5674 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 5675 { 5676 gcc_assert (innerc); 5677 for (i = 1; i < fd->collapse; i++) 5678 { 5679 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5680 OMP_CLAUSE__LOOPTEMP_); 5681 gcc_assert (innerc); 5682 } 5683 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5684 OMP_CLAUSE__LOOPTEMP_); 5685 if (innerc) 5686 { 5687 /* If needed (inner taskloop has lastprivate clause), propagate 5688 down the total number of iterations. */ 5689 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, 5690 NULL_TREE, false, 5691 GSI_CONTINUE_LINKING); 5692 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 5693 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5694 } 5695 } 5696 5697 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, 5698 GSI_CONTINUE_LINKING); 5699 assign_stmt = gimple_build_assign (startvar, t0); 5700 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5701 5702 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, 5703 GSI_CONTINUE_LINKING); 5704 assign_stmt = gimple_build_assign (endvar, t1); 5705 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5706 if (fd->collapse > 1) 5707 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5708 5709 /* Remove the GIMPLE_OMP_FOR statement. */ 5710 gsi = gsi_for_stmt (for_stmt); 5711 gsi_remove (&gsi, true); 5712 5713 gsi = gsi_last_nondebug_bb (cont_bb); 5714 gsi_remove (&gsi, true); 5715 5716 gsi = gsi_last_nondebug_bb (exit_bb); 5717 gsi_remove (&gsi, true); 5718 5719 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always (); 5720 remove_edge (BRANCH_EDGE (entry_bb)); 5721 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always (); 5722 remove_edge (BRANCH_EDGE (cont_bb)); 5723 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); 5724 set_immediate_dominator (CDI_DOMINATORS, region->entry, 5725 recompute_dominator (CDI_DOMINATORS, region->entry)); 5726 } 5727 5728 /* Taskloop construct is represented after gimplification with 5729 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 5730 in between them. This routine expands the inner GIMPLE_OMP_FOR. 5731 GOMP_taskloop{,_ull} function arranges for each task to be given just 5732 a single range of iterations. 
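   Schematically (a sketch only, reusing the pseudocode names from the comments
   above), each task created by that call ends up executing

     start = _looptemp_1;    <- filled in by GOMP_taskloop
     end = _looptemp_2;
     for (V = start; V cond end; V += STEP)
       BODY;

   which is the plain sequential loop emitted below.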
*/ 5733 5734 static void 5735 expand_omp_taskloop_for_inner (struct omp_region *region, 5736 struct omp_for_data *fd, 5737 gimple *inner_stmt) 5738 { 5739 tree e, t, type, itype, vmain, vback, bias = NULL_TREE; 5740 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; 5741 basic_block fin_bb; 5742 gimple_stmt_iterator gsi; 5743 edge ep; 5744 bool broken_loop = region->cont == NULL; 5745 tree *counts = NULL; 5746 tree n1, n2, step; 5747 5748 itype = type = TREE_TYPE (fd->loop.v); 5749 if (POINTER_TYPE_P (type)) 5750 itype = signed_type_for (type); 5751 5752 /* See if we need to bias by LLONG_MIN. */ 5753 if (fd->iter_type == long_long_unsigned_type_node 5754 && TREE_CODE (type) == INTEGER_TYPE 5755 && !TYPE_UNSIGNED (type)) 5756 { 5757 tree n1, n2; 5758 5759 if (fd->loop.cond_code == LT_EXPR) 5760 { 5761 n1 = fd->loop.n1; 5762 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 5763 } 5764 else 5765 { 5766 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 5767 n2 = fd->loop.n1; 5768 } 5769 if (TREE_CODE (n1) != INTEGER_CST 5770 || TREE_CODE (n2) != INTEGER_CST 5771 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 5772 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 5773 } 5774 5775 entry_bb = region->entry; 5776 cont_bb = region->cont; 5777 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5778 fin_bb = BRANCH_EDGE (entry_bb)->dest; 5779 gcc_assert (broken_loop 5780 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 5781 body_bb = FALLTHRU_EDGE (entry_bb)->dest; 5782 if (!broken_loop) 5783 { 5784 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); 5785 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 5786 } 5787 exit_bb = region->exit; 5788 5789 /* Iteration space partitioning goes in ENTRY_BB. 
*/ 5790 gsi = gsi_last_nondebug_bb (entry_bb); 5791 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 5792 5793 if (fd->collapse > 1) 5794 { 5795 int first_zero_iter = -1, dummy = -1; 5796 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 5797 5798 counts = XALLOCAVEC (tree, fd->collapse); 5799 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5800 fin_bb, first_zero_iter, 5801 dummy_bb, dummy, l2_dom_bb); 5802 t = NULL_TREE; 5803 } 5804 else 5805 t = integer_one_node; 5806 5807 step = fd->loop.step; 5808 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5809 OMP_CLAUSE__LOOPTEMP_); 5810 gcc_assert (innerc); 5811 n1 = OMP_CLAUSE_DECL (innerc); 5812 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 5813 gcc_assert (innerc); 5814 n2 = OMP_CLAUSE_DECL (innerc); 5815 if (bias) 5816 { 5817 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); 5818 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); 5819 } 5820 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 5821 true, NULL_TREE, true, GSI_SAME_STMT); 5822 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 5823 true, NULL_TREE, true, GSI_SAME_STMT); 5824 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 5825 true, NULL_TREE, true, GSI_SAME_STMT); 5826 5827 tree startvar = fd->loop.v; 5828 tree endvar = NULL_TREE; 5829 5830 if (gimple_omp_for_combined_p (fd->for_stmt)) 5831 { 5832 tree clauses = gimple_omp_for_clauses (inner_stmt); 5833 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 5834 gcc_assert (innerc); 5835 startvar = OMP_CLAUSE_DECL (innerc); 5836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5837 OMP_CLAUSE__LOOPTEMP_); 5838 gcc_assert (innerc); 5839 endvar = OMP_CLAUSE_DECL (innerc); 5840 } 5841 t = fold_convert (TREE_TYPE (startvar), n1); 5842 t = force_gimple_operand_gsi (&gsi, t, 5843 DECL_P (startvar) 5844 && TREE_ADDRESSABLE (startvar), 5845 NULL_TREE, false, GSI_CONTINUE_LINKING); 5846 gimple *assign_stmt = gimple_build_assign (startvar, t); 5847 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5848 5849 t = fold_convert (TREE_TYPE (startvar), n2); 5850 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5851 false, GSI_CONTINUE_LINKING); 5852 if (endvar) 5853 { 5854 assign_stmt = gimple_build_assign (endvar, e); 5855 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5856 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 5857 assign_stmt = gimple_build_assign (fd->loop.v, e); 5858 else 5859 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 5860 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5861 } 5862 if (fd->collapse > 1) 5863 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5864 5865 if (!broken_loop) 5866 { 5867 /* The code controlling the sequential loop replaces the 5868 GIMPLE_OMP_CONTINUE. 
*/ 5869 gsi = gsi_last_nondebug_bb (cont_bb); 5870 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5871 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 5872 vmain = gimple_omp_continue_control_use (cont_stmt); 5873 vback = gimple_omp_continue_control_def (cont_stmt); 5874 5875 if (!gimple_omp_for_combined_p (fd->for_stmt)) 5876 { 5877 if (POINTER_TYPE_P (type)) 5878 t = fold_build_pointer_plus (vmain, step); 5879 else 5880 t = fold_build2 (PLUS_EXPR, type, vmain, step); 5881 t = force_gimple_operand_gsi (&gsi, t, 5882 DECL_P (vback) 5883 && TREE_ADDRESSABLE (vback), 5884 NULL_TREE, true, GSI_SAME_STMT); 5885 assign_stmt = gimple_build_assign (vback, t); 5886 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 5887 5888 t = build2 (fd->loop.cond_code, boolean_type_node, 5889 DECL_P (vback) && TREE_ADDRESSABLE (vback) 5890 ? t : vback, e); 5891 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 5892 } 5893 5894 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 5895 gsi_remove (&gsi, true); 5896 5897 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 5898 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 5899 } 5900 5901 /* Remove the GIMPLE_OMP_FOR statement. */ 5902 gsi = gsi_for_stmt (fd->for_stmt); 5903 gsi_remove (&gsi, true); 5904 5905 /* Remove the GIMPLE_OMP_RETURN statement. */ 5906 gsi = gsi_last_nondebug_bb (exit_bb); 5907 gsi_remove (&gsi, true); 5908 5909 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always (); 5910 if (!broken_loop) 5911 remove_edge (BRANCH_EDGE (entry_bb)); 5912 else 5913 { 5914 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); 5915 region->outer->cont = NULL; 5916 } 5917 5918 /* Connect all the blocks. */ 5919 if (!broken_loop) 5920 { 5921 ep = find_edge (cont_bb, body_bb); 5922 if (gimple_omp_for_combined_p (fd->for_stmt)) 5923 { 5924 remove_edge (ep); 5925 ep = NULL; 5926 } 5927 else if (fd->collapse > 1) 5928 { 5929 remove_edge (ep); 5930 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 5931 } 5932 else 5933 ep->flags = EDGE_TRUE_VALUE; 5934 find_edge (cont_bb, fin_bb)->flags 5935 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 5936 } 5937 5938 set_immediate_dominator (CDI_DOMINATORS, body_bb, 5939 recompute_dominator (CDI_DOMINATORS, body_bb)); 5940 if (!broken_loop) 5941 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 5942 recompute_dominator (CDI_DOMINATORS, fin_bb)); 5943 5944 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 5945 { 5946 class loop *loop = alloc_loop (); 5947 loop->header = body_bb; 5948 if (collapse_bb == NULL) 5949 loop->latch = cont_bb; 5950 add_loop (loop, body_bb->loop_father); 5951 } 5952 } 5953 5954 /* A subroutine of expand_omp_for. Generate code for an OpenACC 5955 partitioned loop. The lowering here is abstracted, in that the 5956 loop parameters are passed through internal functions, which are 5957 further lowered by oacc_device_lower, once we get to the target 5958 compiler. The loop is of the form: 5959 5960 for (V = B; V LTGT E; V += S) {BODY} 5961 5962 where LTGT is < or >. We may have a specified chunking size, CHUNKING 5963 (constant 0 for no chunking) and we will have a GWV partitioning 5964 mask, specifying dimensions over which the loop is to be 5965 partitioned (see note below). 
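   As a concrete source-level illustration (a hypothetical example), a loop
   such as

     #pragma acc parallel loop gang vector
     for (V = B; V < E; V += S) {BODY}

   reaches this point with LTGT being '<' and GWV containing the gang and
   vector dimension bits.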
We generate code that looks like 5966 (this ignores tiling): 5967 5968 <entry_bb> [incoming FALL->body, BRANCH->exit] 5969 typedef signedintify (typeof (V)) T; // underlying signed integral type 5970 T range = E - B; 5971 T chunk_no = 0; 5972 T DIR = LTGT == '<' ? +1 : -1; 5973 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); 5974 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); 5975 5976 <head_bb> [created by splitting end of entry_bb] 5977 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); 5978 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); 5979 if (!(offset LTGT bound)) goto bottom_bb; 5980 5981 <body_bb> [incoming] 5982 V = B + offset; 5983 {BODY} 5984 5985 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] 5986 offset += step; 5987 if (offset LTGT bound) goto body_bb; [*] 5988 5989 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb 5990 chunk_no++; 5991 if (chunk < chunk_max) goto head_bb; 5992 5993 <exit_bb> [incoming] 5994 V = B + ((range -/+ 1) / S +/- 1) * S [*] 5995 5996 [*] Needed if V live at end of loop. */ 5997 5998 static void 5999 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) 6000 { 6001 bool is_oacc_kernels_parallelized 6002 = (lookup_attribute ("oacc kernels parallelized", 6003 DECL_ATTRIBUTES (current_function_decl)) != NULL); 6004 { 6005 bool is_oacc_kernels 6006 = (lookup_attribute ("oacc kernels", 6007 DECL_ATTRIBUTES (current_function_decl)) != NULL); 6008 if (is_oacc_kernels_parallelized) 6009 gcc_checking_assert (is_oacc_kernels); 6010 } 6011 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized); 6012 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are 6013 for SSA specifics, and some are for 'parloops' OpenACC 6014 'kernels'-parallelized specifics. */ 6015 6016 tree v = fd->loop.v; 6017 enum tree_code cond_code = fd->loop.cond_code; 6018 enum tree_code plus_code = PLUS_EXPR; 6019 6020 tree chunk_size = integer_minus_one_node; 6021 tree gwv = integer_zero_node; 6022 tree iter_type = TREE_TYPE (v); 6023 tree diff_type = iter_type; 6024 tree plus_type = iter_type; 6025 struct oacc_collapse *counts = NULL; 6026 6027 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) 6028 == GF_OMP_FOR_KIND_OACC_LOOP); 6029 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); 6030 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); 6031 6032 if (POINTER_TYPE_P (iter_type)) 6033 { 6034 plus_code = POINTER_PLUS_EXPR; 6035 plus_type = sizetype; 6036 } 6037 for (int ix = fd->collapse; ix--;) 6038 { 6039 tree diff_type2 = TREE_TYPE (fd->loops[ix].step); 6040 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2)) 6041 diff_type = diff_type2; 6042 } 6043 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) 6044 diff_type = signed_type_for (diff_type); 6045 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node)) 6046 diff_type = integer_type_node; 6047 6048 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ 6049 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ 6050 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ 6051 basic_block bottom_bb = NULL; 6052 6053 /* entry_bb has two successors; the branch edge is to the exit 6054 block, fallthrough edge to body. */ 6055 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 6056 && BRANCH_EDGE (entry_bb)->dest == exit_bb); 6057 6058 /* If cont_bb non-NULL, it has 2 successors. 
The branch successor is 6059 body_bb, or to a block whose only successor is the body_bb. Its 6060 fallthrough successor is the final block (same as the branch 6061 successor of the entry_bb). */ 6062 if (cont_bb) 6063 { 6064 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; 6065 basic_block bed = BRANCH_EDGE (cont_bb)->dest; 6066 6067 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); 6068 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); 6069 } 6070 else 6071 gcc_assert (!gimple_in_ssa_p (cfun)); 6072 6073 /* The exit block only has entry_bb and cont_bb as predecessors. */ 6074 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); 6075 6076 tree chunk_no; 6077 tree chunk_max = NULL_TREE; 6078 tree bound, offset; 6079 tree step = create_tmp_var (diff_type, ".step"); 6080 bool up = cond_code == LT_EXPR; 6081 tree dir = build_int_cst (diff_type, up ? +1 : -1); 6082 bool chunking = !gimple_in_ssa_p (cfun); 6083 bool negating; 6084 6085 /* Tiling vars. */ 6086 tree tile_size = NULL_TREE; 6087 tree element_s = NULL_TREE; 6088 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE; 6089 basic_block elem_body_bb = NULL; 6090 basic_block elem_cont_bb = NULL; 6091 6092 /* SSA instances. */ 6093 tree offset_incr = NULL_TREE; 6094 tree offset_init = NULL_TREE; 6095 6096 gimple_stmt_iterator gsi; 6097 gassign *ass; 6098 gcall *call; 6099 gimple *stmt; 6100 tree expr; 6101 location_t loc; 6102 edge split, be, fte; 6103 6104 /* Split the end of entry_bb to create head_bb. */ 6105 split = split_block (entry_bb, last_stmt (entry_bb)); 6106 basic_block head_bb = split->dest; 6107 entry_bb = split->src; 6108 6109 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ 6110 gsi = gsi_last_nondebug_bb (entry_bb); 6111 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); 6112 loc = gimple_location (for_stmt); 6113 6114 if (gimple_in_ssa_p (cfun)) 6115 { 6116 offset_init = gimple_omp_for_index (for_stmt, 0); 6117 gcc_assert (integer_zerop (fd->loop.n1)); 6118 /* The SSA parallelizer does gang parallelism. */ 6119 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); 6120 } 6121 6122 if (fd->collapse > 1 || fd->tiling) 6123 { 6124 gcc_assert (!gimple_in_ssa_p (cfun) && up); 6125 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); 6126 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type, 6127 TREE_TYPE (fd->loop.n2), loc); 6128 6129 if (SSA_VAR_P (fd->loop.n2)) 6130 { 6131 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, 6132 true, GSI_SAME_STMT); 6133 ass = gimple_build_assign (fd->loop.n2, total); 6134 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6135 } 6136 } 6137 6138 tree b = fd->loop.n1; 6139 tree e = fd->loop.n2; 6140 tree s = fd->loop.step; 6141 6142 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); 6143 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); 6144 6145 /* Convert the step, avoiding possible unsigned->signed overflow. 
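   For example (illustrative values only), a descending loop whose 32-bit
   unsigned step holds (unsigned) -4 == 0xfffffffc must not be converted to the
   signed DIFF_TYPE directly; the folds below negate it first (giving 4),
   convert, and negate again, recovering the signed step -4.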
*/ 6146 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 6147 if (negating) 6148 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 6149 s = fold_convert (diff_type, s); 6150 if (negating) 6151 s = fold_build1 (NEGATE_EXPR, diff_type, s); 6152 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); 6153 6154 if (!chunking) 6155 chunk_size = integer_zero_node; 6156 expr = fold_convert (diff_type, chunk_size); 6157 chunk_size = force_gimple_operand_gsi (&gsi, expr, true, 6158 NULL_TREE, true, GSI_SAME_STMT); 6159 6160 if (fd->tiling) 6161 { 6162 /* Determine the tile size and element step, 6163 modify the outer loop step size. */ 6164 tile_size = create_tmp_var (diff_type, ".tile_size"); 6165 expr = build_int_cst (diff_type, 1); 6166 for (int ix = 0; ix < fd->collapse; ix++) 6167 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr); 6168 expr = force_gimple_operand_gsi (&gsi, expr, true, 6169 NULL_TREE, true, GSI_SAME_STMT); 6170 ass = gimple_build_assign (tile_size, expr); 6171 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6172 6173 element_s = create_tmp_var (diff_type, ".element_s"); 6174 ass = gimple_build_assign (element_s, s); 6175 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6176 6177 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size); 6178 s = force_gimple_operand_gsi (&gsi, expr, true, 6179 NULL_TREE, true, GSI_SAME_STMT); 6180 } 6181 6182 /* Determine the range, avoiding possible unsigned->signed overflow. */ 6183 negating = !up && TYPE_UNSIGNED (iter_type); 6184 expr = fold_build2 (MINUS_EXPR, plus_type, 6185 fold_convert (plus_type, negating ? b : e), 6186 fold_convert (plus_type, negating ? e : b)); 6187 expr = fold_convert (diff_type, expr); 6188 if (negating) 6189 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 6190 tree range = force_gimple_operand_gsi (&gsi, expr, true, 6191 NULL_TREE, true, GSI_SAME_STMT); 6192 6193 chunk_no = build_int_cst (diff_type, 0); 6194 if (chunking) 6195 { 6196 gcc_assert (!gimple_in_ssa_p (cfun)); 6197 6198 expr = chunk_no; 6199 chunk_max = create_tmp_var (diff_type, ".chunk_max"); 6200 chunk_no = create_tmp_var (diff_type, ".chunk_no"); 6201 6202 ass = gimple_build_assign (chunk_no, expr); 6203 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6204 6205 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 6206 build_int_cst (integer_type_node, 6207 IFN_GOACC_LOOP_CHUNKS), 6208 dir, range, s, chunk_size, gwv); 6209 gimple_call_set_lhs (call, chunk_max); 6210 gimple_set_location (call, loc); 6211 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6212 } 6213 else 6214 chunk_size = chunk_no; 6215 6216 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 6217 build_int_cst (integer_type_node, 6218 IFN_GOACC_LOOP_STEP), 6219 dir, range, s, chunk_size, gwv); 6220 gimple_call_set_lhs (call, step); 6221 gimple_set_location (call, loc); 6222 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6223 6224 /* Remove the GIMPLE_OMP_FOR. */ 6225 gsi_remove (&gsi, true); 6226 6227 /* Fixup edges from head_bb. 
*/ 6228 be = BRANCH_EDGE (head_bb); 6229 fte = FALLTHRU_EDGE (head_bb); 6230 be->flags |= EDGE_FALSE_VALUE; 6231 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 6232 6233 basic_block body_bb = fte->dest; 6234 6235 if (gimple_in_ssa_p (cfun)) 6236 { 6237 gsi = gsi_last_nondebug_bb (cont_bb); 6238 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 6239 6240 offset = gimple_omp_continue_control_use (cont_stmt); 6241 offset_incr = gimple_omp_continue_control_def (cont_stmt); 6242 } 6243 else 6244 { 6245 offset = create_tmp_var (diff_type, ".offset"); 6246 offset_init = offset_incr = offset; 6247 } 6248 bound = create_tmp_var (TREE_TYPE (offset), ".bound"); 6249 6250 /* Loop offset & bound go into head_bb. */ 6251 gsi = gsi_start_bb (head_bb); 6252 6253 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 6254 build_int_cst (integer_type_node, 6255 IFN_GOACC_LOOP_OFFSET), 6256 dir, range, s, 6257 chunk_size, gwv, chunk_no); 6258 gimple_call_set_lhs (call, offset_init); 6259 gimple_set_location (call, loc); 6260 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 6261 6262 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 6263 build_int_cst (integer_type_node, 6264 IFN_GOACC_LOOP_BOUND), 6265 dir, range, s, 6266 chunk_size, gwv, offset_init); 6267 gimple_call_set_lhs (call, bound); 6268 gimple_set_location (call, loc); 6269 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 6270 6271 expr = build2 (cond_code, boolean_type_node, offset_init, bound); 6272 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 6273 GSI_CONTINUE_LINKING); 6274 6275 /* V assignment goes into body_bb. */ 6276 if (!gimple_in_ssa_p (cfun)) 6277 { 6278 gsi = gsi_start_bb (body_bb); 6279 6280 expr = build2 (plus_code, iter_type, b, 6281 fold_convert (plus_type, offset)); 6282 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6283 true, GSI_SAME_STMT); 6284 ass = gimple_build_assign (v, expr); 6285 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6286 6287 if (fd->collapse > 1 || fd->tiling) 6288 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type); 6289 6290 if (fd->tiling) 6291 { 6292 /* Determine the range of the element loop -- usually simply 6293 the tile_size, but could be smaller if the final 6294 iteration of the outer loop is a partial tile. */ 6295 tree e_range = create_tmp_var (diff_type, ".e_range"); 6296 6297 expr = build2 (MIN_EXPR, diff_type, 6298 build2 (MINUS_EXPR, diff_type, bound, offset), 6299 build2 (MULT_EXPR, diff_type, tile_size, 6300 element_s)); 6301 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6302 true, GSI_SAME_STMT); 6303 ass = gimple_build_assign (e_range, expr); 6304 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6305 6306 /* Determine bound, offset & step of inner loop. */ 6307 e_bound = create_tmp_var (diff_type, ".e_bound"); 6308 e_offset = create_tmp_var (diff_type, ".e_offset"); 6309 e_step = create_tmp_var (diff_type, ".e_step"); 6310 6311 /* Mark these as element loops. */ 6312 tree t, e_gwv = integer_minus_one_node; 6313 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. 
*/ 6314 6315 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); 6316 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 6317 element_s, chunk, e_gwv, chunk); 6318 gimple_call_set_lhs (call, e_offset); 6319 gimple_set_location (call, loc); 6320 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6321 6322 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); 6323 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 6324 element_s, chunk, e_gwv, e_offset); 6325 gimple_call_set_lhs (call, e_bound); 6326 gimple_set_location (call, loc); 6327 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6328 6329 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP); 6330 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range, 6331 element_s, chunk, e_gwv); 6332 gimple_call_set_lhs (call, e_step); 6333 gimple_set_location (call, loc); 6334 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6335 6336 /* Add test and split block. */ 6337 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 6338 stmt = gimple_build_cond_empty (expr); 6339 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6340 split = split_block (body_bb, stmt); 6341 elem_body_bb = split->dest; 6342 if (cont_bb == body_bb) 6343 cont_bb = elem_body_bb; 6344 body_bb = split->src; 6345 6346 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 6347 6348 /* Add a dummy exit for the tiled block when cont_bb is missing. */ 6349 if (cont_bb == NULL) 6350 { 6351 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE); 6352 e->probability = profile_probability::even (); 6353 split->probability = profile_probability::even (); 6354 } 6355 6356 /* Initialize the user's loop vars. */ 6357 gsi = gsi_start_bb (elem_body_bb); 6358 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset, 6359 diff_type); 6360 } 6361 } 6362 6363 /* Loop increment goes into cont_bb. If this is not a loop, we 6364 will have spawned threads as if it was, and each one will 6365 execute one iteration. The specification is not explicit about 6366 whether such constructs are ill-formed or not, and they can 6367 occur, especially when noreturn routines are involved. */ 6368 if (cont_bb) 6369 { 6370 gsi = gsi_last_nondebug_bb (cont_bb); 6371 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 6372 loc = gimple_location (cont_stmt); 6373 6374 if (fd->tiling) 6375 { 6376 /* Insert element loop increment and test. 
*/ 6377 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step); 6378 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6379 true, GSI_SAME_STMT); 6380 ass = gimple_build_assign (e_offset, expr); 6381 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6382 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 6383 6384 stmt = gimple_build_cond_empty (expr); 6385 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6386 split = split_block (cont_bb, stmt); 6387 elem_cont_bb = split->src; 6388 cont_bb = split->dest; 6389 6390 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 6391 split->probability = profile_probability::unlikely ().guessed (); 6392 edge latch_edge 6393 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE); 6394 latch_edge->probability = profile_probability::likely ().guessed (); 6395 6396 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE); 6397 skip_edge->probability = profile_probability::unlikely ().guessed (); 6398 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx); 6399 loop_entry_edge->probability 6400 = profile_probability::likely ().guessed (); 6401 6402 gsi = gsi_for_stmt (cont_stmt); 6403 } 6404 6405 /* Increment offset. */ 6406 if (gimple_in_ssa_p (cfun)) 6407 expr = build2 (plus_code, iter_type, offset, 6408 fold_convert (plus_type, step)); 6409 else 6410 expr = build2 (PLUS_EXPR, diff_type, offset, step); 6411 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6412 true, GSI_SAME_STMT); 6413 ass = gimple_build_assign (offset_incr, expr); 6414 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6415 expr = build2 (cond_code, boolean_type_node, offset_incr, bound); 6416 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); 6417 6418 /* Remove the GIMPLE_OMP_CONTINUE. */ 6419 gsi_remove (&gsi, true); 6420 6421 /* Fixup edges from cont_bb. */ 6422 be = BRANCH_EDGE (cont_bb); 6423 fte = FALLTHRU_EDGE (cont_bb); 6424 be->flags |= EDGE_TRUE_VALUE; 6425 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 6426 6427 if (chunking) 6428 { 6429 /* Split the beginning of exit_bb to make bottom_bb. We 6430 need to insert a nop at the start, because splitting is 6431 after a stmt, not before. */ 6432 gsi = gsi_start_bb (exit_bb); 6433 stmt = gimple_build_nop (); 6434 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6435 split = split_block (exit_bb, stmt); 6436 bottom_bb = split->src; 6437 exit_bb = split->dest; 6438 gsi = gsi_last_bb (bottom_bb); 6439 6440 /* Chunk increment and test goes into bottom_bb. */ 6441 expr = build2 (PLUS_EXPR, diff_type, chunk_no, 6442 build_int_cst (diff_type, 1)); 6443 ass = gimple_build_assign (chunk_no, expr); 6444 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); 6445 6446 /* Chunk test at end of bottom_bb. */ 6447 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); 6448 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 6449 GSI_CONTINUE_LINKING); 6450 6451 /* Fixup edges from bottom_bb. */ 6452 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 6453 split->probability = profile_probability::unlikely ().guessed (); 6454 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); 6455 latch_edge->probability = profile_probability::likely ().guessed (); 6456 } 6457 } 6458 6459 gsi = gsi_last_nondebug_bb (exit_bb); 6460 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 6461 loc = gimple_location (gsi_stmt (gsi)); 6462 6463 if (!gimple_in_ssa_p (cfun)) 6464 { 6465 /* Insert the final value of V, in case it is live. 
This is the 6466 value for the only thread that survives past the join. */ 6467 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 6468 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 6469 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 6470 expr = fold_build2 (MULT_EXPR, diff_type, expr, s); 6471 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); 6472 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6473 true, GSI_SAME_STMT); 6474 ass = gimple_build_assign (v, expr); 6475 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6476 } 6477 6478 /* Remove the OMP_RETURN. */ 6479 gsi_remove (&gsi, true); 6480 6481 if (cont_bb) 6482 { 6483 /* We now have one, two or three nested loops. Update the loop 6484 structures. */ 6485 class loop *parent = entry_bb->loop_father; 6486 class loop *body = body_bb->loop_father; 6487 6488 if (chunking) 6489 { 6490 class loop *chunk_loop = alloc_loop (); 6491 chunk_loop->header = head_bb; 6492 chunk_loop->latch = bottom_bb; 6493 add_loop (chunk_loop, parent); 6494 parent = chunk_loop; 6495 } 6496 else if (parent != body) 6497 { 6498 gcc_assert (body->header == body_bb); 6499 gcc_assert (body->latch == cont_bb 6500 || single_pred (body->latch) == cont_bb); 6501 parent = NULL; 6502 } 6503 6504 if (parent) 6505 { 6506 class loop *body_loop = alloc_loop (); 6507 body_loop->header = body_bb; 6508 body_loop->latch = cont_bb; 6509 add_loop (body_loop, parent); 6510 6511 if (fd->tiling) 6512 { 6513 /* Insert tiling's element loop. */ 6514 class loop *inner_loop = alloc_loop (); 6515 inner_loop->header = elem_body_bb; 6516 inner_loop->latch = elem_cont_bb; 6517 add_loop (inner_loop, body_loop); 6518 } 6519 } 6520 } 6521 } 6522 6523 /* Expand the OMP loop defined by REGION. */ 6524 6525 static void 6526 expand_omp_for (struct omp_region *region, gimple *inner_stmt) 6527 { 6528 struct omp_for_data fd; 6529 struct omp_for_data_loop *loops; 6530 6531 loops 6532 = (struct omp_for_data_loop *) 6533 alloca (gimple_omp_for_collapse (last_stmt (region->entry)) 6534 * sizeof (struct omp_for_data_loop)); 6535 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), 6536 &fd, loops); 6537 region->sched_kind = fd.sched_kind; 6538 region->sched_modifiers = fd.sched_modifiers; 6539 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0; 6540 6541 gcc_assert (EDGE_COUNT (region->entry->succs) == 2); 6542 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 6543 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 6544 if (region->cont) 6545 { 6546 gcc_assert (EDGE_COUNT (region->cont->succs) == 2); 6547 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 6548 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 6549 } 6550 else 6551 /* If there isn't a continue then this is a degenerate case where 6552 the introduction of abnormal edges during lowering will prevent 6553 original loops from being detected. Fix that up.
*/ 6554 loops_state_set (LOOPS_NEED_FIXUP); 6555 6556 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD) 6557 expand_omp_simd (region, &fd); 6558 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) 6559 { 6560 gcc_assert (!inner_stmt); 6561 expand_oacc_for (region, &fd); 6562 } 6563 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP) 6564 { 6565 if (gimple_omp_for_combined_into_p (fd.for_stmt)) 6566 expand_omp_taskloop_for_inner (region, &fd, inner_stmt); 6567 else 6568 expand_omp_taskloop_for_outer (region, &fd, inner_stmt); 6569 } 6570 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC 6571 && !fd.have_ordered) 6572 { 6573 if (fd.chunk_size == NULL) 6574 expand_omp_for_static_nochunk (region, &fd, inner_stmt); 6575 else 6576 expand_omp_for_static_chunk (region, &fd, inner_stmt); 6577 } 6578 else 6579 { 6580 int fn_index, start_ix, next_ix; 6581 unsigned HOST_WIDE_INT sched = 0; 6582 tree sched_arg = NULL_TREE; 6583 6584 gcc_assert (gimple_omp_for_kind (fd.for_stmt) 6585 == GF_OMP_FOR_KIND_FOR); 6586 if (fd.chunk_size == NULL 6587 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) 6588 fd.chunk_size = integer_zero_node; 6589 switch (fd.sched_kind) 6590 { 6591 case OMP_CLAUSE_SCHEDULE_RUNTIME: 6592 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0 6593 && fd.lastprivate_conditional == 0) 6594 { 6595 gcc_assert (!fd.have_ordered); 6596 fn_index = 6; 6597 sched = 4; 6598 } 6599 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 6600 && !fd.have_ordered 6601 && fd.lastprivate_conditional == 0) 6602 fn_index = 7; 6603 else 6604 { 6605 fn_index = 3; 6606 sched = (HOST_WIDE_INT_1U << 31); 6607 } 6608 break; 6609 case OMP_CLAUSE_SCHEDULE_DYNAMIC: 6610 case OMP_CLAUSE_SCHEDULE_GUIDED: 6611 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 6612 && !fd.have_ordered 6613 && fd.lastprivate_conditional == 0) 6614 { 6615 fn_index = 3 + fd.sched_kind; 6616 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; 6617 break; 6618 } 6619 fn_index = fd.sched_kind; 6620 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; 6621 sched += (HOST_WIDE_INT_1U << 31); 6622 break; 6623 case OMP_CLAUSE_SCHEDULE_STATIC: 6624 gcc_assert (fd.have_ordered); 6625 fn_index = 0; 6626 sched = (HOST_WIDE_INT_1U << 31) + 1; 6627 break; 6628 default: 6629 gcc_unreachable (); 6630 } 6631 if (!fd.ordered) 6632 fn_index += fd.have_ordered * 8; 6633 if (fd.ordered) 6634 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; 6635 else 6636 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; 6637 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; 6638 if (fd.have_reductemp || fd.have_pointer_condtemp) 6639 { 6640 if (fd.ordered) 6641 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START; 6642 else if (fd.have_ordered) 6643 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START; 6644 else 6645 start_ix = (int)BUILT_IN_GOMP_LOOP_START; 6646 sched_arg = build_int_cstu (long_integer_type_node, sched); 6647 if (!fd.chunk_size) 6648 fd.chunk_size = integer_zero_node; 6649 } 6650 if (fd.iter_type == long_long_unsigned_type_node) 6651 { 6652 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START 6653 - (int)BUILT_IN_GOMP_LOOP_STATIC_START); 6654 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT 6655 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); 6656 } 6657 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, 6658 (enum built_in_function) next_ix, sched_arg, 6659 inner_stmt); 6660 } 6661 6662 if 
(gimple_in_ssa_p (cfun)) 6663 update_ssa (TODO_update_ssa_only_virtuals); 6664 } 6665 6666 /* Expand code for an OpenMP sections directive. In pseudo code, we generate 6667 6668 v = GOMP_sections_start (n); 6669 L0: 6670 switch (v) 6671 { 6672 case 0: 6673 goto L2; 6674 case 1: 6675 section 1; 6676 goto L1; 6677 case 2: 6678 ... 6679 case n: 6680 ... 6681 default: 6682 abort (); 6683 } 6684 L1: 6685 v = GOMP_sections_next (); 6686 goto L0; 6687 L2: 6688 reduction; 6689 6690 If this is a combined parallel sections, replace the call to 6691 GOMP_sections_start with call to GOMP_sections_next. */ 6692 6693 static void 6694 expand_omp_sections (struct omp_region *region) 6695 { 6696 tree t, u, vin = NULL, vmain, vnext, l2; 6697 unsigned len; 6698 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; 6699 gimple_stmt_iterator si, switch_si; 6700 gomp_sections *sections_stmt; 6701 gimple *stmt; 6702 gomp_continue *cont; 6703 edge_iterator ei; 6704 edge e; 6705 struct omp_region *inner; 6706 unsigned i, casei; 6707 bool exit_reachable = region->cont != NULL; 6708 6709 gcc_assert (region->exit != NULL); 6710 entry_bb = region->entry; 6711 l0_bb = single_succ (entry_bb); 6712 l1_bb = region->cont; 6713 l2_bb = region->exit; 6714 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb) 6715 l2 = gimple_block_label (l2_bb); 6716 else 6717 { 6718 /* This can happen if there are reductions. */ 6719 len = EDGE_COUNT (l0_bb->succs); 6720 gcc_assert (len > 0); 6721 e = EDGE_SUCC (l0_bb, len - 1); 6722 si = gsi_last_nondebug_bb (e->dest); 6723 l2 = NULL_TREE; 6724 if (gsi_end_p (si) 6725 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 6726 l2 = gimple_block_label (e->dest); 6727 else 6728 FOR_EACH_EDGE (e, ei, l0_bb->succs) 6729 { 6730 si = gsi_last_nondebug_bb (e->dest); 6731 if (gsi_end_p (si) 6732 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 6733 { 6734 l2 = gimple_block_label (e->dest); 6735 break; 6736 } 6737 } 6738 } 6739 if (exit_reachable) 6740 default_bb = create_empty_bb (l1_bb->prev_bb); 6741 else 6742 default_bb = create_empty_bb (l0_bb); 6743 6744 /* We will build a switch() with enough cases for all the 6745 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work 6746 and a default case to abort if something goes wrong. */ 6747 len = EDGE_COUNT (l0_bb->succs); 6748 6749 /* Use vec::quick_push on label_vec throughout, since we know the size 6750 in advance. */ 6751 auto_vec<tree> label_vec (len); 6752 6753 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the 6754 GIMPLE_OMP_SECTIONS statement. 
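   For instance (an illustrative sketch), a construct containing three
   GIMPLE_OMP_SECTION regions and neither reduction nor conditional-lastprivate
   temporaries simply gets

     VIN = GOMP_sections_start (3);

   where the argument is LEN - 1, i.e. the number of sections excluding the
   exit successor.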
*/ 6755 si = gsi_last_nondebug_bb (entry_bb); 6756 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); 6757 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); 6758 vin = gimple_omp_sections_control (sections_stmt); 6759 tree clauses = gimple_omp_sections_clauses (sections_stmt); 6760 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 6761 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 6762 tree cond_var = NULL_TREE; 6763 if (reductmp || condtmp) 6764 { 6765 tree reductions = null_pointer_node, mem = null_pointer_node; 6766 tree memv = NULL_TREE, condtemp = NULL_TREE; 6767 gimple_stmt_iterator gsi = gsi_none (); 6768 gimple *g = NULL; 6769 if (reductmp) 6770 { 6771 reductions = OMP_CLAUSE_DECL (reductmp); 6772 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 6773 g = SSA_NAME_DEF_STMT (reductions); 6774 reductions = gimple_assign_rhs1 (g); 6775 OMP_CLAUSE_DECL (reductmp) = reductions; 6776 gsi = gsi_for_stmt (g); 6777 } 6778 else 6779 gsi = si; 6780 if (condtmp) 6781 { 6782 condtemp = OMP_CLAUSE_DECL (condtmp); 6783 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp), 6784 OMP_CLAUSE__CONDTEMP_); 6785 cond_var = OMP_CLAUSE_DECL (c); 6786 tree type = TREE_TYPE (condtemp); 6787 memv = create_tmp_var (type); 6788 TREE_ADDRESSABLE (memv) = 1; 6789 unsigned cnt = 0; 6790 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 6791 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE 6792 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) 6793 ++cnt; 6794 unsigned HOST_WIDE_INT sz 6795 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt; 6796 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), 6797 false); 6798 mem = build_fold_addr_expr (memv); 6799 } 6800 t = build_int_cst (unsigned_type_node, len - 1); 6801 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START); 6802 stmt = gimple_build_call (u, 3, t, reductions, mem); 6803 gimple_call_set_lhs (stmt, vin); 6804 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6805 if (condtmp) 6806 { 6807 expand_omp_build_assign (&gsi, condtemp, memv, false); 6808 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var), 6809 vin, build_one_cst (TREE_TYPE (cond_var))); 6810 expand_omp_build_assign (&gsi, cond_var, t, false); 6811 } 6812 if (reductmp) 6813 { 6814 gsi_remove (&gsi, true); 6815 release_ssa_name (gimple_assign_lhs (g)); 6816 } 6817 } 6818 else if (!is_combined_parallel (region)) 6819 { 6820 /* If we are not inside a combined parallel+sections region, 6821 call GOMP_sections_start. */ 6822 t = build_int_cst (unsigned_type_node, len - 1); 6823 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); 6824 stmt = gimple_build_call (u, 1, t); 6825 } 6826 else 6827 { 6828 /* Otherwise, call GOMP_sections_next. */ 6829 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 6830 stmt = gimple_build_call (u, 0); 6831 } 6832 if (!reductmp && !condtmp) 6833 { 6834 gimple_call_set_lhs (stmt, vin); 6835 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6836 } 6837 gsi_remove (&si, true); 6838 6839 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in 6840 L0_BB. 
*/ 6841 switch_si = gsi_last_nondebug_bb (l0_bb); 6842 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH); 6843 if (exit_reachable) 6844 { 6845 cont = as_a <gomp_continue *> (last_stmt (l1_bb)); 6846 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE); 6847 vmain = gimple_omp_continue_control_use (cont); 6848 vnext = gimple_omp_continue_control_def (cont); 6849 } 6850 else 6851 { 6852 vmain = vin; 6853 vnext = NULL_TREE; 6854 } 6855 6856 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2); 6857 label_vec.quick_push (t); 6858 i = 1; 6859 6860 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */ 6861 for (inner = region->inner, casei = 1; 6862 inner; 6863 inner = inner->next, i++, casei++) 6864 { 6865 basic_block s_entry_bb, s_exit_bb; 6866 6867 /* Skip optional reduction region. */ 6868 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD) 6869 { 6870 --i; 6871 --casei; 6872 continue; 6873 } 6874 6875 s_entry_bb = inner->entry; 6876 s_exit_bb = inner->exit; 6877 6878 t = gimple_block_label (s_entry_bb); 6879 u = build_int_cst (unsigned_type_node, casei); 6880 u = build_case_label (u, NULL, t); 6881 label_vec.quick_push (u); 6882 6883 si = gsi_last_nondebug_bb (s_entry_bb); 6884 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION); 6885 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si))); 6886 gsi_remove (&si, true); 6887 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; 6888 6889 if (s_exit_bb == NULL) 6890 continue; 6891 6892 si = gsi_last_nondebug_bb (s_exit_bb); 6893 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 6894 gsi_remove (&si, true); 6895 6896 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; 6897 } 6898 6899 /* Error handling code goes in DEFAULT_BB. */ 6900 t = gimple_block_label (default_bb); 6901 u = build_case_label (NULL, NULL, t); 6902 make_edge (l0_bb, default_bb, 0); 6903 add_bb_to_loop (default_bb, current_loops->tree_root); 6904 6905 stmt = gimple_build_switch (vmain, u, label_vec); 6906 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT); 6907 gsi_remove (&switch_si, true); 6908 6909 si = gsi_start_bb (default_bb); 6910 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); 6911 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING); 6912 6913 if (exit_reachable) 6914 { 6915 tree bfn_decl; 6916 6917 /* Code to get the next section goes in L1_BB. */ 6918 si = gsi_last_nondebug_bb (l1_bb); 6919 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); 6920 6921 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 6922 stmt = gimple_build_call (bfn_decl, 0); 6923 gimple_call_set_lhs (stmt, vnext); 6924 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6925 if (cond_var) 6926 { 6927 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var), 6928 vnext, build_one_cst (TREE_TYPE (cond_var))); 6929 expand_omp_build_assign (&si, cond_var, t, false); 6930 } 6931 gsi_remove (&si, true); 6932 6933 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; 6934 } 6935 6936 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. 
*/ 6937 si = gsi_last_nondebug_bb (l2_bb); 6938 if (gimple_omp_return_nowait_p (gsi_stmt (si))) 6939 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT); 6940 else if (gimple_omp_return_lhs (gsi_stmt (si))) 6941 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL); 6942 else 6943 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END); 6944 stmt = gimple_build_call (t, 0); 6945 if (gimple_omp_return_lhs (gsi_stmt (si))) 6946 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si))); 6947 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6948 gsi_remove (&si, true); 6949 6950 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); 6951 } 6952 6953 /* Expand code for an OpenMP single directive. We've already expanded 6954 much of the code, here we simply place the GOMP_barrier call. */ 6955 6956 static void 6957 expand_omp_single (struct omp_region *region) 6958 { 6959 basic_block entry_bb, exit_bb; 6960 gimple_stmt_iterator si; 6961 6962 entry_bb = region->entry; 6963 exit_bb = region->exit; 6964 6965 si = gsi_last_nondebug_bb (entry_bb); 6966 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE); 6967 gsi_remove (&si, true); 6968 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 6969 6970 si = gsi_last_nondebug_bb (exit_bb); 6971 if (!gimple_omp_return_nowait_p (gsi_stmt (si))) 6972 { 6973 tree t = gimple_omp_return_lhs (gsi_stmt (si)); 6974 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT); 6975 } 6976 gsi_remove (&si, true); 6977 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; 6978 } 6979 6980 /* Generic expansion for OpenMP synchronization directives: master, 6981 ordered and critical. All we need to do here is remove the entry 6982 and exit markers for REGION. */ 6983 6984 static void 6985 expand_omp_synch (struct omp_region *region) 6986 { 6987 basic_block entry_bb, exit_bb; 6988 gimple_stmt_iterator si; 6989 6990 entry_bb = region->entry; 6991 exit_bb = region->exit; 6992 6993 si = gsi_last_nondebug_bb (entry_bb); 6994 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE 6995 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER 6996 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP 6997 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED 6998 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL 6999 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); 7000 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS 7001 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si)))) 7002 { 7003 expand_omp_taskreg (region); 7004 return; 7005 } 7006 gsi_remove (&si, true); 7007 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 7008 7009 if (exit_bb) 7010 { 7011 si = gsi_last_nondebug_bb (exit_bb); 7012 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 7013 gsi_remove (&si, true); 7014 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; 7015 } 7016 } 7017 7018 /* Translate enum omp_memory_order to enum memmodel. The two enums 7019 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED 7020 is 0. 
*/ 7021 7022 static enum memmodel 7023 omp_memory_order_to_memmodel (enum omp_memory_order mo) 7024 { 7025 switch (mo) 7026 { 7027 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED; 7028 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE; 7029 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE; 7030 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL; 7031 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST; 7032 default: gcc_unreachable (); 7033 } 7034 } 7035 7036 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 7037 operation as a normal volatile load. */ 7038 7039 static bool 7040 expand_omp_atomic_load (basic_block load_bb, tree addr, 7041 tree loaded_val, int index) 7042 { 7043 enum built_in_function tmpbase; 7044 gimple_stmt_iterator gsi; 7045 basic_block store_bb; 7046 location_t loc; 7047 gimple *stmt; 7048 tree decl, call, type, itype; 7049 7050 gsi = gsi_last_nondebug_bb (load_bb); 7051 stmt = gsi_stmt (gsi); 7052 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); 7053 loc = gimple_location (stmt); 7054 7055 /* ??? If the target does not implement atomic_load_optab[mode], and mode 7056 is smaller than word size, then expand_atomic_load assumes that the load 7057 is atomic. We could avoid the builtin entirely in this case. */ 7058 7059 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); 7060 decl = builtin_decl_explicit (tmpbase); 7061 if (decl == NULL_TREE) 7062 return false; 7063 7064 type = TREE_TYPE (loaded_val); 7065 itype = TREE_TYPE (TREE_TYPE (decl)); 7066 7067 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt); 7068 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo)); 7069 call = build_call_expr_loc (loc, decl, 2, addr, mo); 7070 if (!useless_type_conversion_p (type, itype)) 7071 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); 7072 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); 7073 7074 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 7075 gsi_remove (&gsi, true); 7076 7077 store_bb = single_succ (load_bb); 7078 gsi = gsi_last_nondebug_bb (store_bb); 7079 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); 7080 gsi_remove (&gsi, true); 7081 7082 if (gimple_in_ssa_p (cfun)) 7083 update_ssa (TODO_update_ssa_no_phi); 7084 7085 return true; 7086 } 7087 7088 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 7089 operation as a normal volatile store. */ 7090 7091 static bool 7092 expand_omp_atomic_store (basic_block load_bb, tree addr, 7093 tree loaded_val, tree stored_val, int index) 7094 { 7095 enum built_in_function tmpbase; 7096 gimple_stmt_iterator gsi; 7097 basic_block store_bb = single_succ (load_bb); 7098 location_t loc; 7099 gimple *stmt; 7100 tree decl, call, type, itype; 7101 machine_mode imode; 7102 bool exchange; 7103 7104 gsi = gsi_last_nondebug_bb (load_bb); 7105 stmt = gsi_stmt (gsi); 7106 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); 7107 7108 /* If the load value is needed, then this isn't a store but an exchange. */ 7109 exchange = gimple_omp_atomic_need_value_p (stmt); 7110 7111 gsi = gsi_last_nondebug_bb (store_bb); 7112 stmt = gsi_stmt (gsi); 7113 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE); 7114 loc = gimple_location (stmt); 7115 7116 /* ??? If the target does not implement atomic_store_optab[mode], and mode 7117 is smaller than word size, then expand_atomic_store assumes that the store 7118 is atomic. 
We could avoid the builtin entirely in this case. */ 7119 7120 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N); 7121 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1); 7122 decl = builtin_decl_explicit (tmpbase); 7123 if (decl == NULL_TREE) 7124 return false; 7125 7126 type = TREE_TYPE (stored_val); 7127 7128 /* Dig out the type of the function's second argument. */ 7129 itype = TREE_TYPE (decl); 7130 itype = TYPE_ARG_TYPES (itype); 7131 itype = TREE_CHAIN (itype); 7132 itype = TREE_VALUE (itype); 7133 imode = TYPE_MODE (itype); 7134 7135 if (exchange && !can_atomic_exchange_p (imode, true)) 7136 return false; 7137 7138 if (!useless_type_conversion_p (itype, type)) 7139 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); 7140 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt); 7141 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo)); 7142 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo); 7143 if (exchange) 7144 { 7145 if (!useless_type_conversion_p (type, itype)) 7146 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); 7147 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); 7148 } 7149 7150 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 7151 gsi_remove (&gsi, true); 7152 7153 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */ 7154 gsi = gsi_last_nondebug_bb (load_bb); 7155 gsi_remove (&gsi, true); 7156 7157 if (gimple_in_ssa_p (cfun)) 7158 update_ssa (TODO_update_ssa_no_phi); 7159 7160 return true; 7161 } 7162 7163 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 7164 operation as a __atomic_fetch_op builtin. INDEX is log2 of the 7165 size of the data type, and thus usable to find the index of the builtin 7166 decl. Returns false if the expression is not of the proper form. */ 7167 7168 static bool 7169 expand_omp_atomic_fetch_op (basic_block load_bb, 7170 tree addr, tree loaded_val, 7171 tree stored_val, int index) 7172 { 7173 enum built_in_function oldbase, newbase, tmpbase; 7174 tree decl, itype, call; 7175 tree lhs, rhs; 7176 basic_block store_bb = single_succ (load_bb); 7177 gimple_stmt_iterator gsi; 7178 gimple *stmt; 7179 location_t loc; 7180 enum tree_code code; 7181 bool need_old, need_new; 7182 machine_mode imode; 7183 7184 /* We expect to find the following sequences: 7185 7186 load_bb: 7187 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) 7188 7189 store_bb: 7190 val = tmp OP something; (or: something OP tmp) 7191 GIMPLE_OMP_STORE (val) 7192 7193 ???FIXME: Allow a more flexible sequence. 7194 Perhaps use data flow to pick the statements. 
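
     As a sketch of the intended mapping (illustrative only): for a 4-byte
     int x, a directive such as

       #pragma omp atomic
       x = x + 1;

     matches the pattern above and is expanded into a single call along
     the lines of

       __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED);

     with the add-fetch variant used instead when the updated value is
     needed (need_new below).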
*/

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ?
loaded_val : stored_val; 7289 call = fold_convert_loc (loc, TREE_TYPE (lhs), call); 7290 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call); 7291 } 7292 else 7293 call = fold_convert_loc (loc, void_type_node, call); 7294 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 7295 gsi_remove (&gsi, true); 7296 7297 gsi = gsi_last_nondebug_bb (store_bb); 7298 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); 7299 gsi_remove (&gsi, true); 7300 gsi = gsi_last_nondebug_bb (store_bb); 7301 stmt = gsi_stmt (gsi); 7302 gsi_remove (&gsi, true); 7303 7304 if (gimple_in_ssa_p (cfun)) 7305 { 7306 release_defs (stmt); 7307 update_ssa (TODO_update_ssa_no_phi); 7308 } 7309 7310 return true; 7311 } 7312 7313 /* A subroutine of expand_omp_atomic. Implement the atomic operation as: 7314 7315 oldval = *addr; 7316 repeat: 7317 newval = rhs; // with oldval replacing *addr in rhs 7318 oldval = __sync_val_compare_and_swap (addr, oldval, newval); 7319 if (oldval != newval) 7320 goto repeat; 7321 7322 INDEX is log2 of the size of the data type, and thus usable to find the 7323 index of the builtin decl. */ 7324 7325 static bool 7326 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, 7327 tree addr, tree loaded_val, tree stored_val, 7328 int index) 7329 { 7330 tree loadedi, storedi, initial, new_storedi, old_vali; 7331 tree type, itype, cmpxchg, iaddr, atype; 7332 gimple_stmt_iterator si; 7333 basic_block loop_header = single_succ (load_bb); 7334 gimple *phi, *stmt; 7335 edge e; 7336 enum built_in_function fncode; 7337 7338 /* ??? We need a non-pointer interface to __atomic_compare_exchange in 7339 order to use the RELAXED memory model effectively. */ 7340 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N 7341 + index + 1); 7342 cmpxchg = builtin_decl_explicit (fncode); 7343 if (cmpxchg == NULL_TREE) 7344 return false; 7345 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val)); 7346 atype = type; 7347 itype = TREE_TYPE (TREE_TYPE (cmpxchg)); 7348 7349 if (!can_compare_and_swap_p (TYPE_MODE (itype), true) 7350 || !can_atomic_load_p (TYPE_MODE (itype))) 7351 return false; 7352 7353 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */ 7354 si = gsi_last_nondebug_bb (load_bb); 7355 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); 7356 7357 /* For floating-point values, we'll need to view-convert them to integers 7358 so that we can perform the atomic compare and swap. Simplify the 7359 following code by always setting up the "i"ntegral variables. 
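
     For instance, an atomic update of a 'double' is then carried out on an
     8-byte integer temporary; the conversions only reinterpret the bits,
     roughly

       storedi = VIEW_CONVERT_EXPR<itype> (stored_val);
       loaded_val = VIEW_CONVERT_EXPR<type> (loadedi);

     so the compare-and-swap below only ever sees integral values.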
*/ 7360 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) 7361 { 7362 tree iaddr_val; 7363 7364 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode, 7365 true)); 7366 atype = itype; 7367 iaddr_val 7368 = force_gimple_operand_gsi (&si, 7369 fold_convert (TREE_TYPE (iaddr), addr), 7370 false, NULL_TREE, true, GSI_SAME_STMT); 7371 stmt = gimple_build_assign (iaddr, iaddr_val); 7372 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 7373 loadedi = create_tmp_var (itype); 7374 if (gimple_in_ssa_p (cfun)) 7375 loadedi = make_ssa_name (loadedi); 7376 } 7377 else 7378 { 7379 iaddr = addr; 7380 loadedi = loaded_val; 7381 } 7382 7383 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); 7384 tree loaddecl = builtin_decl_explicit (fncode); 7385 if (loaddecl) 7386 initial 7387 = fold_convert (atype, 7388 build_call_expr (loaddecl, 2, iaddr, 7389 build_int_cst (NULL_TREE, 7390 MEMMODEL_RELAXED))); 7391 else 7392 { 7393 tree off 7394 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode, 7395 true), 0); 7396 initial = build2 (MEM_REF, atype, iaddr, off); 7397 } 7398 7399 initial 7400 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true, 7401 GSI_SAME_STMT); 7402 7403 /* Move the value to the LOADEDI temporary. */ 7404 if (gimple_in_ssa_p (cfun)) 7405 { 7406 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header))); 7407 phi = create_phi_node (loadedi, loop_header); 7408 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), 7409 initial); 7410 } 7411 else 7412 gsi_insert_before (&si, 7413 gimple_build_assign (loadedi, initial), 7414 GSI_SAME_STMT); 7415 if (loadedi != loaded_val) 7416 { 7417 gimple_stmt_iterator gsi2; 7418 tree x; 7419 7420 x = build1 (VIEW_CONVERT_EXPR, type, loadedi); 7421 gsi2 = gsi_start_bb (loop_header); 7422 if (gimple_in_ssa_p (cfun)) 7423 { 7424 gassign *stmt; 7425 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, 7426 true, GSI_SAME_STMT); 7427 stmt = gimple_build_assign (loaded_val, x); 7428 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT); 7429 } 7430 else 7431 { 7432 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x); 7433 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, 7434 true, GSI_SAME_STMT); 7435 } 7436 } 7437 gsi_remove (&si, true); 7438 7439 si = gsi_last_nondebug_bb (store_bb); 7440 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); 7441 7442 if (iaddr == addr) 7443 storedi = stored_val; 7444 else 7445 storedi 7446 = force_gimple_operand_gsi (&si, 7447 build1 (VIEW_CONVERT_EXPR, itype, 7448 stored_val), true, NULL_TREE, true, 7449 GSI_SAME_STMT); 7450 7451 /* Build the compare&swap statement. */ 7452 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); 7453 new_storedi = force_gimple_operand_gsi (&si, 7454 fold_convert (TREE_TYPE (loadedi), 7455 new_storedi), 7456 true, NULL_TREE, 7457 true, GSI_SAME_STMT); 7458 7459 if (gimple_in_ssa_p (cfun)) 7460 old_vali = loadedi; 7461 else 7462 { 7463 old_vali = create_tmp_var (TREE_TYPE (loadedi)); 7464 stmt = gimple_build_assign (old_vali, loadedi); 7465 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 7466 7467 stmt = gimple_build_assign (loadedi, new_storedi); 7468 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 7469 } 7470 7471 /* Note that we always perform the comparison as an integer, even for 7472 floating point. This allows the atomic operation to properly 7473 succeed even with NaNs and -0.0. 
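     (If the comparison below were done as floating point, -0.0 == 0.0 would
     make the loop exit after a failed compare-and-swap, while NaN != NaN
     would make it spin forever; comparing the bits as integers avoids both
     problems.)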
*/ 7474 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali); 7475 stmt = gimple_build_cond_empty (ne); 7476 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 7477 7478 /* Update cfg. */ 7479 e = single_succ_edge (store_bb); 7480 e->flags &= ~EDGE_FALLTHRU; 7481 e->flags |= EDGE_FALSE_VALUE; 7482 /* Expect no looping. */ 7483 e->probability = profile_probability::guessed_always (); 7484 7485 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); 7486 e->probability = profile_probability::guessed_never (); 7487 7488 /* Copy the new value to loadedi (we already did that before the condition 7489 if we are not in SSA). */ 7490 if (gimple_in_ssa_p (cfun)) 7491 { 7492 phi = gimple_seq_first_stmt (phi_nodes (loop_header)); 7493 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); 7494 } 7495 7496 /* Remove GIMPLE_OMP_ATOMIC_STORE. */ 7497 gsi_remove (&si, true); 7498 7499 class loop *loop = alloc_loop (); 7500 loop->header = loop_header; 7501 loop->latch = store_bb; 7502 add_loop (loop, loop_header->loop_father); 7503 7504 if (gimple_in_ssa_p (cfun)) 7505 update_ssa (TODO_update_ssa_no_phi); 7506 7507 return true; 7508 } 7509 7510 /* A subroutine of expand_omp_atomic. Implement the atomic operation as: 7511 7512 GOMP_atomic_start (); 7513 *addr = rhs; 7514 GOMP_atomic_end (); 7515 7516 The result is not globally atomic, but works so long as all parallel 7517 references are within #pragma omp atomic directives. According to 7518 responses received from omp@openmp.org, appears to be within spec. 7519 Which makes sense, since that's how several other compilers handle 7520 this situation as well. 7521 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're 7522 expanding. STORED_VAL is the operand of the matching 7523 GIMPLE_OMP_ATOMIC_STORE. 7524 7525 We replace 7526 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with 7527 loaded_val = *addr; 7528 7529 and replace 7530 GIMPLE_OMP_ATOMIC_STORE (stored_val) with 7531 *addr = stored_val; 7532 */ 7533 7534 static bool 7535 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, 7536 tree addr, tree loaded_val, tree stored_val) 7537 { 7538 gimple_stmt_iterator si; 7539 gassign *stmt; 7540 tree t; 7541 7542 si = gsi_last_nondebug_bb (load_bb); 7543 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); 7544 7545 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START); 7546 t = build_call_expr (t, 0); 7547 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); 7548 7549 tree mem = build_simple_mem_ref (addr); 7550 TREE_TYPE (mem) = TREE_TYPE (loaded_val); 7551 TREE_OPERAND (mem, 1) 7552 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode, 7553 true), 7554 TREE_OPERAND (mem, 1)); 7555 stmt = gimple_build_assign (loaded_val, mem); 7556 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 7557 gsi_remove (&si, true); 7558 7559 si = gsi_last_nondebug_bb (store_bb); 7560 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); 7561 7562 stmt = gimple_build_assign (unshare_expr (mem), stored_val); 7563 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 7564 7565 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END); 7566 t = build_call_expr (t, 0); 7567 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); 7568 gsi_remove (&si, true); 7569 7570 if (gimple_in_ssa_p (cfun)) 7571 update_ssa (TODO_update_ssa_no_phi); 7572 return true; 7573 } 7574 7575 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand 7576 using expand_omp_atomic_fetch_op. 
If that fails, we try to
   call expand_omp_atomic_pipeline, and if that fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try to implement
	     it as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.
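
     For example (an illustration of the check below, not an extra rule),
     a region such as

       #pragma acc kernels
       {
	 for (i = 0; i < n; i++) ...
	 for (j = 0; j < m; j++) ...
       }

     has two sibling loops at the outermost level, so no single loop nest
     can be selected and nothing is marked.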
*/
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}

/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}

/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.
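
   In other words, the generated statement is, roughly,

     RANGE_VAR.FLD_DECL[INDEX] = VALUE;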
*/ 7735 7736 static void 7737 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, 7738 tree fld_decl, int index, tree value) 7739 { 7740 tree ref = build4 (ARRAY_REF, uint32_type_node, 7741 build3 (COMPONENT_REF, 7742 grid_attr_trees->kernel_dim_array_type, 7743 range_var, fld_decl, NULL_TREE), 7744 build_int_cst (integer_type_node, index), 7745 NULL_TREE, NULL_TREE); 7746 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); 7747 } 7748 7749 /* Return a tree representation of a pointer to a structure with grid and 7750 work-group size information. Statements filling that information will be 7751 inserted before GSI, TGT_STMT is the target statement which has the 7752 necessary information in it. */ 7753 7754 static tree 7755 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, 7756 gomp_target *tgt_stmt) 7757 { 7758 grid_create_kernel_launch_attr_types (); 7759 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, 7760 "__kernel_launch_attrs"); 7761 7762 unsigned max_dim = 0; 7763 for (tree clause = gimple_omp_target_clauses (tgt_stmt); 7764 clause; 7765 clause = OMP_CLAUSE_CHAIN (clause)) 7766 { 7767 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) 7768 continue; 7769 7770 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); 7771 max_dim = MAX (dim, max_dim); 7772 7773 grid_insert_store_range_dim (gsi, lattrs, 7774 grid_attr_trees->kernel_lattrs_grid_decl, 7775 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); 7776 grid_insert_store_range_dim (gsi, lattrs, 7777 grid_attr_trees->kernel_lattrs_group_decl, 7778 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); 7779 } 7780 7781 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, 7782 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); 7783 gcc_checking_assert (max_dim <= 2); 7784 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); 7785 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), 7786 GSI_SAME_STMT); 7787 TREE_ADDRESSABLE (lattrs) = 1; 7788 return build_fold_addr_expr (lattrs); 7789 } 7790 7791 /* Build target argument identifier from the DEVICE identifier, value 7792 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ 7793 7794 static tree 7795 get_target_argument_identifier_1 (int device, bool subseqent_param, int id) 7796 { 7797 tree t = build_int_cst (integer_type_node, device); 7798 if (subseqent_param) 7799 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 7800 build_int_cst (integer_type_node, 7801 GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); 7802 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 7803 build_int_cst (integer_type_node, id)); 7804 return t; 7805 } 7806 7807 /* Like above but return it in type that can be directly stored as an element 7808 of the argument array. */ 7809 7810 static tree 7811 get_target_argument_identifier (int device, bool subseqent_param, int id) 7812 { 7813 tree t = get_target_argument_identifier_1 (device, subseqent_param, id); 7814 return fold_convert (ptr_type_node, t); 7815 } 7816 7817 /* Return a target argument consisting of DEVICE identifier, value identifier 7818 ID, and the actual VALUE. 
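
   The three pieces are packed into one pointer-sized integer, roughly

     (VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID

   cast to ptr_type_node; get_target_argument_identifier_1 above supplies
   the DEVICE/ID bits.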
*/ 7819 7820 static tree 7821 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, 7822 tree value) 7823 { 7824 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, 7825 fold_convert (integer_type_node, value), 7826 build_int_cst (unsigned_type_node, 7827 GOMP_TARGET_ARG_VALUE_SHIFT)); 7828 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 7829 get_target_argument_identifier_1 (device, false, id)); 7830 t = fold_convert (ptr_type_node, t); 7831 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); 7832 } 7833 7834 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, 7835 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, 7836 otherwise push an identifier (with DEVICE and ID) and the VALUE in two 7837 arguments. */ 7838 7839 static void 7840 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, 7841 int id, tree value, vec <tree> *args) 7842 { 7843 if (tree_fits_shwi_p (value) 7844 && tree_to_shwi (value) > -(1 << 15) 7845 && tree_to_shwi (value) < (1 << 15)) 7846 args->quick_push (get_target_argument_value (gsi, device, id, value)); 7847 else 7848 { 7849 args->quick_push (get_target_argument_identifier (device, true, id)); 7850 value = fold_convert (ptr_type_node, value); 7851 value = force_gimple_operand_gsi (gsi, value, true, NULL, true, 7852 GSI_SAME_STMT); 7853 args->quick_push (value); 7854 } 7855 } 7856 7857 /* Create an array of arguments that is then passed to GOMP_target. */ 7858 7859 static tree 7860 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) 7861 { 7862 auto_vec <tree, 6> args; 7863 tree clauses = gimple_omp_target_clauses (tgt_stmt); 7864 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); 7865 if (c) 7866 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); 7867 else 7868 t = integer_minus_one_node; 7869 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 7870 GOMP_TARGET_ARG_NUM_TEAMS, t, &args); 7871 7872 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); 7873 if (c) 7874 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); 7875 else 7876 t = integer_minus_one_node; 7877 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 7878 GOMP_TARGET_ARG_THREAD_LIMIT, t, 7879 &args); 7880 7881 /* Add HSA-specific grid sizes, if available. */ 7882 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 7883 OMP_CLAUSE__GRIDDIM_)) 7884 { 7885 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES; 7886 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id); 7887 args.quick_push (t); 7888 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); 7889 } 7890 7891 /* Produce more, perhaps device specific, arguments here. 
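
     The collected arguments are then stored into a local array terminated
     by a null pointer, conceptually

       void *.omp_target_args[] = { arg0, arg1, ..., NULL };

     and it is the address of that array which the GOMP_target call built
     in expand_omp_target receives.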
*/ 7892 7893 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, 7894 args.length () + 1), 7895 ".omp_target_args"); 7896 for (unsigned i = 0; i < args.length (); i++) 7897 { 7898 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 7899 build_int_cst (integer_type_node, i), 7900 NULL_TREE, NULL_TREE); 7901 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), 7902 GSI_SAME_STMT); 7903 } 7904 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 7905 build_int_cst (integer_type_node, args.length ()), 7906 NULL_TREE, NULL_TREE); 7907 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), 7908 GSI_SAME_STMT); 7909 TREE_ADDRESSABLE (argarray) = 1; 7910 return build_fold_addr_expr (argarray); 7911 } 7912 7913 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */ 7914 7915 static void 7916 expand_omp_target (struct omp_region *region) 7917 { 7918 basic_block entry_bb, exit_bb, new_bb; 7919 struct function *child_cfun; 7920 tree child_fn, block, t; 7921 gimple_stmt_iterator gsi; 7922 gomp_target *entry_stmt; 7923 gimple *stmt; 7924 edge e; 7925 bool offloaded; 7926 int target_kind; 7927 7928 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); 7929 target_kind = gimple_omp_target_kind (entry_stmt); 7930 new_bb = region->entry; 7931 7932 offloaded = is_gimple_omp_offloaded (entry_stmt); 7933 switch (target_kind) 7934 { 7935 case GF_OMP_TARGET_KIND_REGION: 7936 case GF_OMP_TARGET_KIND_UPDATE: 7937 case GF_OMP_TARGET_KIND_ENTER_DATA: 7938 case GF_OMP_TARGET_KIND_EXIT_DATA: 7939 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 7940 case GF_OMP_TARGET_KIND_OACC_KERNELS: 7941 case GF_OMP_TARGET_KIND_OACC_SERIAL: 7942 case GF_OMP_TARGET_KIND_OACC_UPDATE: 7943 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 7944 case GF_OMP_TARGET_KIND_OACC_DECLARE: 7945 case GF_OMP_TARGET_KIND_DATA: 7946 case GF_OMP_TARGET_KIND_OACC_DATA: 7947 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 7948 break; 7949 default: 7950 gcc_unreachable (); 7951 } 7952 7953 child_fn = NULL_TREE; 7954 child_cfun = NULL; 7955 if (offloaded) 7956 { 7957 child_fn = gimple_omp_target_child_fn (entry_stmt); 7958 child_cfun = DECL_STRUCT_FUNCTION (child_fn); 7959 } 7960 7961 /* Supported by expand_omp_taskreg, but not here. */ 7962 if (child_cfun != NULL) 7963 gcc_checking_assert (!child_cfun->cfg); 7964 gcc_checking_assert (!gimple_in_ssa_p (cfun)); 7965 7966 entry_bb = region->entry; 7967 exit_bb = region->exit; 7968 7969 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS) 7970 mark_loops_in_oacc_kernels_region (region->entry, region->exit); 7971 7972 /* Going on, all OpenACC compute constructs are mapped to 7973 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined. 7974 To distinguish between them, we attach attributes. */ 7975 switch (target_kind) 7976 { 7977 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 7978 DECL_ATTRIBUTES (child_fn) 7979 = tree_cons (get_identifier ("oacc parallel"), 7980 NULL_TREE, DECL_ATTRIBUTES (child_fn)); 7981 break; 7982 case GF_OMP_TARGET_KIND_OACC_KERNELS: 7983 DECL_ATTRIBUTES (child_fn) 7984 = tree_cons (get_identifier ("oacc kernels"), 7985 NULL_TREE, DECL_ATTRIBUTES (child_fn)); 7986 break; 7987 case GF_OMP_TARGET_KIND_OACC_SERIAL: 7988 DECL_ATTRIBUTES (child_fn) 7989 = tree_cons (get_identifier ("oacc serial"), 7990 NULL_TREE, DECL_ATTRIBUTES (child_fn)); 7991 break; 7992 default: 7993 /* Make sure we don't miss any. 
*/ 7994 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt) 7995 && is_gimple_omp_offloaded (entry_stmt))); 7996 break; 7997 } 7998 7999 if (offloaded) 8000 { 8001 unsigned srcidx, dstidx, num; 8002 8003 /* If the offloading region needs data sent from the parent 8004 function, then the very first statement (except possible 8005 tree profile counter updates) of the offloading body 8006 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since 8007 &.OMP_DATA_O is passed as an argument to the child function, 8008 we need to replace it with the argument as seen by the child 8009 function. 8010 8011 In most cases, this will end up being the identity assignment 8012 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had 8013 a function call that has been inlined, the original PARM_DECL 8014 .OMP_DATA_I may have been converted into a different local 8015 variable. In which case, we need to keep the assignment. */ 8016 tree data_arg = gimple_omp_target_data_arg (entry_stmt); 8017 if (data_arg) 8018 { 8019 basic_block entry_succ_bb = single_succ (entry_bb); 8020 gimple_stmt_iterator gsi; 8021 tree arg; 8022 gimple *tgtcopy_stmt = NULL; 8023 tree sender = TREE_VEC_ELT (data_arg, 0); 8024 8025 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) 8026 { 8027 gcc_assert (!gsi_end_p (gsi)); 8028 stmt = gsi_stmt (gsi); 8029 if (gimple_code (stmt) != GIMPLE_ASSIGN) 8030 continue; 8031 8032 if (gimple_num_ops (stmt) == 2) 8033 { 8034 tree arg = gimple_assign_rhs1 (stmt); 8035 8036 /* We're ignoring the subcode because we're 8037 effectively doing a STRIP_NOPS. */ 8038 8039 if (TREE_CODE (arg) == ADDR_EXPR 8040 && TREE_OPERAND (arg, 0) == sender) 8041 { 8042 tgtcopy_stmt = stmt; 8043 break; 8044 } 8045 } 8046 } 8047 8048 gcc_assert (tgtcopy_stmt != NULL); 8049 arg = DECL_ARGUMENTS (child_fn); 8050 8051 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg); 8052 gsi_remove (&gsi, true); 8053 } 8054 8055 /* Declare local variables needed in CHILD_CFUN. */ 8056 block = DECL_INITIAL (child_fn); 8057 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); 8058 /* The gimplifier could record temporaries in the offloading block 8059 rather than in containing function's local_decls chain, 8060 which would mean cgraph missed finalizing them. Do it now. */ 8061 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) 8062 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) 8063 varpool_node::finalize_decl (t); 8064 DECL_SAVED_TREE (child_fn) = NULL; 8065 /* We'll create a CFG for child_fn, so no gimple body is needed. */ 8066 gimple_set_body (child_fn, NULL); 8067 TREE_USED (block) = 1; 8068 8069 /* Reset DECL_CONTEXT on function arguments. */ 8070 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) 8071 DECL_CONTEXT (t) = child_fn; 8072 8073 /* Split ENTRY_BB at GIMPLE_*, 8074 so that it can be moved to the child function. */ 8075 gsi = gsi_last_nondebug_bb (entry_bb); 8076 stmt = gsi_stmt (gsi); 8077 gcc_assert (stmt 8078 && gimple_code (stmt) == gimple_code (entry_stmt)); 8079 e = split_block (entry_bb, stmt); 8080 gsi_remove (&gsi, true); 8081 entry_bb = e->dest; 8082 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 8083 8084 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. 
*/ 8085 if (exit_bb) 8086 { 8087 gsi = gsi_last_nondebug_bb (exit_bb); 8088 gcc_assert (!gsi_end_p (gsi) 8089 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 8090 stmt = gimple_build_return (NULL); 8091 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); 8092 gsi_remove (&gsi, true); 8093 } 8094 8095 /* Move the offloading region into CHILD_CFUN. */ 8096 8097 block = gimple_block (entry_stmt); 8098 8099 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); 8100 if (exit_bb) 8101 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; 8102 /* When the OMP expansion process cannot guarantee an up-to-date 8103 loop tree arrange for the child function to fixup loops. */ 8104 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 8105 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; 8106 8107 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ 8108 num = vec_safe_length (child_cfun->local_decls); 8109 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) 8110 { 8111 t = (*child_cfun->local_decls)[srcidx]; 8112 if (DECL_CONTEXT (t) == cfun->decl) 8113 continue; 8114 if (srcidx != dstidx) 8115 (*child_cfun->local_decls)[dstidx] = t; 8116 dstidx++; 8117 } 8118 if (dstidx != num) 8119 vec_safe_truncate (child_cfun->local_decls, dstidx); 8120 8121 /* Inform the callgraph about the new function. */ 8122 child_cfun->curr_properties = cfun->curr_properties; 8123 child_cfun->has_simduid_loops |= cfun->has_simduid_loops; 8124 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; 8125 cgraph_node *node = cgraph_node::get_create (child_fn); 8126 node->parallelized_function = 1; 8127 cgraph_node::add_new_function (child_fn, true); 8128 8129 /* Add the new function to the offload table. */ 8130 if (ENABLE_OFFLOADING) 8131 { 8132 if (in_lto_p) 8133 DECL_PRESERVE_P (child_fn) = 1; 8134 vec_safe_push (offload_funcs, child_fn); 8135 } 8136 8137 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) 8138 && !DECL_ASSEMBLER_NAME_SET_P (child_fn); 8139 8140 /* Fix the callgraph edges for child_cfun. Those for cfun will be 8141 fixed in a following pass. */ 8142 push_cfun (child_cfun); 8143 if (need_asm) 8144 assign_assembler_name_if_needed (child_fn); 8145 cgraph_edge::rebuild_edges (); 8146 8147 /* Some EH regions might become dead, see PR34608. If 8148 pass_cleanup_cfg isn't the first pass to happen with the 8149 new child, these dead EH edges might cause problems. 8150 Clean them up now. */ 8151 if (flag_exceptions) 8152 { 8153 basic_block bb; 8154 bool changed = false; 8155 8156 FOR_EACH_BB_FN (bb, cfun) 8157 changed |= gimple_purge_dead_eh_edges (bb); 8158 if (changed) 8159 cleanup_tree_cfg (); 8160 } 8161 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 8162 verify_loop_structure (); 8163 pop_cfun (); 8164 8165 if (dump_file && !gimple_in_ssa_p (cfun)) 8166 { 8167 omp_any_child_fn_dumped = true; 8168 dump_function_header (dump_file, child_fn, dump_flags); 8169 dump_function_to_file (child_fn, dump_file, dump_flags); 8170 } 8171 8172 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn); 8173 } 8174 8175 /* Emit a library call to launch the offloading region, or do data 8176 transfers. 
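
     For an offloaded OpenMP target region, for example, the call built
     below is shaped roughly like

       GOMP_target_ext (device, child_fn, map_count, hostaddrs, sizes,
			kinds, flags, depend, args);

     while OpenACC constructs go through the corresponding GOACC_* entry
     points selected just below.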
*/ 8177 tree t1, t2, t3, t4, depend, c, clauses; 8178 enum built_in_function start_ix; 8179 unsigned int flags_i = 0; 8180 8181 switch (gimple_omp_target_kind (entry_stmt)) 8182 { 8183 case GF_OMP_TARGET_KIND_REGION: 8184 start_ix = BUILT_IN_GOMP_TARGET; 8185 break; 8186 case GF_OMP_TARGET_KIND_DATA: 8187 start_ix = BUILT_IN_GOMP_TARGET_DATA; 8188 break; 8189 case GF_OMP_TARGET_KIND_UPDATE: 8190 start_ix = BUILT_IN_GOMP_TARGET_UPDATE; 8191 break; 8192 case GF_OMP_TARGET_KIND_ENTER_DATA: 8193 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; 8194 break; 8195 case GF_OMP_TARGET_KIND_EXIT_DATA: 8196 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; 8197 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; 8198 break; 8199 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 8200 case GF_OMP_TARGET_KIND_OACC_KERNELS: 8201 case GF_OMP_TARGET_KIND_OACC_SERIAL: 8202 start_ix = BUILT_IN_GOACC_PARALLEL; 8203 break; 8204 case GF_OMP_TARGET_KIND_OACC_DATA: 8205 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 8206 start_ix = BUILT_IN_GOACC_DATA_START; 8207 break; 8208 case GF_OMP_TARGET_KIND_OACC_UPDATE: 8209 start_ix = BUILT_IN_GOACC_UPDATE; 8210 break; 8211 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 8212 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; 8213 break; 8214 case GF_OMP_TARGET_KIND_OACC_DECLARE: 8215 start_ix = BUILT_IN_GOACC_DECLARE; 8216 break; 8217 default: 8218 gcc_unreachable (); 8219 } 8220 8221 clauses = gimple_omp_target_clauses (entry_stmt); 8222 8223 tree device = NULL_TREE; 8224 location_t device_loc = UNKNOWN_LOCATION; 8225 tree goacc_flags = NULL_TREE; 8226 if (is_gimple_omp_oacc (entry_stmt)) 8227 { 8228 /* By default, no GOACC_FLAGs are set. */ 8229 goacc_flags = integer_zero_node; 8230 } 8231 else 8232 { 8233 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE); 8234 if (c) 8235 { 8236 device = OMP_CLAUSE_DEVICE_ID (c); 8237 device_loc = OMP_CLAUSE_LOCATION (c); 8238 } 8239 else 8240 { 8241 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime 8242 library choose). */ 8243 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); 8244 device_loc = gimple_location (entry_stmt); 8245 } 8246 8247 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT); 8248 if (c) 8249 flags_i |= GOMP_TARGET_FLAG_NOWAIT; 8250 } 8251 8252 /* By default, there is no conditional. */ 8253 tree cond = NULL_TREE; 8254 c = omp_find_clause (clauses, OMP_CLAUSE_IF); 8255 if (c) 8256 cond = OMP_CLAUSE_IF_EXPR (c); 8257 /* If we found the clause 'if (cond)', build: 8258 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK) 8259 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */ 8260 if (cond) 8261 { 8262 tree *tp; 8263 if (is_gimple_omp_oacc (entry_stmt)) 8264 tp = &goacc_flags; 8265 else 8266 { 8267 /* Ensure 'device' is of the correct type. 
*/ 8268 device = fold_convert_loc (device_loc, integer_type_node, device); 8269 8270 tp = &device; 8271 } 8272 8273 cond = gimple_boolify (cond); 8274 8275 basic_block cond_bb, then_bb, else_bb; 8276 edge e; 8277 tree tmp_var; 8278 8279 tmp_var = create_tmp_var (TREE_TYPE (*tp)); 8280 if (offloaded) 8281 e = split_block_after_labels (new_bb); 8282 else 8283 { 8284 gsi = gsi_last_nondebug_bb (new_bb); 8285 gsi_prev (&gsi); 8286 e = split_block (new_bb, gsi_stmt (gsi)); 8287 } 8288 cond_bb = e->src; 8289 new_bb = e->dest; 8290 remove_edge (e); 8291 8292 then_bb = create_empty_bb (cond_bb); 8293 else_bb = create_empty_bb (then_bb); 8294 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); 8295 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); 8296 8297 stmt = gimple_build_cond_empty (cond); 8298 gsi = gsi_last_bb (cond_bb); 8299 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 8300 8301 gsi = gsi_start_bb (then_bb); 8302 stmt = gimple_build_assign (tmp_var, *tp); 8303 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 8304 8305 gsi = gsi_start_bb (else_bb); 8306 if (is_gimple_omp_oacc (entry_stmt)) 8307 stmt = gimple_build_assign (tmp_var, 8308 BIT_IOR_EXPR, 8309 *tp, 8310 build_int_cst (integer_type_node, 8311 GOACC_FLAG_HOST_FALLBACK)); 8312 else 8313 stmt = gimple_build_assign (tmp_var, 8314 build_int_cst (integer_type_node, 8315 GOMP_DEVICE_HOST_FALLBACK)); 8316 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 8317 8318 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); 8319 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); 8320 add_bb_to_loop (then_bb, cond_bb->loop_father); 8321 add_bb_to_loop (else_bb, cond_bb->loop_father); 8322 make_edge (then_bb, new_bb, EDGE_FALLTHRU); 8323 make_edge (else_bb, new_bb, EDGE_FALLTHRU); 8324 8325 *tp = tmp_var; 8326 8327 gsi = gsi_last_nondebug_bb (new_bb); 8328 } 8329 else 8330 { 8331 gsi = gsi_last_nondebug_bb (new_bb); 8332 8333 if (device != NULL_TREE) 8334 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, 8335 true, GSI_SAME_STMT); 8336 } 8337 8338 t = gimple_omp_target_data_arg (entry_stmt); 8339 if (t == NULL) 8340 { 8341 t1 = size_zero_node; 8342 t2 = build_zero_cst (ptr_type_node); 8343 t3 = t2; 8344 t4 = t2; 8345 } 8346 else 8347 { 8348 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1)))); 8349 t1 = size_binop (PLUS_EXPR, t1, size_int (1)); 8350 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0)); 8351 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1)); 8352 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2)); 8353 } 8354 8355 gimple *g; 8356 bool tagging = false; 8357 /* The maximum number used by any start_ix, without varargs. 
*/ 8358 auto_vec<tree, 11> args; 8359 if (is_gimple_omp_oacc (entry_stmt)) 8360 { 8361 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP, 8362 TREE_TYPE (goacc_flags), goacc_flags); 8363 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true, 8364 NULL_TREE, true, 8365 GSI_SAME_STMT); 8366 args.quick_push (goacc_flags_m); 8367 } 8368 else 8369 args.quick_push (device); 8370 if (offloaded) 8371 args.quick_push (build_fold_addr_expr (child_fn)); 8372 args.quick_push (t1); 8373 args.quick_push (t2); 8374 args.quick_push (t3); 8375 args.quick_push (t4); 8376 switch (start_ix) 8377 { 8378 case BUILT_IN_GOACC_DATA_START: 8379 case BUILT_IN_GOACC_DECLARE: 8380 case BUILT_IN_GOMP_TARGET_DATA: 8381 break; 8382 case BUILT_IN_GOMP_TARGET: 8383 case BUILT_IN_GOMP_TARGET_UPDATE: 8384 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA: 8385 args.quick_push (build_int_cst (unsigned_type_node, flags_i)); 8386 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); 8387 if (c) 8388 depend = OMP_CLAUSE_DECL (c); 8389 else 8390 depend = build_int_cst (ptr_type_node, 0); 8391 args.quick_push (depend); 8392 if (start_ix == BUILT_IN_GOMP_TARGET) 8393 args.quick_push (get_target_arguments (&gsi, entry_stmt)); 8394 break; 8395 case BUILT_IN_GOACC_PARALLEL: 8396 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL) 8397 { 8398 tree dims = NULL_TREE; 8399 unsigned int ix; 8400 8401 /* For serial constructs we set all dimensions to 1. */ 8402 for (ix = GOMP_DIM_MAX; ix--;) 8403 dims = tree_cons (NULL_TREE, integer_one_node, dims); 8404 oacc_replace_fn_attrib (child_fn, dims); 8405 } 8406 else 8407 oacc_set_fn_attrib (child_fn, clauses, &args); 8408 tagging = true; 8409 /* FALLTHRU */ 8410 case BUILT_IN_GOACC_ENTER_EXIT_DATA: 8411 case BUILT_IN_GOACC_UPDATE: 8412 { 8413 tree t_async = NULL_TREE; 8414 8415 /* If present, use the value specified by the respective 8416 clause, making sure that is of the correct type. */ 8417 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC); 8418 if (c) 8419 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), 8420 integer_type_node, 8421 OMP_CLAUSE_ASYNC_EXPR (c)); 8422 else if (!tagging) 8423 /* Default values for t_async. */ 8424 t_async = fold_convert_loc (gimple_location (entry_stmt), 8425 integer_type_node, 8426 build_int_cst (integer_type_node, 8427 GOMP_ASYNC_SYNC)); 8428 if (tagging && t_async) 8429 { 8430 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX; 8431 8432 if (TREE_CODE (t_async) == INTEGER_CST) 8433 { 8434 /* See if we can pack the async arg in to the tag's 8435 operand. */ 8436 i_async = TREE_INT_CST_LOW (t_async); 8437 if (i_async < GOMP_LAUNCH_OP_MAX) 8438 t_async = NULL_TREE; 8439 else 8440 i_async = GOMP_LAUNCH_OP_MAX; 8441 } 8442 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, 8443 i_async)); 8444 } 8445 if (t_async) 8446 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true, 8447 NULL_TREE, true, 8448 GSI_SAME_STMT)); 8449 8450 /* Save the argument index, and ... */ 8451 unsigned t_wait_idx = args.length (); 8452 unsigned num_waits = 0; 8453 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT); 8454 if (!tagging || c) 8455 /* ... push a placeholder. 
*/ 8456 args.safe_push (integer_zero_node);
8457
8458 for (; c; c = OMP_CLAUSE_CHAIN (c))
8459 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8460 {
8461 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8462 integer_type_node,
8463 OMP_CLAUSE_WAIT_EXPR (c));
8464 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
8465 GSI_SAME_STMT);
8466 args.safe_push (arg);
8467 num_waits++;
8468 }
8469
8470 if (!tagging || num_waits)
8471 {
8472 tree len;
8473
8474 /* Now that we know the number, update the placeholder. */
8475 if (tagging)
8476 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8477 else
8478 len = build_int_cst (integer_type_node, num_waits);
8479 len = fold_convert_loc (gimple_location (entry_stmt),
8480 unsigned_type_node, len);
8481 args[t_wait_idx] = len;
8482 }
8483 }
8484 break;
8485 default:
8486 gcc_unreachable ();
8487 }
8488 if (tagging)
8489 /* Push terminal marker - zero. */
8490 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8491
8492 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8493 gimple_set_location (g, gimple_location (entry_stmt));
8494 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8495 if (!offloaded)
8496 {
8497 g = gsi_stmt (gsi);
8498 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8499 gsi_remove (&gsi, true);
8500 }
8501 }
8502
8503 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
8504 iteration variable derived from the thread number. INTRA_GROUP means this
8505 is an expansion of a loop iterating over work-items within a separate
8506 iteration over groups. */
8507
8508 static void
8509 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8510 {
8511 gimple_stmt_iterator gsi;
8512 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8513 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8514 == GF_OMP_FOR_KIND_GRID_LOOP);
8515 size_t collapse = gimple_omp_for_collapse (for_stmt);
8516 struct omp_for_data_loop *loops
8517 = XALLOCAVEC (struct omp_for_data_loop,
8518 gimple_omp_for_collapse (for_stmt));
8519 struct omp_for_data fd;
8520
8521 remove_edge (BRANCH_EDGE (kfor->entry));
8522 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8523
8524 gcc_assert (kfor->cont);
8525 omp_extract_for_data (for_stmt, &fd, loops);
8526
8527 gsi = gsi_start_bb (body_bb);
8528
8529 for (size_t dim = 0; dim < collapse; dim++)
8530 {
8531 tree type, itype;
8532 itype = type = TREE_TYPE (fd.loops[dim].v);
8533 if (POINTER_TYPE_P (type))
8534 itype = signed_type_for (type);
8535
8536 tree n1 = fd.loops[dim].n1;
8537 tree step = fd.loops[dim].step;
8538 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8539 true, NULL_TREE, true, GSI_SAME_STMT);
8540 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8541 true, NULL_TREE, true, GSI_SAME_STMT);
8542 tree threadid;
8543 if (gimple_omp_for_grid_group_iter (for_stmt))
8544 {
8545 gcc_checking_assert (!intra_group);
8546 threadid = build_call_expr (builtin_decl_explicit
8547 (BUILT_IN_HSA_WORKGROUPID), 1,
8548 build_int_cstu (unsigned_type_node, dim));
8549 }
8550 else if (intra_group)
8551 threadid = build_call_expr (builtin_decl_explicit
8552 (BUILT_IN_HSA_WORKITEMID), 1,
8553 build_int_cstu (unsigned_type_node, dim));
8554 else
8555 threadid = build_call_expr (builtin_decl_explicit
8556 (BUILT_IN_HSA_WORKITEMABSID), 1,
8557 build_int_cstu (unsigned_type_node, dim));
8558 threadid = fold_convert (itype, threadid);
8559 threadid = force_gimple_operand_gsi (&gsi, threadid, true,
NULL_TREE, 8560 true, GSI_SAME_STMT); 8561 8562 tree startvar = fd.loops[dim].v; 8563 tree t = fold_build2 (MULT_EXPR, itype, threadid, step); 8564 if (POINTER_TYPE_P (type)) 8565 t = fold_build_pointer_plus (n1, t); 8566 else 8567 t = fold_build2 (PLUS_EXPR, type, t, n1); 8568 t = fold_convert (type, t); 8569 t = force_gimple_operand_gsi (&gsi, t, 8570 DECL_P (startvar) 8571 && TREE_ADDRESSABLE (startvar), 8572 NULL_TREE, true, GSI_SAME_STMT); 8573 gassign *assign_stmt = gimple_build_assign (startvar, t); 8574 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 8575 } 8576 /* Remove the omp for statement. */ 8577 gsi = gsi_last_nondebug_bb (kfor->entry); 8578 gsi_remove (&gsi, true); 8579 8580 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 8581 gsi = gsi_last_nondebug_bb (kfor->cont); 8582 gcc_assert (!gsi_end_p (gsi) 8583 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); 8584 gsi_remove (&gsi, true); 8585 8586 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ 8587 gsi = gsi_last_nondebug_bb (kfor->exit); 8588 gcc_assert (!gsi_end_p (gsi) 8589 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 8590 if (intra_group) 8591 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT); 8592 gsi_remove (&gsi, true); 8593 8594 /* Fixup the much simpler CFG. */ 8595 remove_edge (find_edge (kfor->cont, body_bb)); 8596 8597 if (kfor->cont != body_bb) 8598 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); 8599 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); 8600 } 8601 8602 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap 8603 argument_decls. */ 8604 8605 struct grid_arg_decl_map 8606 { 8607 tree old_arg; 8608 tree new_arg; 8609 }; 8610 8611 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones 8612 pertaining to kernel function. */ 8613 8614 static tree 8615 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) 8616 { 8617 struct walk_stmt_info *wi = (struct walk_stmt_info *) data; 8618 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; 8619 tree t = *tp; 8620 8621 if (t == adm->old_arg) 8622 *tp = adm->new_arg; 8623 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 8624 return NULL_TREE; 8625 } 8626 8627 /* If TARGET region contains a kernel body for loop, remove its region from the 8628 TARGET and expand it in HSA gridified kernel fashion. */ 8629 8630 static void 8631 grid_expand_target_grid_body (struct omp_region *target) 8632 { 8633 if (!hsa_gen_requested_p ()) 8634 return; 8635 8636 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); 8637 struct omp_region **pp; 8638 8639 for (pp = &target->inner; *pp; pp = &(*pp)->next) 8640 if ((*pp)->type == GIMPLE_OMP_GRID_BODY) 8641 break; 8642 8643 struct omp_region *gpukernel = *pp; 8644 8645 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); 8646 if (!gpukernel) 8647 { 8648 /* HSA cannot handle OACC stuff. 
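   Only a plain GF_OMP_TARGET_KIND_REGION is registered as a standalone
   HSA kernel here; OpenACC constructs and the data/update style target
   kinds are simply left alone.  (When the target does contain a
   GIMPLE_OMP_GRID_BODY child, the code below instead carves it out
   into a separate gridified kernel function.  As a rough, purely
   illustrative sketch of that transformation, a gridified

      #pragma omp target teams distribute parallel for
      for (i = 0; i < N; i++)
        body (i);

   keeps only the loop body in the kernel; grid_expand_omp_for_loop
   replaces the loop with a per-work-item assignment of the form

      i = n1 + step * <work-item id in dimension DIM>

   where the id comes from the BUILT_IN_HSA_WORKITEMABSID (or
   WORKGROUPID / WORKITEMID) builtins and the iteration space is
   supplied by the launch grid.)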
*/ 8649 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) 8650 return; 8651 gcc_checking_assert (orig_child_fndecl); 8652 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 8653 OMP_CLAUSE__GRIDDIM_)); 8654 cgraph_node *n = cgraph_node::get (orig_child_fndecl); 8655 8656 hsa_register_kernel (n); 8657 return; 8658 } 8659 8660 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 8661 OMP_CLAUSE__GRIDDIM_)); 8662 tree inside_block 8663 = gimple_block (first_stmt (single_succ (gpukernel->entry))); 8664 *pp = gpukernel->next; 8665 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) 8666 if ((*pp)->type == GIMPLE_OMP_FOR) 8667 break; 8668 8669 struct omp_region *kfor = *pp; 8670 gcc_assert (kfor); 8671 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); 8672 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); 8673 *pp = kfor->next; 8674 if (kfor->inner) 8675 { 8676 if (gimple_omp_for_grid_group_iter (for_stmt)) 8677 { 8678 struct omp_region **next_pp; 8679 for (pp = &kfor->inner; *pp; pp = next_pp) 8680 { 8681 next_pp = &(*pp)->next; 8682 if ((*pp)->type != GIMPLE_OMP_FOR) 8683 continue; 8684 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); 8685 gcc_assert (gimple_omp_for_kind (inner) 8686 == GF_OMP_FOR_KIND_GRID_LOOP); 8687 grid_expand_omp_for_loop (*pp, true); 8688 *pp = (*pp)->next; 8689 next_pp = pp; 8690 } 8691 } 8692 expand_omp (kfor->inner); 8693 } 8694 if (gpukernel->inner) 8695 expand_omp (gpukernel->inner); 8696 8697 tree kern_fndecl = copy_node (orig_child_fndecl); 8698 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl, 8699 "kernel"); 8700 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); 8701 tree tgtblock = gimple_block (tgt_stmt); 8702 tree fniniblock = make_node (BLOCK); 8703 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock); 8704 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); 8705 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); 8706 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; 8707 DECL_INITIAL (kern_fndecl) = fniniblock; 8708 push_struct_function (kern_fndecl); 8709 cfun->function_end_locus = gimple_location (tgt_stmt); 8710 init_tree_ssa (cfun); 8711 pop_cfun (); 8712 8713 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); 8714 gcc_assert (!DECL_CHAIN (old_parm_decl)); 8715 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); 8716 DECL_CONTEXT (new_parm_decl) = kern_fndecl; 8717 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; 8718 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); 8719 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); 8720 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; 8721 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); 8722 kern_cfun->curr_properties = cfun->curr_properties; 8723 8724 grid_expand_omp_for_loop (kfor, false); 8725 8726 /* Remove the omp for statement. */ 8727 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry); 8728 gsi_remove (&gsi, true); 8729 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real 8730 return. 
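   The kernel's DECL_RESULT is void (asserted when the decl was set up
   above), so a bare GIMPLE return with no value is sufficient.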
*/ 8731 gsi = gsi_last_nondebug_bb (gpukernel->exit); 8732 gcc_assert (!gsi_end_p (gsi) 8733 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 8734 gimple *ret_stmt = gimple_build_return (NULL); 8735 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); 8736 gsi_remove (&gsi, true); 8737 8738 /* Statements in the first BB in the target construct have been produced by 8739 target lowering and must be copied inside the GPUKERNEL, with the two 8740 exceptions of the first OMP statement and the OMP_DATA assignment 8741 statement. */ 8742 gsi = gsi_start_bb (single_succ (gpukernel->entry)); 8743 tree data_arg = gimple_omp_target_data_arg (tgt_stmt); 8744 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL; 8745 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); 8746 !gsi_end_p (tsi); gsi_next (&tsi)) 8747 { 8748 gimple *stmt = gsi_stmt (tsi); 8749 if (is_gimple_omp (stmt)) 8750 break; 8751 if (sender 8752 && is_gimple_assign (stmt) 8753 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR 8754 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) 8755 continue; 8756 gimple *copy = gimple_copy (stmt); 8757 gsi_insert_before (&gsi, copy, GSI_SAME_STMT); 8758 gimple_set_block (copy, fniniblock); 8759 } 8760 8761 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), 8762 gpukernel->exit, inside_block); 8763 8764 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); 8765 kcn->mark_force_output (); 8766 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); 8767 8768 hsa_register_kernel (kcn, orig_child); 8769 8770 cgraph_node::add_new_function (kern_fndecl, true); 8771 push_cfun (kern_cfun); 8772 cgraph_edge::rebuild_edges (); 8773 8774 /* Re-map any mention of the PARM_DECL of the original function to the 8775 PARM_DECL of the new one. 8776 8777 TODO: It would be great if lowering produced references into the GPU 8778 kernel decl straight away and we did not have to do this. */ 8779 struct grid_arg_decl_map adm; 8780 adm.old_arg = old_parm_decl; 8781 adm.new_arg = new_parm_decl; 8782 basic_block bb; 8783 FOR_EACH_BB_FN (bb, kern_cfun) 8784 { 8785 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 8786 { 8787 gimple *stmt = gsi_stmt (gsi); 8788 struct walk_stmt_info wi; 8789 memset (&wi, 0, sizeof (wi)); 8790 wi.info = &adm; 8791 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); 8792 } 8793 } 8794 pop_cfun (); 8795 8796 return; 8797 } 8798 8799 /* Expand the parallel region tree rooted at REGION. Expansion 8800 proceeds in depth-first order. Innermost regions are expanded 8801 first. This way, parallel regions that require a new function to 8802 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any 8803 internal dependencies in their body. */ 8804 8805 static void 8806 expand_omp (struct omp_region *region) 8807 { 8808 omp_any_child_fn_dumped = false; 8809 while (region) 8810 { 8811 location_t saved_location; 8812 gimple *inner_stmt = NULL; 8813 8814 /* First, determine whether this is a combined parallel+workshare 8815 region. 
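   When it is, the expansion can emit one combined library call (for
   instance one of the GOMP_parallel_loop_* entry points rather than
   GOMP_parallel followed by a separate worksharing call), passing the
   additional scheduling operands through the region's ws_args.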
*/ 8816 if (region->type == GIMPLE_OMP_PARALLEL) 8817 determine_parallel_type (region); 8818 else if (region->type == GIMPLE_OMP_TARGET) 8819 grid_expand_target_grid_body (region); 8820 8821 if (region->type == GIMPLE_OMP_FOR 8822 && gimple_omp_for_combined_p (last_stmt (region->entry))) 8823 inner_stmt = last_stmt (region->inner->entry); 8824 8825 if (region->inner) 8826 expand_omp (region->inner); 8827 8828 saved_location = input_location; 8829 if (gimple_has_location (last_stmt (region->entry))) 8830 input_location = gimple_location (last_stmt (region->entry)); 8831 8832 switch (region->type) 8833 { 8834 case GIMPLE_OMP_PARALLEL: 8835 case GIMPLE_OMP_TASK: 8836 expand_omp_taskreg (region); 8837 break; 8838 8839 case GIMPLE_OMP_FOR: 8840 expand_omp_for (region, inner_stmt); 8841 break; 8842 8843 case GIMPLE_OMP_SECTIONS: 8844 expand_omp_sections (region); 8845 break; 8846 8847 case GIMPLE_OMP_SECTION: 8848 /* Individual omp sections are handled together with their 8849 parent GIMPLE_OMP_SECTIONS region. */ 8850 break; 8851 8852 case GIMPLE_OMP_SINGLE: 8853 expand_omp_single (region); 8854 break; 8855 8856 case GIMPLE_OMP_ORDERED: 8857 { 8858 gomp_ordered *ord_stmt 8859 = as_a <gomp_ordered *> (last_stmt (region->entry)); 8860 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), 8861 OMP_CLAUSE_DEPEND)) 8862 { 8863 /* We'll expand these when expanding corresponding 8864 worksharing region with ordered(n) clause. */ 8865 gcc_assert (region->outer 8866 && region->outer->type == GIMPLE_OMP_FOR); 8867 region->ord_stmt = ord_stmt; 8868 break; 8869 } 8870 } 8871 /* FALLTHRU */ 8872 case GIMPLE_OMP_MASTER: 8873 case GIMPLE_OMP_TASKGROUP: 8874 case GIMPLE_OMP_CRITICAL: 8875 case GIMPLE_OMP_TEAMS: 8876 expand_omp_synch (region); 8877 break; 8878 8879 case GIMPLE_OMP_ATOMIC_LOAD: 8880 expand_omp_atomic (region); 8881 break; 8882 8883 case GIMPLE_OMP_TARGET: 8884 expand_omp_target (region); 8885 break; 8886 8887 default: 8888 gcc_unreachable (); 8889 } 8890 8891 input_location = saved_location; 8892 region = region->next; 8893 } 8894 if (omp_any_child_fn_dumped) 8895 { 8896 if (dump_file) 8897 dump_function_header (dump_file, current_function_decl, dump_flags); 8898 omp_any_child_fn_dumped = false; 8899 } 8900 } 8901 8902 /* Helper for build_omp_regions. Scan the dominator tree starting at 8903 block BB. PARENT is the region that contains BB. If SINGLE_TREE is 8904 true, the function ends once a single tree is built (otherwise, whole 8905 forest of OMP constructs may be built). */ 8906 8907 static void 8908 build_omp_regions_1 (basic_block bb, struct omp_region *parent, 8909 bool single_tree) 8910 { 8911 gimple_stmt_iterator gsi; 8912 gimple *stmt; 8913 basic_block son; 8914 8915 gsi = gsi_last_nondebug_bb (bb); 8916 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi))) 8917 { 8918 struct omp_region *region; 8919 enum gimple_code code; 8920 8921 stmt = gsi_stmt (gsi); 8922 code = gimple_code (stmt); 8923 if (code == GIMPLE_OMP_RETURN) 8924 { 8925 /* STMT is the return point out of region PARENT. Mark it 8926 as the exit point and make PARENT the immediately 8927 enclosing region. */ 8928 gcc_assert (parent); 8929 region = parent; 8930 region->exit = bb; 8931 parent = parent->outer; 8932 } 8933 else if (code == GIMPLE_OMP_ATOMIC_STORE) 8934 { 8935 /* GIMPLE_OMP_ATOMIC_STORE is analogous to 8936 GIMPLE_OMP_RETURN, but matches with 8937 GIMPLE_OMP_ATOMIC_LOAD. 
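   That is, an atomic region such as '#pragma omp atomic x = x + 1' is
   lowered into a GIMPLE_OMP_ATOMIC_LOAD of the old value followed by a
   GIMPLE_OMP_ATOMIC_STORE of the new one, so the store closes the
   region that the load opened.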
*/ 8938 gcc_assert (parent); 8939 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD); 8940 region = parent; 8941 region->exit = bb; 8942 parent = parent->outer; 8943 } 8944 else if (code == GIMPLE_OMP_CONTINUE) 8945 { 8946 gcc_assert (parent); 8947 parent->cont = bb; 8948 } 8949 else if (code == GIMPLE_OMP_SECTIONS_SWITCH) 8950 { 8951 /* GIMPLE_OMP_SECTIONS_SWITCH is part of 8952 GIMPLE_OMP_SECTIONS, and we do nothing for it. */ 8953 } 8954 else 8955 { 8956 region = new_omp_region (bb, code, parent); 8957 /* Otherwise... */ 8958 if (code == GIMPLE_OMP_TARGET) 8959 { 8960 switch (gimple_omp_target_kind (stmt)) 8961 { 8962 case GF_OMP_TARGET_KIND_REGION: 8963 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 8964 case GF_OMP_TARGET_KIND_OACC_KERNELS: 8965 case GF_OMP_TARGET_KIND_OACC_SERIAL: 8966 break; 8967 case GF_OMP_TARGET_KIND_UPDATE: 8968 case GF_OMP_TARGET_KIND_ENTER_DATA: 8969 case GF_OMP_TARGET_KIND_EXIT_DATA: 8970 case GF_OMP_TARGET_KIND_DATA: 8971 case GF_OMP_TARGET_KIND_OACC_DATA: 8972 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 8973 case GF_OMP_TARGET_KIND_OACC_UPDATE: 8974 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 8975 case GF_OMP_TARGET_KIND_OACC_DECLARE: 8976 /* ..., other than for those stand-alone directives... */ 8977 region = NULL; 8978 break; 8979 default: 8980 gcc_unreachable (); 8981 } 8982 } 8983 else if (code == GIMPLE_OMP_ORDERED 8984 && omp_find_clause (gimple_omp_ordered_clauses 8985 (as_a <gomp_ordered *> (stmt)), 8986 OMP_CLAUSE_DEPEND)) 8987 /* #pragma omp ordered depend is also just a stand-alone 8988 directive. */ 8989 region = NULL; 8990 else if (code == GIMPLE_OMP_TASK 8991 && gimple_omp_task_taskwait_p (stmt)) 8992 /* #pragma omp taskwait depend(...) is a stand-alone directive. */ 8993 region = NULL; 8994 /* ..., this directive becomes the parent for a new region. */ 8995 if (region) 8996 parent = region; 8997 } 8998 } 8999 9000 if (single_tree && !parent) 9001 return; 9002 9003 for (son = first_dom_son (CDI_DOMINATORS, bb); 9004 son; 9005 son = next_dom_son (CDI_DOMINATORS, son)) 9006 build_omp_regions_1 (son, parent, single_tree); 9007 } 9008 9009 /* Builds the tree of OMP regions rooted at ROOT, storing it to 9010 root_omp_region. */ 9011 9012 static void 9013 build_omp_regions_root (basic_block root) 9014 { 9015 gcc_assert (root_omp_region == NULL); 9016 build_omp_regions_1 (root, NULL, true); 9017 gcc_assert (root_omp_region != NULL); 9018 } 9019 9020 /* Expands omp construct (and its subconstructs) starting in HEAD. */ 9021 9022 void 9023 omp_expand_local (basic_block head) 9024 { 9025 build_omp_regions_root (head); 9026 if (dump_file && (dump_flags & TDF_DETAILS)) 9027 { 9028 fprintf (dump_file, "\nOMP region tree\n\n"); 9029 dump_omp_region (dump_file, root_omp_region, 0); 9030 fprintf (dump_file, "\n"); 9031 } 9032 9033 remove_exit_barriers (root_omp_region); 9034 expand_omp (root_omp_region); 9035 9036 omp_free_regions (); 9037 } 9038 9039 /* Scan the CFG and build a tree of OMP regions. Return the root of 9040 the OMP region tree. */ 9041 9042 static void 9043 build_omp_regions (void) 9044 { 9045 gcc_assert (root_omp_region == NULL); 9046 calculate_dominance_info (CDI_DOMINATORS); 9047 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false); 9048 } 9049 9050 /* Main entry point for expanding OMP-GIMPLE into runtime calls. 
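   The OMP region tree is built from the CFG, redundant exit barriers
   are removed, each region is expanded innermost-first, and the CFG is
   cleaned up and the region tree freed afterwards.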
*/ 9051 9052 static unsigned int 9053 execute_expand_omp (void) 9054 { 9055 build_omp_regions (); 9056 9057 if (!root_omp_region) 9058 return 0; 9059 9060 if (dump_file) 9061 { 9062 fprintf (dump_file, "\nOMP region tree\n\n"); 9063 dump_omp_region (dump_file, root_omp_region, 0); 9064 fprintf (dump_file, "\n"); 9065 } 9066 9067 remove_exit_barriers (root_omp_region); 9068 9069 expand_omp (root_omp_region); 9070 9071 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 9072 verify_loop_structure (); 9073 cleanup_tree_cfg (); 9074 9075 omp_free_regions (); 9076 9077 return 0; 9078 } 9079 9080 /* OMP expansion -- the default pass, run before creation of SSA form. */ 9081 9082 namespace { 9083 9084 const pass_data pass_data_expand_omp = 9085 { 9086 GIMPLE_PASS, /* type */ 9087 "ompexp", /* name */ 9088 OPTGROUP_OMP, /* optinfo_flags */ 9089 TV_NONE, /* tv_id */ 9090 PROP_gimple_any, /* properties_required */ 9091 PROP_gimple_eomp, /* properties_provided */ 9092 0, /* properties_destroyed */ 9093 0, /* todo_flags_start */ 9094 0, /* todo_flags_finish */ 9095 }; 9096 9097 class pass_expand_omp : public gimple_opt_pass 9098 { 9099 public: 9100 pass_expand_omp (gcc::context *ctxt) 9101 : gimple_opt_pass (pass_data_expand_omp, ctxt) 9102 {} 9103 9104 /* opt_pass methods: */ 9105 virtual unsigned int execute (function *) 9106 { 9107 bool gate = ((flag_openacc != 0 || flag_openmp != 0 9108 || flag_openmp_simd != 0) 9109 && !seen_error ()); 9110 9111 /* This pass always runs, to provide PROP_gimple_eomp. 9112 But often, there is nothing to do. */ 9113 if (!gate) 9114 return 0; 9115 9116 return execute_expand_omp (); 9117 } 9118 9119 }; // class pass_expand_omp 9120 9121 } // anon namespace 9122 9123 gimple_opt_pass * 9124 make_pass_expand_omp (gcc::context *ctxt) 9125 { 9126 return new pass_expand_omp (ctxt); 9127 } 9128 9129 namespace { 9130 9131 const pass_data pass_data_expand_omp_ssa = 9132 { 9133 GIMPLE_PASS, /* type */ 9134 "ompexpssa", /* name */ 9135 OPTGROUP_OMP, /* optinfo_flags */ 9136 TV_NONE, /* tv_id */ 9137 PROP_cfg | PROP_ssa, /* properties_required */ 9138 PROP_gimple_eomp, /* properties_provided */ 9139 0, /* properties_destroyed */ 9140 0, /* todo_flags_start */ 9141 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ 9142 }; 9143 9144 class pass_expand_omp_ssa : public gimple_opt_pass 9145 { 9146 public: 9147 pass_expand_omp_ssa (gcc::context *ctxt) 9148 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) 9149 {} 9150 9151 /* opt_pass methods: */ 9152 virtual bool gate (function *fun) 9153 { 9154 return !(fun->curr_properties & PROP_gimple_eomp); 9155 } 9156 virtual unsigned int execute (function *) { return execute_expand_omp (); } 9157 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); } 9158 9159 }; // class pass_expand_omp_ssa 9160 9161 } // anon namespace 9162 9163 gimple_opt_pass * 9164 make_pass_expand_omp_ssa (gcc::context *ctxt) 9165 { 9166 return new pass_expand_omp_ssa (ctxt); 9167 } 9168 9169 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant 9170 GIMPLE_* codes. 
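   Returns true if the caller should also add the ordinary fallthru
   edge out of BB.  *REGION and *REGION_IDX track the innermost OMP
   region currently open, so the state carries over between calls for
   consecutive basic blocks.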
*/ 9171 9172 bool 9173 omp_make_gimple_edges (basic_block bb, struct omp_region **region, 9174 int *region_idx) 9175 { 9176 gimple *last = last_stmt (bb); 9177 enum gimple_code code = gimple_code (last); 9178 struct omp_region *cur_region = *region; 9179 bool fallthru = false; 9180 9181 switch (code) 9182 { 9183 case GIMPLE_OMP_PARALLEL: 9184 case GIMPLE_OMP_FOR: 9185 case GIMPLE_OMP_SINGLE: 9186 case GIMPLE_OMP_TEAMS: 9187 case GIMPLE_OMP_MASTER: 9188 case GIMPLE_OMP_TASKGROUP: 9189 case GIMPLE_OMP_CRITICAL: 9190 case GIMPLE_OMP_SECTION: 9191 case GIMPLE_OMP_GRID_BODY: 9192 cur_region = new_omp_region (bb, code, cur_region); 9193 fallthru = true; 9194 break; 9195 9196 case GIMPLE_OMP_TASK: 9197 cur_region = new_omp_region (bb, code, cur_region); 9198 fallthru = true; 9199 if (gimple_omp_task_taskwait_p (last)) 9200 cur_region = cur_region->outer; 9201 break; 9202 9203 case GIMPLE_OMP_ORDERED: 9204 cur_region = new_omp_region (bb, code, cur_region); 9205 fallthru = true; 9206 if (omp_find_clause (gimple_omp_ordered_clauses 9207 (as_a <gomp_ordered *> (last)), 9208 OMP_CLAUSE_DEPEND)) 9209 cur_region = cur_region->outer; 9210 break; 9211 9212 case GIMPLE_OMP_TARGET: 9213 cur_region = new_omp_region (bb, code, cur_region); 9214 fallthru = true; 9215 switch (gimple_omp_target_kind (last)) 9216 { 9217 case GF_OMP_TARGET_KIND_REGION: 9218 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 9219 case GF_OMP_TARGET_KIND_OACC_KERNELS: 9220 case GF_OMP_TARGET_KIND_OACC_SERIAL: 9221 break; 9222 case GF_OMP_TARGET_KIND_UPDATE: 9223 case GF_OMP_TARGET_KIND_ENTER_DATA: 9224 case GF_OMP_TARGET_KIND_EXIT_DATA: 9225 case GF_OMP_TARGET_KIND_DATA: 9226 case GF_OMP_TARGET_KIND_OACC_DATA: 9227 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 9228 case GF_OMP_TARGET_KIND_OACC_UPDATE: 9229 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 9230 case GF_OMP_TARGET_KIND_OACC_DECLARE: 9231 cur_region = cur_region->outer; 9232 break; 9233 default: 9234 gcc_unreachable (); 9235 } 9236 break; 9237 9238 case GIMPLE_OMP_SECTIONS: 9239 cur_region = new_omp_region (bb, code, cur_region); 9240 fallthru = true; 9241 break; 9242 9243 case GIMPLE_OMP_SECTIONS_SWITCH: 9244 fallthru = false; 9245 break; 9246 9247 case GIMPLE_OMP_ATOMIC_LOAD: 9248 case GIMPLE_OMP_ATOMIC_STORE: 9249 fallthru = true; 9250 break; 9251 9252 case GIMPLE_OMP_RETURN: 9253 /* In the case of a GIMPLE_OMP_SECTION, the edge will go 9254 somewhere other than the next block. This will be 9255 created later. */ 9256 cur_region->exit = bb; 9257 if (cur_region->type == GIMPLE_OMP_TASK) 9258 /* Add an edge corresponding to not scheduling the task 9259 immediately. */ 9260 make_edge (cur_region->entry, bb, EDGE_ABNORMAL); 9261 fallthru = cur_region->type != GIMPLE_OMP_SECTION; 9262 cur_region = cur_region->outer; 9263 break; 9264 9265 case GIMPLE_OMP_CONTINUE: 9266 cur_region->cont = bb; 9267 switch (cur_region->type) 9268 { 9269 case GIMPLE_OMP_FOR: 9270 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE 9271 succs edges as abnormal to prevent splitting 9272 them. */ 9273 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL; 9274 /* Make the loopback edge. */ 9275 make_edge (bb, single_succ (cur_region->entry), 9276 EDGE_ABNORMAL); 9277 9278 /* Create an edge from GIMPLE_OMP_FOR to exit, which 9279 corresponds to the case that the body of the loop 9280 is not executed at all. 
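   Like the edges above, both are marked EDGE_ABNORMAL so that they are
   not split before the region is expanded.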
*/ 9281 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL); 9282 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL); 9283 fallthru = false; 9284 break; 9285 9286 case GIMPLE_OMP_SECTIONS: 9287 /* Wire up the edges into and out of the nested sections. */ 9288 { 9289 basic_block switch_bb = single_succ (cur_region->entry); 9290 9291 struct omp_region *i; 9292 for (i = cur_region->inner; i ; i = i->next) 9293 { 9294 gcc_assert (i->type == GIMPLE_OMP_SECTION); 9295 make_edge (switch_bb, i->entry, 0); 9296 make_edge (i->exit, bb, EDGE_FALLTHRU); 9297 } 9298 9299 /* Make the loopback edge to the block with 9300 GIMPLE_OMP_SECTIONS_SWITCH. */ 9301 make_edge (bb, switch_bb, 0); 9302 9303 /* Make the edge from the switch to exit. */ 9304 make_edge (switch_bb, bb->next_bb, 0); 9305 fallthru = false; 9306 } 9307 break; 9308 9309 case GIMPLE_OMP_TASK: 9310 fallthru = true; 9311 break; 9312 9313 default: 9314 gcc_unreachable (); 9315 } 9316 break; 9317 9318 default: 9319 gcc_unreachable (); 9320 } 9321 9322 if (*region != cur_region) 9323 { 9324 *region = cur_region; 9325 if (cur_region) 9326 *region_idx = cur_region->entry->index; 9327 else 9328 *region_idx = 0; 9329 } 9330 9331 return fallthru; 9332 } 9333 9334 #include "gt-omp-expand.h" 9335